Merge d81793ea56 into e1b3fa242c

[RTV SLO 4D] Added support for subtitles
[RTV SLO 4D] Extract both HTTPS and HTTP HLS URLs
2025-01-10 13:30:09 +09:00 · 2024-07-28 01:19:26 +09:00 · 2021-03-25 11:49:56 +01:00 · 2021-03-25 11:48:40 +01:00 · 2021-02-15 16:38:15 +01:00 · 2021-02-15 15:33:21 +01:00
2 changed files with 99 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1068,6 +1068,7 @@ from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
+from .rtvslo import RTVSLO4DIE
 from .ruhd import RUHDIE
 from .rumble import RumbleEmbedIE
 from .rutube import (
--- a/youtube_dl/extractor/rtvslo.py
+++ b/youtube_dl/extractor/rtvslo.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    try_get,
+    unified_timestamp
+)
+
+
+class RTVSLO4DIE(InfoExtractor):
+    """Extractor for recordings in the RTV SLO 4D archive.
+
+    Metadata is fetched from the ``getRecording`` API endpoint. Recordings
+    whose ``mediaType`` is ``video`` are extracted from HLS playlists, those
+    whose ``mediaType`` is ``audio`` from direct HTTP file URLs.
+    """
+
+    _VALID_URL = r'https?://(?:4d\.rtvslo\.si/(?:arhiv/[^/]+|embed)|www\.rtvslo\.si/(?:4d/arhiv|mmr/prispevek))/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://4d.rtvslo.si/arhiv/seje-odbora-za-kmetijstvo-gozdarstvo-in-prehrano/174595438',
+        'md5': '37ab1181292a08e0d6b7952545e6ce8b',
+        'info_dict': {
+            'id': '174595438',
+            'ext': 'mp4',
+            'title': 'Krajčič o tatvini sendviča',
+            'thumbnail': r're:https://img.rtvslo.si/.+\.jpg',
+            'timestamp': 1549999614,
+            'upload_date': '20190212',
+            'duration': 85
+        },
+    }, {
+        'url': 'https://4d.rtvslo.si/arhiv/punto-e-a-capo/174752966',
+        'md5': 'a1ce903ee0a4051e417c9357e3d51c71',
+        'info_dict': {
+            'id': '174752966',
+            'ext': 'mp3',
+            'title': 'Dante divulgatore della scienza, con Gian Italo Bischi. E un ricordo di Federico Roncoroni',
+            'thumbnail': r're:https://img.rtvslo.si/.+\.jpg',
+            'timestamp': 1613033635,
+            'upload_date': '20210211',
+            'duration': 1740
+        },
+    }, {
+        'url': 'https://4d.rtvslo.si/arhiv/punto-e-a-capo/174752966',
+        'only_matching': True,
+    }, {
+        'url': 'https://4d.rtvslo.si/embed/174595438',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.rtvslo.si/4d/arhiv/174752597?s=tv_ita',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.rtvslo.si/mmr/prispevek/174752987',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the recording that ``url`` points to."""
+        media_id = self._match_id(url)
+
+        media_info = self._download_json(
+            'https://api.rtvslo.si/ava/getRecording/' + media_id, media_id,
+            query={'client_id': '19cc0556a5ee31d0d52a0e30b0696b26'})['response']
+
+        # Start from an empty list so that an unexpected mediaType reaches
+        # _sort_formats() with no formats instead of failing on an
+        # unassigned local variable.
+        formats = []
+        media_type = media_info.get('mediaType')
+
+        if media_type == 'video':
+            # NOTE(review): 'addaptiveMedia' is the key the API response is
+            # read with - presumably the API's own spelling; do not "fix" it.
+            adaptive_media = try_get(
+                media_info, lambda x: x['addaptiveMedia'], dict) or {}
+            # Both the HTTPS ('hls_sec') and HTTP ('hls') playlists are
+            # extracted; a missing or failing one must not abort the other.
+            for proto in ('hls_sec', 'hls'):
+                m3u8_url = adaptive_media.get(proto)
+                if not m3u8_url:
+                    continue
+                formats.extend(self._extract_m3u8_formats(
+                    m3u8_url, media_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls',
+                    fatal=False))
+
+        elif media_type == 'audio':
+            for media_file in media_info.get('mediaFiles') or []:
+                filename = media_file.get('filename')
+                http_streamer = try_get(
+                    media_file, lambda x: x['streamers']['http'])
+                # Without both parts no download URL can be built.
+                if not filename or not http_streamer:
+                    continue
+                formats.append({
+                    'format_id': media_file.get('mediaType'),
+                    'url': http_streamer + '/' + filename,
+                    'ext': determine_ext(filename),
+                    'tbr': int_or_none(media_file.get('bitrate')),
+                    'filesize': int_or_none(media_file.get('filesize')),
+                    'vcodec': 'none',
+                })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': media_id,
+            'title': media_info['title'],
+            'formats': formats,
+            # try_get() needs a callable getter; a plain key lookup is all
+            # that is required here.
+            'description': media_info.get('description'),
+            'thumbnail': media_info.get('thumbnail_sec'),
+            'timestamp': unified_timestamp(media_info.get('broadcastDate')),
+            'duration': int_or_none(media_info.get('duration')),
+            'subtitles': self.extract_subtitles(media_info),
+        }
+
+    def _get_subtitles(self, media_info):
+        """Build the subtitles dict from the ``subtitles`` list of ``media_info``.
+
+        Returns a dict mapping each language to a list of
+        ``{'ext': 'vtt', 'url': ...}`` entries. Entries lacking a language
+        or a file URL are skipped.
+        """
+        subs = {}
+        # "or []" also covers the key being present with a null value.
+        for sub in media_info.get('subtitles') or []:
+            lang, sub_url = sub.get('language'), sub.get('file')
+            if not lang or not sub_url:
+                continue
+            # Append rather than assign so that several tracks for the same
+            # language are all kept.
+            subs.setdefault(lang, []).append({
+                'ext': 'vtt',
+                'url': sub_url,
+            })
+
+        return subs
Author	SHA1	Message	Date
Miha Frangež	19d13b9dce	Merge `d81793ea56` into `e1b3fa242c`	2024-07-28 01:19:26 +09:00
Miha Frangež	d81793ea56	[RTV SLO 4D] Added support for subtitles	2021-03-25 11:49:56 +01:00
Miha Frangež	a302001725	[RTV SLO 4D] Extract both HTTPS and HTTP HLS URLs	2021-03-25 11:48:40 +01:00
Miha Frangež	944f674c6d	[RTV SLO 4D] Reordered info extraction	2021-02-15 16:38:15 +01:00
Miha Frangež	8fb3a99c34	[RTV SLO 4D] Fixed test that failed due to different sorting	2021-02-15 15:33:21 +01:00
Miha Frangež	6435b66967	[RTV SLO 4D] Support for multiple audio formats	2021-02-15 15:31:03 +01:00
Miha Frangež	2e75b8092f	[RTV SLO 4D] Cleanup, switched to HTTPS	2021-02-14 16:48:49 +01:00
Miha Frangež	5fe9de5f7a	[RTV SLO 4D] Added support for audio, more tests	2021-02-11 13:52:52 +01:00
Miha Frangež	2cf78de692	[RTV SLO 4D] Improved URL regex	2021-02-11 13:51:54 +01:00
Miha Frangež	8e6eca6432	[RTV SLO 4D] Removed unnecessary requests, improved formatting	2021-02-11 12:49:53 +01:00
Miha Frangež	910ef313e1	[RTV SLO 4D] Add extractor	2021-02-10 19:43:17 +01:00