From bbc3b5b4bb760ce1c04cbe0374a82c53acd5251c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2020 12:23:38 +0700 Subject: [PATCH 01/12] [ChangeLog] Actualize [ci skip] --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4143ec2fb..7610cab17 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Core +* [extractor/common] Relax interaction count extraction in _json_ld ++ [extractor/common] Extract author as uploader for VideoObject in _json_ld +* [downloader/hls] Fix incorrect end byte in Range HTTP header for + media segments with EXT-X-BYTERANGE (#14748, #24512) +* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601) +* [downloader/http] Improve timeout detection when reading block of data + (#10935) +* [downloader/http] Retry download when urlopen times out (#10935, #26603) + +Extractors +* [redtube] Extend URL regular expression (#26506) +* [twitch] Refactor +* [twitch:stream] Switch to GraphQL and fix reruns (#26535) ++ [telequebec] Add support for brightcove videos (#25833) +* [pornhub] Extract metadata from JSON-LD (#26614) +* [pornhub] Fix view count extraction (#26621, #26614) + + version 2020.09.14 Core From b55715934bb7f9474f69b99e4d51cc83dee7cbef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2020 12:30:45 +0700 Subject: [PATCH 02/12] release 2020.09.20 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 352263789..ce0319fe2 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md 
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.14 + [debug] youtube-dl version 2020.09.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index fa6509be3..a4002603c 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 70b0f2f19..3f8b6ce2e 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ 
b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index ec17e4a33..d880c225a 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.09.14 + [debug] youtube-dl version 2020.09.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 6ac963206..dd5fb5144 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.09.14** +- [ ] I've verified that I'm 
running youtube-dl version **2020.09.20** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 7610cab17..9b52b7bd2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.09.20 Core * [extractor/common] Relax interaction count extraction in _json_ld diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5625b8324..709e5c74c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.09.14' +__version__ = '2020.09.20' From 0837992a226690d514eb01b7460bed4a33fddb30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2020 06:44:14 +0700 Subject: [PATCH 03/12] [downloader/http] Fix access to not yet opened stream in retry --- youtube_dl/downloader/http.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 6ef26548d..04da14d91 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -223,9 +223,10 @@ class HttpFD(FileDownloader): def retry(e): to_stdout = ctx.tmpfilename == '-' - if not to_stdout: - ctx.stream.close() - ctx.stream = None + if ctx.stream is not None: + if not to_stdout: + ctx.stream.close() + ctx.stream = None ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) raise RetryDownload(e) From c5764b3f89b66e0148a186490f522ae7c259a55e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2020 07:01:59 +0700 Subject: [PATCH 04/12] [downloader/http] Properly handle missing message in SSLError (closes #26646) --- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 04da14d91..96379caf1 100644 --- a/youtube_dl/downloader/http.py +++ 
b/youtube_dl/downloader/http.py @@ -241,7 +241,7 @@ class HttpFD(FileDownloader): except socket.error as e: # SSLError on python 2 (inherits socket.error) may have # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message') == 'The read operation timed out': + if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': retry(e) raise From adae9e844b0a40bf686a142a20c7ca30e4e1145b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Sep 2020 06:36:07 +0700 Subject: [PATCH 05/12] [README.md] Fix autonumber sequence description (refs #26686) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 45326c69e..cd8856828 100644 --- a/README.md +++ b/README.md @@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f - `extractor` (string): Name of the extractor - `extractor_key` (string): Key name of the extractor - `epoch` (numeric): Unix epoch when creating the file - - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero + - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start` - `playlist` (string): Name or id of the playlist that contains the video - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist - `playlist_id` (string): Playlist identifier From 0c92f1e96b004fc7a04eac0759f115a535c8e03a Mon Sep 17 00:00:00 2001 From: Surkal Date: Thu, 24 Sep 2020 01:46:58 +0200 Subject: [PATCH 06/12] [iprima] Improve video id extraction (#26507) (closes #26494) --- youtube_dl/extractor/iprima.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 53a550c11..648ae6741 100644 --- 
a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -86,7 +86,8 @@ class IPrimaIE(InfoExtractor): (r']+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', r'data-product="([^"]+)">', r'id=["\']player-(p\d+)"', - r'playerId\s*:\s*["\']player-(p\d+)'), + r'playerId\s*:\s*["\']player-(p\d+)', + r'\bvideos\s*=\s*["\'](p\d+)'), webpage, 'real id') playerpage = self._download_webpage( From d65d89183f645a0e95910c3861491a75c26000eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Sep 2020 07:36:38 +0700 Subject: [PATCH 07/12] [expressen] Add support for di.se (closes #26670) --- youtube_dl/extractor/expressen.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/expressen.py b/youtube_dl/extractor/expressen.py index f79365038..dc8b855d2 100644 --- a/youtube_dl/extractor/expressen.py +++ b/youtube_dl/extractor/expressen.py @@ -15,7 +15,7 @@ from ..utils import ( class ExpressenIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?:www\.)?expressen\.se/ + (?:www\.)?(?:expressen|di)\.se/ (?:(?:tvspelare/video|videoplayer/embed)/)? 
tv/(?:[^/]+/)* (?P[^/?#&]+) @@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor): }, { 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', 'only_matching': True, + }, { + 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', + 'only_matching': True, }] @staticmethod def _extract_urls(webpage): return [ mobj.group('url') for mobj in re.finditer( - r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1', webpage)] def _real_extract(self, url): From 1050e0d09f02209cc5c6f45da46c2b79f73d96f4 Mon Sep 17 00:00:00 2001 From: Felix Yan Date: Sun, 18 Oct 2020 00:02:17 +0800 Subject: [PATCH 08/12] [iqiyi] Fix typo (#26884) --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index cd11aa70f..5df674daf 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object): elif function in other_functions: other_functions[function]() else: - raise ExtractorError('Unknown funcion %s' % function) + raise ExtractorError('Unknown function %s' % function) return sdk.target From 605535776a8d5beba78b4d1b057d5206ddd969eb Mon Sep 17 00:00:00 2001 From: Sergio Livi Date: Sat, 17 Oct 2020 18:14:46 +0200 Subject: [PATCH 09/12] [ustream] Add support for video.ibm.com (#26894) --- youtube_dl/extractor/ustream.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 582090d0d..9e860aeb7 100644 --- 
a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -19,7 +19,7 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?Precorded|embed|embed/recorded)/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/(?Precorded|embed|embed/recorded)/(?P\d+)' IE_NAME = 'ustream' _TESTS = [{ 'url': 'http://www.ustream.tv/recorded/20274954', @@ -67,12 +67,15 @@ class UstreamIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 download }, + }, { + 'url': 'https://video.ibm.com/embed/recorded/128240221?&autoplay=true&controls=true&volume=100', + 'only_matching': True, }] @staticmethod def _extract_url(webpage): mobj = re.search( - r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) + r']+?src=(["\'])(?Phttp://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage) if mobj is not None: return mobj.group('url') From 4eda10499e8db831167062b0e0dbc7d10d34c1f9 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Sat, 17 Oct 2020 13:10:41 -0400 Subject: [PATCH 10/12] [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851) The current logic in `js_to_json` tries to rewrite octal/hex numbers to decimal. However, when the logic actually happens the `"` or `'` have already been trimmed off. This causes what were originally strings, that happen to look like octal/hex numbers, to get rewritten to decimal and returned as a number rather than a string. In practice something like: ```js { "0x40": "foo", "040": "bar", } ``` would get rewritten as: ```json { 64: "foo", 32: "bar" } ``` This is problematic since this isn't valid JSON as you cannot have non-string keys.
--- test/test_utils.py | 6 ++++++ youtube_dl/utils.py | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 962fd8d75..c2d1e4fb1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase): on = js_to_json('{42:4.2e1}') self.assertEqual(json.loads(on), {'42': 42.0}) + on = js_to_json('{ "0x40": "0x40" }') + self.assertEqual(json.loads(on), {'0x40': '0x40'}) + + on = js_to_json('{ "040": "040" }') + self.assertEqual(json.loads(on), {'040': '040'}) + def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 01d9c0362..737e2810e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4088,12 +4088,12 @@ def js_to_json(code): '\\\n': '', '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) - - for regex, base in INTEGER_TABLE: - im = re.match(regex, v) - if im: - i = int(im.group(1), base) - return '"%d":' % i if v.endswith(':') else '%d' % i + else: + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), base) + return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From 7d740e7dc7149cfd93dde1fa47e9f314e72582c2 Mon Sep 17 00:00:00 2001 From: Hannu Hartikainen Date: Mon, 19 Oct 2020 17:56:23 +0000 Subject: [PATCH 11/12] [23video] Relax _VALID_URL (#26870) --- youtube_dl/extractor/twentythreevideo.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index aa0c6e90f..dc5609192 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -8,8 +8,8 @@ from ..utils import int_or_none class TwentyThreeVideoIE(InfoExtractor): IE_NAME = '23video' - _VALID_URL = 
r'https?://video\.(?Ptwentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P.*?\bphoto(?:_|%5f)id=(?P\d+).*)' - _TEST = { + _VALID_URL = r'https?://(?P[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P.*?\bphoto(?:_|%5f)id=(?P\d+).*)' + _TESTS = [{ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1', 'md5': '75fcf216303eb1dae9920d651f85ced4', 'info_dict': { @@ -21,11 +21,14 @@ class TwentyThreeVideoIE(InfoExtractor): 'uploader_id': '12258964', 'uploader': 'Rasmus Bysted', } - } + }, { + 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', + 'only_matching': True, + }] def _real_extract(self, url): domain, query, photo_id = re.match(self._VALID_URL, url).groups() - base_url = 'https://video.%s' % domain + base_url = 'https://%s' % domain photo_data = self._download_json( base_url + '/api/photo/list?' + query, photo_id, query={ 'format': 'json', From 48c5663c5f7dd9ecc4720f7c1522627665197939 Mon Sep 17 00:00:00 2001 From: Toan Nguyen Date: Thu, 22 Oct 2020 19:15:05 +0700 Subject: [PATCH 12/12] [afreecatv] Fix typo (#26970) --- youtube_dl/extractor/afreecatv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 6275e5209..b56abb1e6 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor): video_element = video_xml.findall(compat_xpath('./track/video'))[-1] if video_element is None or video_element.text is None: raise ExtractorError( - 'Video %s video does not exist' % video_id, expected=True) + 'Video %s does not exist' % video_id, expected=True) video_url = video_element.text.strip()