From 691d549938c480bfc284ccb2ac95c6032adac0f6 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 30 Jan 2021 21:33:24 -0800 Subject: [PATCH 01/14] youtube-dl support for https://pac-12.com. Generally works on pages that have a free video, though in this commit getting the description sometimes fails. Does not work on the pages that require a TV provider login. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pac12.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/pac12.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fd19f0f0a..9e7f9bf96 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -822,6 +822,7 @@ from .orf import ( ORFIPTVIE, ) from .outsidetv import OutsideTVIE +from .pac12 import Pac12IE from .packtpub import ( PacktPubIE, PacktPubCourseIE, diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py new file mode 100644 index 000000000..5106901b3 --- /dev/null +++ b/youtube_dl/extractor/pac12.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class Pac12IE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12.com/(?:embed/)?(?P.*)' + + _TEST = { + 'url': 'https://pac-12.com/videos/2020-pac-12-womens-basketball-media-day-arizona-cal-stanford', + 'md5': 'b2e3c0cb99458c8b8e2dc22cb5ac922d', + 'info_dict': { + 'id': 'vod-VGQNKGlo9Go', + 'ext': 'mp4', + 'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford | Pac-12', + 'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_url = \ + self._search_regex(r'"manifest_url":"(?Phttps:[^"]+)"', + webpage, 'url', group='url', default=None) + vod_url = None + if (video_url is None) or ('vod-' not in url): + vod_url = self._search_regex(r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-[0-9a-zA-Z]+)', + webpage, 'url', default=None) + if video_url is None: + if vod_url is None: + return None + return self.url_result(vod_url) + video_url = re.sub(r'\\', '', video_url) + title = self._html_search_regex(r'(.+?)', + webpage, 'title') + if 'vod-' not in url and vod_url is not None: + video_id = self._match_id(vod_url) + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'url': video_url, + 'ext': 'mp4', + } From f5e049fbc28a6d1957b68f72b9b55b3b94f193ce Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 30 Jan 2021 21:42:31 -0800 Subject: [PATCH 02/14] Add a test for a page that doesn't contain manifest_url itself, but does contain an embed/vod-XXXXXXXX url for a page that does contain manifest_url. This test currently fails because the description on the inner page isn't in the OpenGraph format. --- youtube_dl/extractor/pac12.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 5106901b3..004165d72 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class Pac12IE(InfoExtractor): _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12.com/(?:embed/)?(?P.*)' - _TEST = { + _TESTS = [{ 'url': 'https://pac-12.com/videos/2020-pac-12-womens-basketball-media-day-arizona-cal-stanford', 'md5': 'b2e3c0cb99458c8b8e2dc22cb5ac922d', 'info_dict': { @@ -18,7 +18,16 @@ class Pac12IE(InfoExtractor): 'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford | Pac-12', 'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams.', } - } + }, { + 'url': 'https://pac-12.com/article/2020/11/24/sonoran-dog-dish-presented-tums', + 'md5': 'a7a8ac72273b9468924bc058cc220d37', + 'info_dict': { + 'id': 'vod-YLMKpNLZvR0', + 'ext': 'mp4', + 'title': 'Sonoran Dog | The Dish, presented by TUMS | Pac-12', + 'description': 'Pac-12 Networks introduces "The Dish," presented by Tums. Jaymee Sire is bringing fans a closeup to game day treats from around the Conference with each treat connecting to a Pac-12 school, bringing the flavor and recipes fans know and love right to the dish! As Arizona and USC basketball seasons tip off, the first feature item from "The Dish" is the Sonoran Dog, a beloved treat by Trojans & Wildcat fans.', + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +44,7 @@ class Pac12IE(InfoExtractor): return None return self.url_result(vod_url) video_url = re.sub(r'\\', '', video_url) - title = self._html_search_regex(r'(.+?)', + title = self._html_search_regex(r'(.+?)', webpage, 'title') if 'vod-' not in url and vod_url is not None: video_id = self._match_id(vod_url) From 92a3cd37a6010abdc844f27038403aab11a16338 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 30 Jan 2021 22:50:18 -0800 Subject: [PATCH 03/14] Look harder for the description. Both tests pass now. --- youtube_dl/extractor/pac12.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 004165d72..2a5020106 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -44,14 +44,18 @@ class Pac12IE(InfoExtractor): return None return self.url_result(vod_url) video_url = re.sub(r'\\', '', video_url) - title = self._html_search_regex(r'(.+?)', - webpage, 'title') if 'vod-' not in url and vod_url is not None: video_id = self._match_id(vod_url) + title = self._html_search_regex(r'(.+?)', + webpage, 'title') + description = self._og_search_description(webpage, default=None) \ + or self._search_regex(r'"description":"(?P[^"]+)"', + webpage, 'description', default="") \ + .encode('utf-8').decode('unicode_escape') return { 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), + 'description': description, 'url': video_url, 'ext': 'mp4', } From 1ac23b7d8ca862293b4df849ae6d9d284e755364 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 12:27:22 -0800 Subject: [PATCH 04/14] Cleanup for pull request. Both tests still pass, successfully fetching the video and description. --- youtube_dl/extractor/pac12.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 2a5020106..8b46bdc95 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -48,10 +48,12 @@ class Pac12IE(InfoExtractor): video_id = self._match_id(vod_url) title = self._html_search_regex(r'(.+?)', webpage, 'title') - description = self._og_search_description(webpage, default=None) \ - or self._search_regex(r'"description":"(?P[^"]+)"', - webpage, 'description', default="") \ - .encode('utf-8').decode('unicode_escape') + description = self._og_search_description(webpage, default=None) + if description == None: + d = self._search_regex(r'"description":"(?P[^"]+)"', + webpage, 'description', default=None) + if d != None: + description = d.encode('utf-8').decode('unicode_escape') return { 'id': video_id, 'title': title, From ad49e65d1ecf3150c733e329344ba42f9e95ba91 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 12:44:54 -0800 Subject: [PATCH 05/14] Cleanup after running flake8 again. --- youtube_dl/extractor/pac12.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 8b46bdc95..534261e15 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -49,10 +49,10 @@ class Pac12IE(InfoExtractor): title = self._html_search_regex(r'(.+?)', webpage, 'title') description = self._og_search_description(webpage, default=None) - if description == None: + if description is None: d = self._search_regex(r'"description":"(?P[^"]+)"', webpage, 'description', default=None) - if d != None: + if d is not None: description = d.encode('utf-8').decode('unicode_escape') return { 'id': video_id, From fd61f317bf71b91abbc5cd256027877b70f6dbce Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 16:26:33 -0800 Subject: [PATCH 06/14] Step 1 of a rewrite to find and parse embedded JSON instead of just running a regexp over the whole page. This version passes the tests, but more work is needed. --- youtube_dl/extractor/pac12.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 534261e15..07da749d3 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -1,5 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +from __future__ import print_function #XXX +import pprint #XXX import re @@ -32,9 +34,15 @@ class Pac12IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = \ - self._search_regex(r'"manifest_url":"(?Phttps:[^"]+)"', - webpage, 'url', group='url', default=None) + + drupal_settings = self._parse_json( + self._search_regex( + r']+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)', + webpage, 'drupal settings'), video_id) + pprint.pprint(drupal_settings.get('currentVideo')) + + video_url = drupal_settings.get('currentVideo', {}).get('manifest_url') + vod_url = None if (video_url is None) or ('vod-' not in url): vod_url = self._search_regex(r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-[0-9a-zA-Z]+)', From fb35bd50b0d58540102226edb2b62f6f05d90360 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 17:34:34 -0800 Subject: [PATCH 07/14] Finished rewrite to parse JSON as JSON. This looks much better. --- youtube_dl/extractor/pac12.py | 45 ++++++++++++----------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 07da749d3..69447cdf4 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -1,13 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -from __future__ import print_function #XXX -import pprint #XXX - -import re from .common import InfoExtractor - class Pac12IE(InfoExtractor): _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12.com/(?:embed/)?(?P.*)' @@ -17,8 +12,8 @@ class Pac12IE(InfoExtractor): 'info_dict': { 'id': 'vod-VGQNKGlo9Go', 'ext': 'mp4', - 'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford | Pac-12', - 'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams.', + 'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford', + 'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams. ', } }, { 'url': 'https://pac-12.com/article/2020/11/24/sonoran-dog-dish-presented-tums', @@ -26,7 +21,7 @@ class Pac12IE(InfoExtractor): 'info_dict': { 'id': 'vod-YLMKpNLZvR0', 'ext': 'mp4', - 'title': 'Sonoran Dog | The Dish, presented by TUMS | Pac-12', + 'title': 'Sonoran Dog | The Dish, presented by TUMS', 'description': 'Pac-12 Networks introduces "The Dish," presented by Tums. Jaymee Sire is bringing fans a closeup to game day treats from around the Conference with each treat connecting to a Pac-12 school, bringing the flavor and recipes fans know and love right to the dish! As Arizona and USC basketball seasons tip off, the first feature item from "The Dish" is the Sonoran Dog, a beloved treat by Trojans & Wildcat fans.', } }] @@ -39,33 +34,23 @@ class Pac12IE(InfoExtractor): self._search_regex( r']+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)', webpage, 'drupal settings'), video_id) - pprint.pprint(drupal_settings.get('currentVideo')) - video_url = drupal_settings.get('currentVideo', {}).get('manifest_url') + cv = drupal_settings.get('currentVideo', {}) + manifest_url = cv.get('manifest_url') - vod_url = None - if (video_url is None) or ('vod-' not in url): - vod_url = self._search_regex(r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-[0-9a-zA-Z]+)', - webpage, 'url', default=None) - if video_url is None: + if manifest_url is None: + # Video may be embedded one level deeper + vod_url = self._search_regex( + r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-\w+)', + webpage, 'url', default=None) if vod_url is None: return None return self.url_result(vod_url) - video_url = re.sub(r'\\', '', video_url) - if 'vod-' not in url and vod_url is not None: - video_id = self._match_id(vod_url) - title = self._html_search_regex(r'(.+?)', - webpage, 'title') - description = self._og_search_description(webpage, default=None) - if description is None: - d = self._search_regex(r'"description":"(?P[^"]+)"', - webpage, 'description', default=None) - if d is not None: - description = d.encode('utf-8').decode('unicode_escape') + return { - 'id': video_id, - 'title': title, - 'description': description, - 'url': video_url, + 'id': cv.get('id'), + 'title': cv.get('title'), + 'description': cv.get('description'), + 'url': manifest_url, 'ext': 'mp4', } From d900bd96b4309b8824a5b87c44af4814747e5d58 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 17:36:31 -0800 Subject: [PATCH 08/14] flake8 cleanup --- youtube_dl/extractor/pac12.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 69447cdf4..5619cac9d 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor + class Pac12IE(InfoExtractor): _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12.com/(?:embed/)?(?P.*)' @@ -41,7 +42,7 @@ class Pac12IE(InfoExtractor): if manifest_url is None: # Video may be embedded one level deeper vod_url = self._search_regex( - r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-\w+)', + r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-\w+)', webpage, 'url', default=None) if vod_url is None: return None From 10273fbd22619c5a5055ef49ebfb706402a87949 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 17:49:58 -0800 Subject: [PATCH 09/14] Add fallbacks for id and title, just in case. --- youtube_dl/extractor/pac12.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 5619cac9d..785730cd6 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -49,8 +49,10 @@ class Pac12IE(InfoExtractor): return self.url_result(vod_url) return { - 'id': cv.get('id'), - 'title': cv.get('title'), + 'id': cv.get('id') or video_id, + 'title': (cv.get('title') + or self._html_search_regex(r'(.+?)', + webpage, 'title')), 'description': cv.get('description'), 'url': manifest_url, 'ext': 'mp4', From cec9f4bf3cd9fa8d74d820fe30ff0fc5694ae3a9 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 20 Feb 2021 17:47:48 -0800 Subject: [PATCH 10/14] Add support for live streams on event pages. --- youtube_dl/extractor/pac12.py | 36 ++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 785730cd6..80a5ca18f 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -1,5 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +try: + unicode +except NameError: + unicode = str from .common import InfoExtractor @@ -36,24 +40,42 @@ class Pac12IE(InfoExtractor): r']+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)', webpage, 'drupal settings'), video_id) - cv = drupal_settings.get('currentVideo', {}) - manifest_url = cv.get('manifest_url') + cv = drupal_settings.get('currentVideo') - if manifest_url is None: - # Video may be embedded one level deeper + if cv is False: + # May be an event page; look for the live stream. + try: + network = drupal_settings['pac12_react'][ + 'pac12_react_event_widget']['event'][ + 'broadcast_info']['broadcast_networks'][0]['id'] + cv = drupal_settings['pac12_react']['networks'][str(network)] + except (KeyError, IndexError): + # Can't find a live stream this way. + pass + + if not cv or 'manifest_url' not in cv: + # Video may be embedded one level deeper; look for embed URL. vod_url = self._search_regex( r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-\w+)', webpage, 'url', default=None) if vod_url is None: + # Failure; no video found. return None return self.url_result(vod_url) return { - 'id': cv.get('id') or video_id, + # cv['id'] might be an integer, string, or missing. + 'id': unicode(cv.get('id') or video_id), 'title': (cv.get('title') + or self._html_search_meta( + ['og:title', 'twitter:title', + 'branch.deeplink.title'], webpage) or self._html_search_regex(r'(.+?)', webpage, 'title')), - 'description': cv.get('description'), - 'url': manifest_url, + 'description': (cv.get('description') + or self._html_search_meta( + ['og:description', 'twitter:description', + 'description'], webpage, fatal=False)), + 'url': cv['manifest_url'], 'ext': 'mp4', } From d4512dfd52a2f224d35818fe8f1875bf75f8af29 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Mon, 8 Mar 2021 12:03:50 -0800 Subject: [PATCH 11/14] Coding standard: use compat_str --- youtube_dl/extractor/pac12.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 80a5ca18f..5f6f1692c 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -1,11 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -try: - unicode -except NameError: - unicode = str from .common import InfoExtractor +from ..compat import compat_str class Pac12IE(InfoExtractor): @@ -65,7 +62,7 @@ class Pac12IE(InfoExtractor): return { # cv['id'] might be an integer, string, or missing. - 'id': unicode(cv.get('id') or video_id), + 'id': compat_str(cv.get('id') or video_id), 'title': (cv.get('title') or self._html_search_meta( ['og:title', 'twitter:title', From 5987eb1302cd15c9e60dd801feb828fcc96270f0 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Mon, 8 Mar 2021 12:24:00 -0800 Subject: [PATCH 12/14] Coding standard: Use try_get. --- youtube_dl/extractor/pac12.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 5f6f1692c..d90904327 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str +from ..utils import try_get class Pac12IE(InfoExtractor): @@ -41,14 +42,12 @@ class Pac12IE(InfoExtractor): if cv is False: # May be an event page; look for the live stream. - try: - network = drupal_settings['pac12_react'][ - 'pac12_react_event_widget']['event'][ - 'broadcast_info']['broadcast_networks'][0]['id'] - cv = drupal_settings['pac12_react']['networks'][str(network)] - except (KeyError, IndexError): - # Can't find a live stream this way. - pass + network = try_get(drupal_settings, lambda x: x['pac12_react'][ + 'pac12_react_event_widget']['event']['broadcast_info'][ + 'broadcast_networks'][0]['id'], int) + if network is not None: + cv = try_get(drupal_settings, lambda x: x['pac12_react' + ]['networks'][str(network)], dict) if not cv or 'manifest_url' not in cv: # Video may be embedded one level deeper; look for embed URL. From f4ef8145efb7481ad8a9f8f52739d2406396bbd1 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Mon, 8 Mar 2021 12:27:53 -0800 Subject: [PATCH 13/14] Make flake8 happy. This way is more readable. --- youtube_dl/extractor/pac12.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index d90904327..7344dae80 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -42,12 +42,15 @@ class Pac12IE(InfoExtractor): if cv is False: # May be an event page; look for the live stream. - network = try_get(drupal_settings, lambda x: x['pac12_react'][ - 'pac12_react_event_widget']['event']['broadcast_info'][ - 'broadcast_networks'][0]['id'], int) + network = try_get(drupal_settings, + lambda x: x['pac12_react'][ + 'pac12_react_event_widget']['event'][ + 'broadcast_info']['broadcast_networks'][0][ + 'id'], int) if network is not None: - cv = try_get(drupal_settings, lambda x: x['pac12_react' - ]['networks'][str(network)], dict) + cv = try_get(drupal_settings, + lambda x: x['pac12_react']['networks'][ + str(network)], dict) if not cv or 'manifest_url' not in cv: # Video may be embedded one level deeper; look for embed URL. From 96a49c78380266888974307f63a35d7acbfba89f Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Wed, 30 Nov 2022 12:13:38 -0800 Subject: [PATCH 14/14] Make the tests pass again The md5sums of the test videos changed at some point. --- youtube_dl/extractor/pac12.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 7344dae80..8e7cbe832 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -11,7 +11,7 @@ class Pac12IE(InfoExtractor): _TESTS = [{ 'url': 'https://pac-12.com/videos/2020-pac-12-womens-basketball-media-day-arizona-cal-stanford', - 'md5': 'b2e3c0cb99458c8b8e2dc22cb5ac922d', + 'md5': 'c134cb64fc884658497690dca50094a3', 'info_dict': { 'id': 'vod-VGQNKGlo9Go', 'ext': 'mp4', @@ -20,7 +20,7 @@ class Pac12IE(InfoExtractor): } }, { 'url': 'https://pac-12.com/article/2020/11/24/sonoran-dog-dish-presented-tums', - 'md5': 'a7a8ac72273b9468924bc058cc220d37', + 'md5': 'a91ae1eaf05cea2c5dbe6c1ab7997cc3', 'info_dict': { 'id': 'vod-YLMKpNLZvR0', 'ext': 'mp4',