From fb35bd50b0d58540102226edb2b62f6f05d90360 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sat, 13 Feb 2021 17:34:34 -0800 Subject: [PATCH] Finished rewrite to parse JSON as JSON. This looks much better. --- youtube_dl/extractor/pac12.py | 45 ++++++++++++----------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py index 07da749d3..69447cdf4 100644 --- a/youtube_dl/extractor/pac12.py +++ b/youtube_dl/extractor/pac12.py @@ -1,13 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -from __future__ import print_function #XXX -import pprint #XXX - -import re from .common import InfoExtractor - class Pac12IE(InfoExtractor): _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12.com/(?:embed/)?(?P.*)' @@ -17,8 +12,8 @@ class Pac12IE(InfoExtractor): 'info_dict': { 'id': 'vod-VGQNKGlo9Go', 'ext': 'mp4', - 'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford | Pac-12', - 'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams.', + 'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford', + 'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams. ', } }, { 'url': 'https://pac-12.com/article/2020/11/24/sonoran-dog-dish-presented-tums', @@ -26,7 +21,7 @@ class Pac12IE(InfoExtractor): 'info_dict': { 'id': 'vod-YLMKpNLZvR0', 'ext': 'mp4', - 'title': 'Sonoran Dog | The Dish, presented by TUMS | Pac-12', + 'title': 'Sonoran Dog | The Dish, presented by TUMS', 'description': 'Pac-12 Networks introduces "The Dish," presented by Tums. Jaymee Sire is bringing fans a closeup to game day treats from around the Conference with each treat connecting to a Pac-12 school, bringing the flavor and recipes fans know and love right to the dish! As Arizona and USC basketball seasons tip off, the first feature item from "The Dish" is the Sonoran Dog, a beloved treat by Trojans & Wildcat fans.', } }] @@ -39,33 +34,23 @@ class Pac12IE(InfoExtractor): self._search_regex( r']+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)', webpage, 'drupal settings'), video_id) - pprint.pprint(drupal_settings.get('currentVideo')) - video_url = drupal_settings.get('currentVideo', {}).get('manifest_url') + cv = drupal_settings.get('currentVideo', {}) + manifest_url = cv.get('manifest_url') - vod_url = None - if (video_url is None) or ('vod-' not in url): - vod_url = self._search_regex(r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-[0-9a-zA-Z]+)', - webpage, 'url', default=None) - if video_url is None: + if manifest_url is None: + # Video may be embedded one level deeper + vod_url = self._search_regex( + r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-\w+)', + webpage, 'url', default=None) if vod_url is None: return None return self.url_result(vod_url) - video_url = re.sub(r'\\', '', video_url) - if 'vod-' not in url and vod_url is not None: - video_id = self._match_id(vod_url) - title = self._html_search_regex(r'(.+?)', - webpage, 'title') - description = self._og_search_description(webpage, default=None) - if description is None: - d = self._search_regex(r'"description":"(?P[^"]+)"', - webpage, 'description', default=None) - if d is not None: - description = d.encode('utf-8').decode('unicode_escape') + return { - 'id': video_id, - 'title': title, - 'description': description, - 'url': video_url, + 'id': cv.get('id'), + 'title': cv.get('title'), + 'description': cv.get('description'), + 'url': manifest_url, 'ext': 'mp4', }