Compare commits

...

15 Commits

Author SHA1 Message Date
Timothy Mann
a367187d48
Merge 96a49c7838 into e1b3fa242c 2024-07-28 01:19:34 +09:00
Tim Mann
96a49c7838 Make the tests pass again
The md5sums of the test videos changed at some point.
2022-11-30 12:13:38 -08:00
Tim Mann
f4ef8145ef Make flake8 happy. This way is more readable. 2021-03-08 12:27:53 -08:00
Tim Mann
5987eb1302 Coding standard: Use try_get. 2021-03-08 12:24:00 -08:00
Tim Mann
d4512dfd52 Coding standard: use compat_str 2021-03-08 12:03:50 -08:00
Tim Mann
cec9f4bf3c Add support for live streams on event pages. 2021-02-20 17:47:48 -08:00
Tim Mann
10273fbd22 Add fallbacks for id and title, just in case. 2021-02-13 17:49:58 -08:00
Tim Mann
d900bd96b4 flake8 cleanup 2021-02-13 17:36:31 -08:00
Tim Mann
fb35bd50b0 Finished rewrite to parse JSON as JSON. This looks much better. 2021-02-13 17:34:34 -08:00
Tim Mann
fd61f317bf Step 1 of a rewrite to find and parse embedded JSON instead of just
running a regexp over the whole page.  This version passes the tests,
but more work is needed.
2021-02-13 16:26:33 -08:00
Tim Mann
ad49e65d1e Cleanup after running flake8 again. 2021-02-13 12:44:54 -08:00
Tim Mann
1ac23b7d8c Cleanup for pull request. Both tests still pass, successfully
fetching the video and description.
2021-02-13 12:27:22 -08:00
Tim Mann
92a3cd37a6 Look harder for the description. Both tests pass now. 2021-01-30 22:50:18 -08:00
Tim Mann
f5e049fbc2 Add a test for a page that doesn't contain manifest_url itself, but
does contain an embed/vod-XXXXXXXX url for a page that does contain
manifest_url.  This test currently fails because the description on
the inner page isn't in the OpenGraph format.
2021-01-30 22:11:21 -08:00
Tim Mann
691d549938 youtube-dl support for https://pac-12.com.
Generally works on pages that have a free video, though in this commit
getting the description sometimes fails.  Does not work on the pages
that require a TV provider login.
2021-01-30 21:33:24 -08:00
2 changed files with 81 additions and 0 deletions

View File

@ -907,6 +907,7 @@ from .orf import (
ORFRadioCollectionIE,
)
from .outsidetv import OutsideTVIE
from .pac12 import Pac12IE
from .packtpub import (
PacktPubIE,
PacktPubCourseIE,

View File

@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import try_get
class Pac12IE(InfoExtractor):
    """Extractor for free videos and event live streams on pac-12.com.

    The page embeds a Drupal settings JSON blob.  The video metadata is
    read from its ``currentVideo`` entry, or, for event pages, from the
    network entry referenced by the event's broadcast info.  When neither
    provides a ``manifest_url``, the page is searched for an embedded
    ``vod-XXXX`` URL and extraction is delegated to that page.
    """

    # The dot in the host name is escaped so that only the real
    # pac-12.com domain (and its subdomains) is matched.
    _VALID_URL = r'https?://(?:[a-z]+\.)?pac-12\.com/(?:embed/)?(?P<id>.*)'
    _TESTS = [{
        'url': 'https://pac-12.com/videos/2020-pac-12-womens-basketball-media-day-arizona-cal-stanford',
        'md5': 'c134cb64fc884658497690dca50094a3',
        'info_dict': {
            'id': 'vod-VGQNKGlo9Go',
            'ext': 'mp4',
            'title': '2020 Pac-12 Women\'s Basketball Media Day - Arizona, Cal & Stanford',
            'description': 'During the 2020 Pac-12 Women\'s Basketball Media Day, Ros Gold-Onwude moderates a discussion with Arizona\'s Adia Barnes & Aari McDonald, Cal\'s Charmin Smith & Evelien Lutje Schipholt & Stanford\'s Tara VanDerveer & Kiana Williams. ',
        }
    }, {
        'url': 'https://pac-12.com/article/2020/11/24/sonoran-dog-dish-presented-tums',
        'md5': 'a91ae1eaf05cea2c5dbe6c1ab7997cc3',
        'info_dict': {
            'id': 'vod-YLMKpNLZvR0',
            'ext': 'mp4',
            'title': 'Sonoran Dog | The Dish, presented by TUMS',
            'description': 'Pac-12 Networks introduces "The Dish," presented by Tums. Jaymee Sire is bringing fans a closeup to game day treats from around the Conference with each treat connecting to a Pac-12 school, bringing the flavor and recipes fans know and love right to the dish! As Arizona and USC basketball seasons tip off, the first feature item from "The Dish" is the Sonoran Dog, a beloved treat by Trojans & Wildcat fans.',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for *url*, or a url_result for an embedded video.

        Raises an extractor error (via the fatal ``_search_regex`` calls)
        when the page has neither the Drupal settings blob nor any usable
        video reference, instead of silently returning nothing.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        drupal_settings = self._parse_json(
            self._search_regex(
                r'<script[^>]+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)</script>',
                webpage, 'drupal settings'), video_id)

        cv = drupal_settings.get('currentVideo')
        if not cv or not isinstance(cv, dict):
            # currentVideo is absent, false or otherwise unusable.
            # May be an event page; look for the live stream.
            cv = None
            network = try_get(
                drupal_settings,
                lambda x: x['pac12_react']['pac12_react_event_widget'][
                    'event']['broadcast_info']['broadcast_networks'][0]['id'],
                int)
            if network is not None:
                # The networks mapping is keyed by the network id as text.
                cv = try_get(
                    drupal_settings,
                    lambda x: x['pac12_react']['networks'][
                        compat_str(network)],
                    dict)

        if not cv or 'manifest_url' not in cv:
            # Video may be embedded one level deeper; look for embed URL.
            # The search is fatal: if nothing is found there is no video
            # on this page and the user gets a proper extraction error.
            vod_url = self._search_regex(
                r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-\w+)',
                webpage, 'video URL')
            return self.url_result(vod_url)

        return {
            # cv['id'] might be an integer, string, or missing.
            'id': compat_str(cv.get('id') or video_id),
            'title': (cv.get('title')
                      or self._html_search_meta(
                          ['og:title', 'twitter:title',
                           'branch.deeplink.title'], webpage)
                      or self._html_search_regex(
                          r'<title>(.+?)</title>', webpage, 'title')),
            'description': (cv.get('description')
                            or self._html_search_meta(
                                ['og:description', 'twitter:description',
                                 'description'], webpage, fatal=False)),
            # NOTE(review): manifest_url is presumably a streaming manifest
            # rather than a plain mp4 file -- confirm whether dedicated
            # format extraction would be more appropriate here.
            'url': cv['manifest_url'],
            'ext': 'mp4',
        }