Compare commits

...

4 Commits

Author SHA1 Message Date
Remita Amine
3bb7769c40 [wat] remove unused variable 2021-01-21 17:22:30 +01:00
Remita Amine
8d286bd5b6 [wat] fix format extraction (closes #27901) 2021-01-21 17:20:32 +01:00
Remita Amine
cff72b4cc0 [americastestkitchen] improve season extraction 2021-01-21 16:47:55 +01:00
Brian Marks
657221c81d [americastestkitchen] Add support for downloading entire seasons (#27861) 2021-01-21 15:46:29 +00:00
3 changed files with 84 additions and 52 deletions

View File

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
@ -90,3 +91,69 @@ class AmericasTestKitchenIE(InfoExtractor):
'series': try_get(episode, lambda x: x['show']['title']),
'episode': episode.get('title'),
}
class AmericasTestKitchenSeasonIE(InfoExtractor):
    # Season playlist extractor for americastestkitchen.com and
    # cookscountry.com. It queries the Algolia search index for every episode
    # of the requested season and delegates each hit to AmericasTestKitchenIE.
    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
    _TESTS = [{
        # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
        'info_dict': {
            'id': 'season_1',
            'title': 'Season 1',
        },
        'playlist_count': 13,
    }, {
        # Cooks Country Season
        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
        'info_dict': {
            'id': 'season_12',
            'title': 'Season 12',
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        # The URL carries both the show (host name) and the season number.
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group('show')
        season_number = int(mobj.group('id'))

        # Short show code used in the index name, the facet filter and the
        # per-show episode number attribute.
        if show_name == 'americastestkitchen':
            slug = 'atk'
        else:
            slug = 'cco'

        season = 'Season %d' % season_number

        index_url = 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug
        api_headers = {
            'Origin': 'https://www.%s.com' % show_name,
            'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
            'X-Algolia-Application-Id': 'Y1FNZXUI30',
        }
        # Restrict the search to episode documents of this show and season.
        facet_filters = json.dumps([
            'search_season_list:' + season,
            'search_document_klass:episode',
            'search_show_slug:' + slug,
        ])
        search_query = {
            'facetFilters': facet_filters,
            'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
            'attributesToHighlight': '',
            'hitsPerPage': 1000,
        }

        season_search = self._download_json(
            index_url, season, headers=api_headers, query=search_query)

        episode_number_key = 'search_%s_episode_number' % slug

        def season_entries():
            # Lazily turn each search hit into a url-type playlist entry;
            # hits without a page path cannot be resolved and are dropped.
            hits = season_search.get('hits') or []
            for hit in hits:
                page_path = hit.get('search_url')
                if page_path:
                    yield {
                        '_type': 'url',
                        'url': 'https://www.%s.com%s' % (show_name, page_path),
                        'id': try_get(hit, lambda e: e['objectID'].split('_')[-1]),
                        'title': hit.get('title'),
                        'description': hit.get('description'),
                        'timestamp': unified_timestamp(hit.get('search_document_date')),
                        'season_number': season_number,
                        'episode_number': int_or_none(hit.get(episode_number_key)),
                        'ie_key': AmericasTestKitchenIE.ie_key(),
                    }

        playlist_id = 'season_%d' % season_number
        return self.playlist_result(season_entries(), playlist_id, season)

View File

@ -42,7 +42,10 @@ from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE
from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
)
from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE
from .aol import AolIE

View File

@ -1,12 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
unified_strdate,
HEADRequest,
int_or_none,
@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
},
]
_FORMATS = (
(200, 416, 234),
(400, 480, 270),
(600, 640, 360),
(1200, 640, 360),
(1800, 960, 540),
(2500, 1280, 720),
)
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
@ -97,46 +85,20 @@ class WatIE(InfoExtractor):
return red_url
return None
def remove_bitrate_limit(manifest_url):
return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
formats = []
try:
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
m3u8_url = manifest_urls.get('hls')
if m3u8_url:
m3u8_url = remove_bitrate_limit(m3u8_url)
for m3u8_alt_url in alt_urls(m3u8_url):
formats.extend(self._extract_m3u8_formats(
m3u8_alt_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
mpd_url = manifest_urls.get('mpd')
if mpd_url:
mpd_url = remove_bitrate_limit(mpd_url)
for mpd_alt_url in alt_urls(mpd_url):
formats.extend(self._extract_mpd_formats(
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
except ExtractorError:
abr = 64
for vbr, width, height in self._FORMATS:
tbr = vbr + abr
format_id = 'http-%s' % tbr
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
if self._is_valid_url(fmt_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': fmt_url,
'vbr': vbr,
'abr': abr,
'width': width,
'height': height,
})
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
m3u8_url = manifest_urls.get('hls')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
mpd_url = manifest_urls.get('mpd')
if mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
upload_date = unified_strdate(date_diffusion) if date_diffusion else None