Compare commits

...

6 Commits

Author SHA1 Message Date
dirkf
07d7c307a7
Merge 0c1db866dbf9eaff9f3b934d5660d81d8d8eaf6d into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6 2025-04-01 08:57:29 +02:00
dirkf
3eb8d22ddb
[JSInterp] Temporary fix for #33102 2025-03-31 04:21:09 +01:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
mk-pmb
0c1db866db [core] Add format selection criteria longside/shortside, and tests for both (#30737) 2024-05-16 03:00:43 +02:00
mk-pmb
ce031e9d18 [core] Empty format selection string means anything goes. 2024-05-16 02:54:15 +02:00
9 changed files with 150 additions and 16 deletions

View File

@ -0,0 +1,115 @@
#!/usr/bin/env python
# coding: utf-8
"""Tests module for longside/shortside format selector."""
from __future__ import unicode_literals
# Allow direct execution
if __name__ == '__main__':
import os
import sys
repo_dir = os.path.abspath(os.path.join(__file__, '../' * 3))
sys.path.insert(0, repo_dir)
import unittest
from youtube_dl.extractor import YoutubeIE
from test.test_YoutubeDL import YDL, TEST_URL, _make_result
# Properties shared by every generated test format: each format dict built by
# prepare_formats_info_dict() starts out as a copy of this mapping.
default_common_video_properties = {
'url': TEST_URL,
}
def prepare_formats_info_dict(sizes, common=None):
    """Convert sizes (id, width, height) to info_dict.

    Each entry of `sizes` is unpacked into the `format_id`, `width` and
    `height` keys of a fresh copy of `default_common_video_properties`.
    Entries from the optional `common` mapping are then applied on top of
    every format, overriding any of those keys.

    Returns the result of passing the list of format dicts to
    `_make_result()`; callers in this module read its 'formats' entry.
    """
    # `None` rather than `{}` as default: a mutable default object is shared
    # between all calls of the function.
    def make_one_format(size):
        fmt = default_common_video_properties.copy()
        # Raises ValueError if `size` is not exactly a 3-item sequence.
        fmt['format_id'], fmt['width'], fmt['height'] = size
        if common:
            fmt.update(common)
        return fmt

    return _make_result([make_one_format(size) for size in sizes])
def pick_format_ids(sizes, criteria):
    """Return the IDs of the formats in `sizes` selected by `criteria`.

    `criteria` is used as the 'format' option of a fresh `YDL` instance.
    The formats built from `sizes` are sorted with
    `YoutubeIE._sort_formats()` and then processed by that instance; the
    'format_id' of every entry it recorded in `downloaded_info_dicts` is
    returned, in recorded order.
    """
    downloader = YDL({'format': criteria})
    extractor = YoutubeIE(downloader)
    result = prepare_formats_info_dict(sizes)
    extractor._sort_formats(result['formats'])
    # Process a shallow copy, as the original did.
    downloader.process_ie_result(result.copy())
    return [
        downloaded['format_id']
        for downloaded in downloader.downloaded_info_dicts
    ]
class TestFormatSelection(unittest.TestCase):
    """Tests for the longside/shortside format selection criteria."""

    def test_fmtsel_criteria_longside_shortside(self):
        """Check which format is picked under longside/shortside limits."""
        # Feature request: https://github.com/ytdl-org/youtube-dl/issues/30737

        # Parts of the format selector under test. The helpers below read
        # this dict at call time, so the steps further down only need to
        # update individual parts.
        crit = {'tendency': '', 'short': '', 'long': ''}

        def verify_fmtsel(sizes, want):
            # Selector is always assembled as: tendency, short, long.
            selector = ''.join(
                crit[part] for part in ('tendency', 'short', 'long'))
            chosen = pick_format_ids(sizes, selector)
            self.assertEqual(','.join(chosen), want)

        # Landscape formats: (id, width, height)
        sizes_h = [
            ('A', 256, 144,),
            ('B', 426, 240,),
            ('C', 640, 360,),
            ('D', 854, 480,),
        ]
        # Portrait formats: same IDs with width and height swapped.
        sizes_v = [
            (fmt_id, height, width) for (fmt_id, width, height) in sizes_h]
        self.assertEqual(sizes_v, [
            # This list is non-authoritative, merely for readers' reference.
            ('A', 144, 256,),
            ('B', 240, 426,),
            ('C', 360, 640,),
            ('D', 480, 854,),
        ])

        def verify_all_shapes_same(expected_id):
            # Orientation must not affect which ID is picked.
            for sizes in (sizes_h, sizes_v):
                verify_fmtsel(sizes, expected_id)

        # First, test with no criteria (still empty from initialization above):
        verify_fmtsel(sizes_h, 'A')

        crit['tendency'] = 'best'
        verify_all_shapes_same('D')

        for long_limit, expected_id in (
                ('[longside<=720]', 'C'),
                ('[longside<=420]', 'A')):
            crit['long'] = long_limit
            verify_all_shapes_same(expected_id)

        def shortside_group_1(long_crit, best):
            crit['long'] = long_crit
            # `best` is what is expected while the shortside limit is loose
            # enough not to exclude anything by itself.
            for short_limit, expected_id in (
                    ('[shortside<=720]', best),
                    ('[shortside<=420]', 'C'),
                    ('[shortside<=360]', 'C'),
                    ('[shortside<360]', 'B')):
                crit['short'] = short_limit
                verify_all_shapes_same(expected_id)

        shortside_group_1(long_crit='', best='D')
        shortside_group_1(long_crit='[longside<=720]', best='C')
if __name__ == '__main__':
unittest.main()

View File

@ -136,6 +136,11 @@ class TestFormatSelection(unittest.TestCase):
]
info_dict = _make_result(formats)
ydl = YDL({'format': ''}) # no criteria => anything goes
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '35')
ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]

View File

@ -361,6 +361,7 @@ class YoutubeDL(object):
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit',
'start_time', 'end_time',
'longside', 'shortside',
'chapter_number', 'season_number', 'episode_number',
'track_number', 'disc_number', 'release_year',
'playlist_index',
@ -1227,7 +1228,7 @@ class YoutubeDL(object):
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
(?P<key>width|height|shortside|longside|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
$
@ -1309,6 +1310,9 @@ class YoutubeDL(object):
'{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
return SyntaxError(message)
if not format_spec:
format_spec = 'worst'
PICKFIRST = 'PICKFIRST'
MERGE = 'MERGE'
SINGLE = 'SINGLE'
@ -1515,6 +1519,8 @@ class YoutubeDL(object):
formats_info[1].get('format_id')),
'width': formats_info[0].get('width'),
'height': formats_info[0].get('height'),
'longside': formats_info[0].get('longside'),
'shortside': formats_info[0].get('shortside'),
'resolution': formats_info[0].get('resolution'),
'fps': formats_info[0].get('fps'),
'vcodec': formats_info[0].get('vcodec'),
@ -1666,6 +1672,17 @@ class YoutubeDL(object):
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
def add_calculated_video_proprties(fmt):
    """Add the derived 'shortside' and 'longside' entries to `fmt` in place.

    'shortside' is the smaller and 'longside' the larger of the 'width' and
    'height' values that are present and not None. If only one of the two is
    available, both derived entries get that value; if neither is, `fmt` is
    left unchanged. Anything that is not a dict (or dict subclass) is ignored.
    """
    # isinstance() rather than an exact type comparison, so that dict
    # subclasses are handled as well.
    if not isinstance(fmt, dict):
        return
    dims = [
        fmt[side] for side in ('width', 'height')
        if fmt.get(side) is not None]
    if dims:
        fmt['shortside'] = min(dims)
        fmt['longside'] = max(dims)
for fmt in [info_dict] + (info_dict.get('formats') or []):
add_calculated_video_proprties(fmt)
if 'playlist' not in info_dict:
# It isn't part of a playlist
info_dict['playlist'] = None

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频'
IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec'
IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
    """Find the `__NEXT_DATA__` script in `webpage` and parse its content.

    The text inside the `<script id="__NEXT_DATA__">` element is located
    with `self._search_regex()` and handed to `self._parse_json()`.

    Keyword arguments:
    fatal            -- passed to both the search and the parse (default True)
    transform_source -- passed to `self._parse_json()` only (default None)
    Any other keyword arguments are forwarded to `self._search_regex()`.
    """
    fatal = kw.pop('fatal', True)
    transform_source = kw.pop('transform_source', None)
    next_js = self._search_regex(
        r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
        webpage, 'next.js data', group='js', fatal=fatal, **kw)
    return self._parse_json(
        next_js, video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False:
return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX'
IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{
'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'),
})
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{
'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/']
]
_IE_NAME = 'senate.gov'
IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View File

@ -686,6 +686,8 @@ class JSInterpreter(object):
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace):
if obj is JS_Undefined:
return 'undefined'
try:
return json.dumps(obj)
except TypeError: