Compare commits

...

5 Commits

Author SHA1 Message Date
kerrynich
5ad9d9ac05
Merge ab6ade9c4c0c0ef3f3c5ceb3f3c03619d1899309 into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6 2025-04-01 08:56:04 +02:00
dirkf
3eb8d22ddb
[JSInterp] Temporary fix for #33102 2025-03-31 04:21:09 +01:00
dirkf
4e714f9df1 [Misc] Correct [_]IE_DESC/NAME in a few IEs
* thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c
* also add documenting comment in `InfoExtractor`
2025-03-26 12:47:19 +00:00
dirkf
c1ea7f5a24 [ITV] Mark ITVX not working
* update old shim
* correct [_]IE_DESC
2025-03-26 12:17:49 +00:00
kerry.nich
ab6ade9c4c Added progress hook updates for postprocessing 2022-03-22 19:19:38 -04:00
13 changed files with 116 additions and 18 deletions

View File

@ -266,11 +266,13 @@ class YoutubeDL(object):
postprocessor. postprocessor.
progress_hooks: A list of functions that get called on download progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished". * status: One of "downloading", "error", "finished",
or "postprocessed".
Check this first and ignore unknown values. Check this first and ignore unknown values.
If status is one of "downloading", or "finished", the If status is one of "downloading", "finished", or
following properties may also be present: "postprocessed", the following properties may also be
present:
* filename: The final filename (always present) * filename: The final filename (always present)
* tmpfilename: The filename we're currently writing to * tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk * downloaded_bytes: Bytes on disk
@ -285,6 +287,9 @@ class YoutubeDL(object):
downloaded video fragment. downloaded video fragment.
* fragment_count: The number of fragments (= individual * fragment_count: The number of fragments (= individual
files that will be merged) files that will be merged)
* postprocessor: The specific postprocessor that ran.
See youtube_dl/postprocessor/__init__.py
for a list of possibilities.
Progress hooks are guaranteed to be called at least once Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful. (with status "finished") if the download is successful.
@ -2318,6 +2323,8 @@ class YoutubeDL(object):
pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(ie_info['__postprocessors'])
pps_chain.extend(self._pps) pps_chain.extend(self._pps)
for pp in pps_chain: for pp in pps_chain:
for ph in self._progress_hooks:
pp.add_progress_hook(ph)
files_to_delete = [] files_to_delete = []
try: try:
files_to_delete, info = pp.run(info) files_to_delete, info = pp.run(info)

View File

@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{

View File

@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor): class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec' IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac', 'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@ -422,6 +422,8 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None _GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None _GEO_IP_BLOCKS = None
_WORKING = True _WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None): def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader.""" """Constructor. Receives an optional downloader."""

View File

@ -35,15 +35,6 @@ from ..utils import (
class ITVBaseIE(InfoExtractor): class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
transform_source = kw.pop('transform_source', None)
fatal = kw.pop('fatal', True)
return self._parse_json(
self._search_regex(
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
webpage, 'next.js data', group='js', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True): def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False: if errnote is False:
return False return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE): class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
_IE_DESC = 'ITVX' IE_DESC = 'ITVX'
_WORKING = False
_TESTS = [{ _TESTS = [{
'note': 'Hub URLs redirect to ITVX', 'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012', 'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'), 'ext': determine_ext(href, 'vtt'),
}) })
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}') next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {}) video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle') title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title) info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE): class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
_IE_DESC = 'ITV articles: News, British Touring Car Championship' IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{ _TESTS = [{
'note': 'British Touring Car Championship', 'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch', 'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/'] ['arch', '', 'http://ussenate-f.akamaihd.net/']
] ]
_IE_NAME = 'senate.gov' IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

View File

@ -686,6 +686,8 @@ class JSInterpreter(object):
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e) raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace): def _dump(self, obj, namespace):
if obj is JS_Undefined:
return 'undefined'
try: try:
return json.dumps(obj) return json.dumps(obj)
except TypeError: except TypeError:

View File

@ -33,6 +33,7 @@ class PostProcessor(object):
def __init__(self, downloader=None): def __init__(self, downloader=None):
self._downloader = downloader self._downloader = downloader
self._progress_hooks = []
def set_downloader(self, downloader): def set_downloader(self, downloader):
"""Sets the downloader for this PP.""" """Sets the downloader for this PP."""
@ -64,6 +65,15 @@ class PostProcessor(object):
def _configuration_args(self, default=[]): def _configuration_args(self, default=[]):
return cli_configuration_args(self._downloader.params, 'postprocessor_args', default) return cli_configuration_args(self._downloader.params, 'postprocessor_args', default)
def _hook_progress(self, status):
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
# See YoutubeDL.py (search for progress_hooks) for a description of
# this interface
self._progress_hooks.append(ph)
class AudioConversionError(PostProcessingError): class AudioConversionError(PostProcessingError):
pass pass

View File

@ -130,4 +130,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
else: else:
raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.') raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info

View File

@ -27,5 +27,9 @@ class ExecAfterDownloadPP(PostProcessor):
if retCode != 0: if retCode != 0:
raise PostProcessingError( raise PostProcessingError(
'Command returned error code %d' % retCode) 'Command returned error code %d' % retCode)
self._hook_progress({
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], information return [], information

View File

@ -342,6 +342,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
new_path, time.time(), information['filetime'], new_path, time.time(), information['filetime'],
errnote='Cannot update utime of audio file') errnote='Cannot update utime of audio file')
fsize = os.path.getsize(new_path)
self._hook_progress({
'total_bytes': fsize,
'filename': new_path,
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [path], information return [path], information
@ -365,6 +373,13 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
information['filepath'] = outpath information['filepath'] = outpath
information['format'] = self._preferedformat information['format'] = self._preferedformat
information['ext'] = self._preferedformat information['ext'] = self._preferedformat
fsize = os.path.getsize(outpath)
self._hook_progress({
'total_bytes': fsize,
'filename': outpath,
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [path], information return [path], information
@ -422,6 +437,13 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return sub_filenames, information return sub_filenames, information
@ -507,6 +529,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
os.remove(metadata_filename) os.remove(metadata_filename)
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info
@ -518,6 +547,13 @@ class FFmpegMergerPP(FFmpegPostProcessor):
self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return info['__files_to_merge'], info return info['__files_to_merge'], info
def can_merge(self): def can_merge(self):
@ -553,6 +589,13 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info
@ -571,6 +614,13 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info
@ -587,6 +637,14 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
fsize = os.path.getsize(encodeFilename(filename))
self._hook_progress({
'total_bytes': fsize,
'filename': encodeFilename(filename),
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info
@ -650,4 +708,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
'data': f.read(), 'data': f.read(),
} }
self._hook_progress({
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return sub_filenames, info return sub_filenames, info

View File

@ -46,5 +46,9 @@ class MetadataFromTitlePP(PostProcessor):
self._downloader.to_screen( self._downloader.to_screen(
'[fromtitle] parsed %s: %s' '[fromtitle] parsed %s: %s'
% (attribute, value if value is not None else 'NA')) % (attribute, value if value is not None else 'NA'))
self._hook_progress({
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info

View File

@ -55,6 +55,11 @@ class XAttrMetadataPP(PostProcessor):
write_xattr(filename, xattrname, byte_value) write_xattr(filename, xattrname, byte_value)
num_written += 1 num_written += 1
self._hook_progress({
'status': 'postprocessed',
'postprocessor': self.__class__.__name__
})
return [], info return [], info
except XAttrUnavailableError as e: except XAttrUnavailableError as e: