Compare commits

..

17 Commits

Author SHA1 Message Date
fonkap 6f8c2635a5 [StreamsbIE] Add extractor for streamsb.com (viewsb.com) (#31517)
* Add extractor for streamsb.com (viewsb.com)

* make data url using app.js version

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-13 03:54:51 +00:00
fonkap de48105dd8 [KommunetvIE] Add extractor for kommunetv.no (#31516)
* Add extractor for kommunetv.no
* Using utils.update_url instead of regex

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-13 03:54:51 +00:00
fonkap 822f19f05d [FileMoonIE] Add extractor for filemoon.sx (#31515)
---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-13 03:54:51 +00:00
teddy171 33db85c571 [feat]: Add support to external downloader aria2p (#31500)
* feat: add class Aria2pFD

* feat: create call_downloader function

* feat: a colorful download interface to aria2pFD

* feat: change value name

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Typo in suggestion

* fix: remove unused value

* fix: add not function to return value(0 is normal); add total_seconds to download.eta(timedelta object); add waiting status when hook progress

* fix: remove unuse method ..utils.format_bytes

* fix: be up to flake8

* fix: be up to flake8

* Apply suggestions from code review

* [feat] test external downloader aria2p

* [feat] test external downloader aria2p

* [fix] test_external_downloader.py

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Apply suggestions from code review

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update test/test_external_downloader.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update test/test_external_downloader.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* Update youtube_dl/downloader/external.py

Co-authored-by: dirkf <fieldhouse@gmx.net>

* refactoring code and fix bugs

* Apply suggestions from code review

* Rename test_external_downloader.py to test_downloader_external.py

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-13 03:54:51 +00:00
Valentin Metz f33923cba7 [rbgtum] Add new extractor (#31305)
* [rbgtum] Add new extractor

* Small update, force CI

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-13 03:54:51 +00:00
dirkf e8198c517b [YouTube] Fix tests 2023-02-13 03:54:51 +00:00
dirkf bafb6dec72 [YouTube] Refresh compat/utils usage
* import parse_qs()
* import parse_qs in lazy_extractors (clears old TODO)
* clean up old compiled lazy_extractors for Py2
* use update_url()
2023-02-13 03:54:51 +00:00
dirkf 4e04f10499 [compat] Update test_compat
[skip ci]
2023-02-13 03:54:51 +00:00
dirkf 90c9f789d9 [utils] Add parse_qs, update_url
[skip ci]
2023-02-13 03:54:51 +00:00
dirkf 249f2b6316 [compat] Systematise compat_ naming
[skip ci]
2023-02-13 03:54:51 +00:00
dirkf d6b14ba316 [test] Fix TestAgeRestriction
* age restriction may cause DownloadError
* update obsolete test URLs
[skip ci]
2023-02-13 03:54:51 +00:00
dirkf 30e986b834 [YouTube] Add `signatureTimestamp` for age-gate bypass 2023-02-13 03:54:51 +00:00
dirkf 58988c1421 [YouTube] Bypass age-gating for certain restricted videos
* Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client

* Also add and fix tests

* Introduce and use new utility function `update_url()`
2023-02-13 03:54:51 +00:00
dirkf e19ec52322 [Vimeo] Support /user{video_id}/{slug} URL format 2023-02-12 22:16:00 +00:00
dirkf f2f90887ca [Vimeo] Fix `Unable to extract info section` redux
* as reported in yt-dlp/yt-dlp#6149
* also allow newline in target JSON object
2023-02-12 22:16:00 +00:00
dirkf cd987e6fca [jsinterp] Nits 2023-02-12 22:16:00 +00:00
dirkf d947ffe8e3 [IGN] Overhaul extractor to avoid URL redirection loop
Consequently/also:
* centralise video data extraction
* detect 404 and 503 expected errors
* handle the test video in IGNVideo
* handle two additional page formats for the tests in IGNArticle
2023-02-12 22:16:00 +00:00
3 changed files with 288 additions and 120 deletions

View File

@ -1,19 +1,29 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_filter as filter,
compat_HTTPError,
compat_parse_qs, compat_parse_qs,
compat_urllib_parse_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
HEADRequest,
determine_ext, determine_ext,
error_to_compat_str,
extract_attributes,
ExtractorError,
int_or_none, int_or_none,
merge_dicts,
orderedSet,
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, traverse_obj,
url_or_none,
urljoin,
) )
@ -22,14 +32,102 @@ class IGNBaseIE(InfoExtractor):
return self._download_json( return self._download_json(
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug) 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
def _checked_call_api(self, slug):
try:
return self._call_api(slug)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
raise ExtractorError(
'Content not found: expired?', cause=e.cause,
expected=True)
raise
def _extract_video_info(self, video, fatal=True):
video_id = video['videoId']
formats = []
refs = traverse_obj(video, 'refs', expected_type=dict) or {}
m3u8_url = url_or_none(refs.get('m3uUrl'))
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
f4m_url = url_or_none(refs.get('f4mUrl'))
if f4m_url:
formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False))
for asset in (video.get('assets') or []):
asset_url = url_or_none(asset.get('url'))
if not asset_url:
continue
formats.append({
'url': asset_url,
'tbr': int_or_none(asset.get('bitrate'), 1000),
'fps': int_or_none(asset.get('frame_rate')),
'height': int_or_none(asset.get('height')),
'width': int_or_none(asset.get('width')),
})
mezzanine_url = traverse_obj(
video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
if mezzanine_url:
formats.append({
'ext': determine_ext(mezzanine_url, 'mp4'),
'format_id': 'mezzanine',
'preference': 1,
'url': mezzanine_url,
})
if formats or fatal:
self._sort_formats(formats)
else:
return
thumbnails = traverse_obj(
video, ('thumbnails', Ellipsis, {'url': 'url'}), expected_type=url_or_none)
tags = traverse_obj(
video, ('tags', Ellipsis, 'displayName'),
expected_type=lambda x: x.strip() or None)
metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
title = traverse_obj(
metadata, 'longTitle', 'title', 'name',
expected_type=lambda x: x.strip() or None)
return {
'id': video_id,
'title': title,
'description': strip_or_none(metadata.get('description')),
'timestamp': parse_iso8601(metadata.get('publishDate')),
'duration': int_or_none(metadata.get('duration')),
'thumbnails': thumbnails,
'formats': formats,
'tags': tags,
}
# yt-dlp shim
@classmethod
def _extract_from_webpage(cls, url, webpage):
for embed_url in orderedSet(
cls._extract_embed_urls(url, webpage) or [], lazy=True):
yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
class IGNIE(IGNBaseIE): class IGNIE(IGNBaseIE):
""" """
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com. Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)' _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
_VALID_URL = (
r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
% '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
IE_NAME = 'ign.com' IE_NAME = 'ign.com'
_PAGE_TYPE = 'video' _PAGE_TYPE = 'video'
@ -44,7 +142,10 @@ class IGNIE(IGNBaseIE):
'timestamp': 1370440800, 'timestamp': 1370440800,
'upload_date': '20130605', 'upload_date': '20130605',
'tags': 'count:9', 'tags': 'count:9',
} },
'params': {
'nocheckcertificate': True,
},
}, { }, {
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
'md5': 'f1581a6fe8c5121be5b807684aeac3f6', 'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
@ -56,86 +157,51 @@ class IGNIE(IGNBaseIE):
'timestamp': 1420571160, 'timestamp': 1420571160,
'upload_date': '20150106', 'upload_date': '20150106',
'tags': 'count:4', 'tags': 'count:4',
} },
'skip': '404 Not Found',
}, { }, {
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix', 'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def _extract_embed_urls(cls, url, webpage):
grids = re.findall(
r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
webpage)
return filter(None,
(urljoin(url, m.group('path')) for m in re.finditer(
r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
% cls._VIDEO_PATH_RE, grids[0] if grids else '')))
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
display_id = m.group('id')
if display_id:
return self._extract_video(url, display_id)
display_id = m.group('filt') or 'all'
return self._extract_playlist(url, display_id)
def _extract_playlist(self, url, display_id):
webpage = self._download_webpage(url, display_id)
return self.playlist_result(
(self.url_result(u, ie=self.ie_key())
for u in self._extract_embed_urls(url, webpage)),
playlist_id=display_id)
def _extract_video(self, url, display_id):
display_id = self._match_id(url) display_id = self._match_id(url)
video = self._call_api(display_id) video = self._checked_call_api(display_id)
video_id = video['videoId']
metadata = video['metadata']
title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
formats = [] info = self._extract_video_info(video)
refs = video.get('refs') or {}
m3u8_url = refs.get('m3uUrl') return merge_dicts({
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
f4m_url = refs.get('f4mUrl')
if f4m_url:
formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False))
for asset in (video.get('assets') or []):
asset_url = asset.get('url')
if not asset_url:
continue
formats.append({
'url': asset_url,
'tbr': int_or_none(asset.get('bitrate'), 1000),
'fps': int_or_none(asset.get('frame_rate')),
'height': int_or_none(asset.get('height')),
'width': int_or_none(asset.get('width')),
})
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
if mezzanine_url:
formats.append({
'ext': determine_ext(mezzanine_url, 'mp4'),
'format_id': 'mezzanine',
'preference': 1,
'url': mezzanine_url,
})
self._sort_formats(formats)
thumbnails = []
for thumbnail in (video.get('thumbnails') or []):
thumbnail_url = thumbnail.get('url')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
})
tags = []
for tag in (video.get('tags') or []):
display_name = tag.get('displayName')
if not display_name:
continue
tags.append(display_name)
return {
'id': video_id,
'title': title,
'description': strip_or_none(metadata.get('description')),
'timestamp': parse_iso8601(metadata.get('publishDate')),
'duration': int_or_none(metadata.get('duration')),
'display_id': display_id, 'display_id': display_id,
'thumbnails': thumbnails, }, info)
'formats': formats,
'tags': tags,
}
class IGNVideoIE(InfoExtractor): class IGNVideoIE(IGNBaseIE):
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/' _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
_TESTS = [{ _TESTS = [{
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s', 'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
@ -147,7 +213,8 @@ class IGNVideoIE(InfoExtractor):
'description': 'Taking out assassination targets in Hitman has never been more stylish.', 'description': 'Taking out assassination targets in Hitman has never been more stylish.',
'timestamp': 1444665600, 'timestamp': 1444665600,
'upload_date': '20151012', 'upload_date': '20151012',
} },
'expected_warnings': ['HTTP Error 400: Bad Request'],
}, { }, {
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds', 'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True, 'only_matching': True,
@ -167,22 +234,38 @@ class IGNVideoIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
req = HEADRequest(url.rsplit('/', 1)[0] + '/embed') parsed_url = compat_urlparse.urlparse(url)
url = self._request_webpage(req, video_id).geturl() embed_url = compat_urlparse.urlunparse(
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
webpage, urlh = self._download_webpage_handle(embed_url, video_id)
new_url = urlh.geturl()
ign_url = compat_parse_qs( ign_url = compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('url', [None])[0] compat_urlparse.urlparse(new_url).query).get('url', [None])[-1]
if ign_url: if ign_url:
return self.url_result(ign_url, IGNIE.ie_key()) return self.url_result(ign_url, IGNIE.ie_key())
return self.url_result(url) video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
if not video:
if new_url == url:
raise ExtractorError('Redirect loop: ' + url)
return self.url_result(new_url)
video = extract_attributes(video)
video_data = video.get('data-settings') or '{}'
video_data = self._parse_json(video_data, video_id)['video']
info = self._extract_video_info(video_data)
return merge_dicts({
'display_id': video_id,
}, info)
class IGNArticleIE(IGNBaseIE): class IGNArticleIE(IGNBaseIE):
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
_PAGE_TYPE = 'article' _PAGE_TYPE = 'article'
_TESTS = [{ _TESTS = [{
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
'info_dict': { 'info_dict': {
'id': '524497489e4e8ff5848ece34', 'id': '72113',
'title': '100 Little Things in GTA 5 That Will Blow Your Mind', 'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
}, },
'playlist': [ 'playlist': [
@ -190,7 +273,7 @@ class IGNArticleIE(IGNBaseIE):
'info_dict': { 'info_dict': {
'id': '5ebbd138523268b93c9141af17bec937', 'id': '5ebbd138523268b93c9141af17bec937',
'ext': 'mp4', 'ext': 'mp4',
'title': 'GTA 5 Video Review', 'title': 'Grand Theft Auto V Video Review',
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880, 'timestamp': 1379339880,
'upload_date': '20130916', 'upload_date': '20130916',
@ -200,7 +283,7 @@ class IGNArticleIE(IGNBaseIE):
'info_dict': { 'info_dict': {
'id': '638672ee848ae4ff108df2a296418ee2', 'id': '638672ee848ae4ff108df2a296418ee2',
'ext': 'mp4', 'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion', 'title': 'GTA 5 In Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.', 'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820, 'timestamp': 1386878820,
'upload_date': '20131212', 'upload_date': '20131212',
@ -208,16 +291,17 @@ class IGNArticleIE(IGNBaseIE):
}, },
], ],
'params': { 'params': {
'playlist_items': '2-3',
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Backend fetch failed'],
}, { }, {
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch', 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
'info_dict': { 'info_dict': {
'id': '53ee806780a81ec46e0790f8', 'id': '53ee806780a81ec46e0790f8',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014', 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
}, },
'playlist_count': 2, 'playlist_count': 1,
'expected_warnings': ['Backend fetch failed'],
}, { }, {
# videoId pattern # videoId pattern
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned', 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
@ -240,18 +324,91 @@ class IGNArticleIE(IGNBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _checked_call_api(self, slug):
try:
return self._call_api(slug)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
if e.cause.code == 404:
raise ExtractorError(
'Content not found: expired?', cause=e.cause,
expected=True)
elif e.cause.code == 503:
self.report_warning(error_to_compat_str(e.cause))
return
raise
def _search_nextjs_data(self, webpage, video_id, **kw):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage, 'next.js data', **kw),
video_id, **kw)
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
article = self._call_api(display_id) article = self._checked_call_api(display_id)
def entries(): if article:
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url']) # obsolete ?
if media_url: def entries():
yield self.url_result(media_url, IGNIE.ie_key()) media_url = traverse_obj(
for content in (article.get('content') or []): article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content): expected_type=url_or_none)
yield self.url_result(video_url) if media_url:
yield self.url_result(media_url, IGNIE.ie_key())
for content in (article.get('content') or []):
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
if url_or_none(video_url):
yield self.url_result(video_url)
return self.playlist_result(
entries(), article.get('articleId'),
traverse_obj(
article, ('metadata', 'headline'),
expected_type=lambda x: x.strip() or None))
webpage = self._download_webpage(url, display_id)
playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
if playlist_id:
def entries():
for m in re.finditer(
r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
webpage):
flashvars = self._search_regex(
r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
m.group('params'), 'flashvars', default='')
flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
v_url = url_or_none((flashvars.get('url') or [None])[-1])
if v_url:
yield self.url_result(v_url)
else:
playlist_id = self._search_regex(
r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
webpage, 'id', group='id', default=None)
nextjs_data = self._search_nextjs_data(webpage, display_id)
def entries():
for player in traverse_obj(
nextjs_data,
('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
# skip promo links (which may not always be served, eg GH CI servers)
if traverse_obj(nextjs_data,
('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
expected_type=dict):
continue
video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
info = self._extract_video_info(video, fatal=False)
if info:
yield merge_dicts({
'display_id': display_id,
}, info)
return self.playlist_result( return self.playlist_result(
entries(), article.get('articleId'), entries(), playlist_id or display_id,
strip_or_none(try_get(article, lambda x: x['metadata']['headline']))) re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)

View File

@ -261,27 +261,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?: (?:
www| www|
player player
) )
\. \.
)? )?
vimeo(?:pro)?\.com/ vimeo(?:pro)?\.com/
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:
(?:.*?/)?? (?P<u>user)|
(?: (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?: (?:.*?/)??
play_redirect_hls| (?P<q>
moogaloop\.swf)\?clip_id= (?:
)? play_redirect_hls|
(?:videos?/)? moogaloop\.swf)\?clip_id=
(?P<id>[0-9]+) )?
(?:/(?P<unlisted_hash>[\da-f]{10}))? (?:videos?/)?
/?(?:[?&].*)?(?:[#].*)?$ )
''' (?P<id>[0-9]+)
(?(u)
/(?!videos|likes)[^/?#]+/?|
(?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
)
(?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
'''
IE_NAME = 'vimeo' IE_NAME = 'vimeo'
_TESTS = [ _TESTS = [
{ {
@ -539,7 +545,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} },
{
# user playlist alias -> https://vimeo.com/258705797
'url': 'https://vimeo.com/user26785108/newspiritualguide',
'only_matching': True,
},
# https://gettingthingsdone.com/workflowmap/ # https://gettingthingsdone.com/workflowmap/
# vimeo embed with check-password page protected by Referer header # vimeo embed with check-password page protected by Referer header
] ]
@ -663,7 +674,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if '//player.vimeo.com/video/' in url: if '//player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex( config = self._parse_json(self._search_regex(
r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id) r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
if config.get('view') == 4: if config.get('view') == 4:
config = self._verify_player_video_password( config = self._verify_player_video_password(
redirect_url, video_id, headers) redirect_url, video_id, headers)

View File

@ -201,7 +201,7 @@ class JSInterpreter(object):
def __init__(self, msg, *args, **kwargs): def __init__(self, msg, *args, **kwargs):
expr = kwargs.pop('expr', None) expr = kwargs.pop('expr', None)
if expr is not None: if expr is not None:
msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
class JS_RegExp(object): class JS_RegExp(object):
@ -699,7 +699,7 @@ class JSInterpreter(object):
""" assert, but without risk of getting optimized out """ """ assert, but without risk of getting optimized out """
if not cndn: if not cndn:
memb = member memb = member
raise self.Exception('{member} {msg}'.format(**locals()), expr=expr) raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
def eval_method(): def eval_method():
if (variable, member) == ('console', 'debug'): if (variable, member) == ('console', 'debug'):