From aef24d97e9c50cd9db06349b2b25c7f623baf932 Mon Sep 17 00:00:00 2001 From: dirkf Date: Thu, 29 Feb 2024 14:12:37 +0000 Subject: [PATCH] [Videa] Align with yt-dlp --- youtube_dl/extractor/videa.py | 39 +++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index 4589e78a1..0689764a5 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -6,22 +6,31 @@ import re import string from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_ord, + compat_struct_pack, +) from ..utils import ( ExtractorError, int_or_none, mimetype2ext, parse_codecs, + parse_qs, update_url_query, urljoin, xpath_element, xpath_text, ) -from ..compat import ( - compat_b64decode, - compat_ord, - compat_struct_pack, - compat_urlparse, -) + + +def compat_random_choices(population, *args, **kwargs): + # weights=None, *, cum_weights=None, k=1 + # limited implementation needed here + weights = args[0] if args else kwargs.get('weights') + assert all(w is None for w in (weights, kwargs.get('cum_weights'))) + k = kwargs.get('k', 1) + return ''.join(random.choice(population) for _ in range(k)) class VideaIE(InfoExtractor): @@ -35,6 +44,7 @@ class VideaIE(InfoExtractor): ) (?P[^?#&]+) ''' + _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1'] _TESTS = [{ 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ', 'md5': '97a7af41faeaffd9f1fc864a7c7e7603', @@ -80,11 +90,14 @@ class VideaIE(InfoExtractor): }] _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' - @staticmethod - def _extract_urls(webpage): - return [url for _, url in re.findall( - r']+src=(["\'])(?P(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', - webpage)] + @classmethod + def _extract_urls(cls, webpage): + def yield_urls(): + for pattern in cls._EMBED_REGEX: + for m in re.finditer(pattern, webpage): + yield m.group('url') + + return list(yield_urls()) @staticmethod def rc4(cipher_text, key): @@ -130,8 +143,8 @@ class VideaIE(InfoExtractor): for i in range(0, 32): result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query) - random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) + query = parse_qs(player_url) + random_seed = ''.join(compat_random_choices(string.ascii_letters + string.digits, k=8)) query['_s'] = random_seed query['_t'] = result[:16]