[cda] Fix extraction (closes #17803, closes #24458, closes #24518, closes #26381)

This commit is contained in:
Sergey M․ 2020-11-24 02:18:40 +07:00
parent d0512ac4c5
commit 37258c644f
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 32 additions and 3 deletions

View File

@ -5,10 +5,16 @@ import codecs
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts,
multipart_encode, multipart_encode,
parse_duration, parse_duration,
random_birthday, random_birthday,
@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
r'Odsłony:(?:\s| )*([0-9]+)', webpage, r'Odsłony:(?:\s| )*([0-9]+)', webpage,
'view_count', default=None) 'view_count', default=None)
average_rating = self._search_regex( average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value') r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
group='rating_value')
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
'age_limit': 18 if need_confirm_age else 0, 'age_limit': 18 if need_confirm_age else 0,
} }
# Source: https://www.cda.pl/js/player.js?t=1606154898
def decrypt_file(a):
    """Turn an obfuscated ``file`` value into a direct HTTPS media URL.

    Steps, in order:
      1. Strip the known junk marker tokens from the input.
      2. Percent-decode the remainder.
      3. Rotate every printable ASCII character (codes 33..126) by 47
         positions within that range (ROT47); other characters are
         left untouched.
      4. Drop any '.cda.mp4' fragment and rewrite the '.2cda.pl' /
         '.3cda.pl' hosts to '.cda.pl'.
      5. Prefix 'https://' and place the '.mp4' suffix either before
         '/upstream' (when present) or at the very end.

    :param a: obfuscated file string.
    :return: the decoded URL as a string starting with 'https://'.
    """
    def rotate(ch):
        # 33 + (code + 14) % 94 is ROT47 over '!'..'~', because
        # (code - 33 + 47) % 94 == (code + 14) % 94.
        code = compat_ord(ch)
        if code < 33 or code > 126:
            return compat_chr(code)
        return compat_chr(33 + (code + 14) % 94)

    encoded = a
    # Markers are removed one after another, in this exact order.
    for marker in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
        encoded = encoded.replace(marker, '')
    encoded = compat_urllib_parse_unquote(encoded)

    location = ''.join(rotate(ch) for ch in encoded)
    location = location.replace('.cda.mp4', '')
    for mirror_host in ('.2cda.pl', '.3cda.pl'):
        location = location.replace(mirror_host, '.cda.pl')

    if '/upstream' not in location:
        return 'https://' + location + '.mp4'
    # For upstream-style paths the extension goes before '/upstream'.
    return 'https://' + location.replace('/upstream', '.mp4/upstream')
def extract_format(page, version): def extract_format(page, version):
json_str = self._html_search_regex( json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
video['file'] = codecs.decode(video['file'], 'rot_13') video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'): if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4') video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
f = { f = {
'url': video['file'], 'url': video['file'],
} }
@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return info_dict info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(info_dict, info)