From f19d8c988576379685fefa569df8d521c7e5d459 Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Sun, 13 Dec 2020 16:21:07 +0100 Subject: [PATCH 1/9] Added support for The Royal Theatre in Denmark, kglteater.dk --- youtube_dl/extractor/twentythreevideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index dc5609192..c512f6fca 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class TwentyThreeVideoIE(InfoExtractor): IE_NAME = '23video' - _VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' + _VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|kglteater\.dk|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' _TESTS = [{ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1', 'md5': '75fcf216303eb1dae9920d651f85ced4', From f630222253b62f2098a0fec42be10be7d168320a Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 12:26:04 +0100 Subject: [PATCH 2/9] computer change --- youtube_dl/extractor/twentythreevideo.py | 29 +++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index c512f6fca..aad4ebab3 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -21,20 +21,43 @@ class TwentyThreeVideoIE(InfoExtractor): 'uploader_id': '12258964', 'uploader': 'Rasmus Bysted', } - }, { + }, + { 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', 'only_matching': True, + }, + { + 'url': 
'https://video.kglteater.dk/v.ihtml/player.html?source=share&photo%5fid=65098499', + 'only_matching': True, }] def _real_extract(self, url): domain, query, photo_id = re.match(self._VALID_URL, url).groups() base_url = 'https://%s' % domain + print(base_url + '/api/protection/verify') + # https://video.kglteater.dk/api/protection/verify?callback=visualplatform_1&protection_method=geoblocking&object_id=65550896&object_type=photo&verification_data=&format=json + # https://video.kglteater.dk/api/protection/verify + + player_settings = self._download_json( + base_url + '/api/protection/verify', + photo_id, + query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, + transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] + + + token = self._download_json( + base_url + '/api/protection/verify', + photo_id, + query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, + transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] + + print(token); + photo_data = self._download_json( base_url + '/api/photo/list?' 
+ query, photo_id, query={ - 'format': 'json', + 'format': 'json', 'token': token }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo'] title = photo_data['title'] - formats = [] audio_path = photo_data.get('audio_download') From 17a9592f29569d47940bb9461fe849029c682751 Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 14:52:47 +0100 Subject: [PATCH 3/9] Geoblocking detection for 23video --- youtube_dl/extractor/twentythreevideo.py | 50 +++++++++++++++++------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index aad4ebab3..bfe516156 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import int_or_none +from ..compat import compat_urllib_parse_urlencode class TwentyThreeVideoIE(InfoExtractor): @@ -35,23 +36,44 @@ class TwentyThreeVideoIE(InfoExtractor): domain, query, photo_id = re.match(self._VALID_URL, url).groups() base_url = 'https://%s' % domain print(base_url + '/api/protection/verify') - # https://video.kglteater.dk/api/protection/verify?callback=visualplatform_1&protection_method=geoblocking&object_id=65550896&object_type=photo&verification_data=&format=json - # https://video.kglteater.dk/api/protection/verify + + def is_geo_blocked(): + # /api/player/settings + playersettings_0 = { 'player_id': 0, 'parameters': 'showDescriptions=0&source=site&photo%5fid=' + photo_id + '&autoPlay=1', '_li': 0, '_bot': 0 } + playersettings_0_param = '/api/player/settings?' + compat_urllib_parse_urlencode(playersettings_0) + + # /api/live/list + livelist_1 = { 'include_actions_p': 1, 'showDescriptions': 0, 'source': 'site', 'photo_id': photo_id, 'autoPlay': 1, 'upcoming_p' : 1, 'ordering': 'streaming', 'player_id': 0 } + livelist_1_param = '/api/live/list?' 
+ compat_urllib_parse_urlencode(livelist_1) + + # /api/photo/list + photolist_2 = { 'size':10, 'include_actions_p': 1, 'showDescriptions': 0, 'source': 'site', 'photo_id': photo_id, 'autoPlay':1,'player_id':0 } + photolist_2_param = '/api/photo/list?' + compat_urllib_parse_urlencode(photolist_2) + + new_query = query={ 'format': 'json', 'callback': 'test', 'playersettings_0': playersettings_0_param, 'livelist_1': livelist_1_param, 'photolist_2': photolist_2_param } + photolist_result = self._download_json( + base_url + '/api/concatenate', + photo_id, + query = new_query, + transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photolist_2') + )["photolist_2"]["photos"] + + for photo in photolist_result: + if photo['photo_id'] == photo_id: + return photo['protection_method'] == 'geoblocking' - player_settings = self._download_json( - base_url + '/api/protection/verify', - photo_id, - query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, - transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] + return false + + video_query = { 'format': 'json' } + if(is_geo_blocked()): + token = self._download_json( + base_url + '/api/protection/verify', + photo_id, + query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, + transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] + video_query = { 'format': 'json', token: token} - token = self._download_json( - base_url + '/api/protection/verify', - photo_id, - query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, - transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] - 
- print(token); photo_data = self._download_json( base_url + '/api/photo/list?' + query, photo_id, query={ From c63cbdb4431966d931ccf12fdf0d07601e6ab3d4 Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 15:13:26 +0100 Subject: [PATCH 4/9] Cleaned up token retrieval in case of geo block --- youtube_dl/extractor/twentythreevideo.py | 46 +++++++++++++----------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index bfe516156..9035fe5d2 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -56,29 +56,33 @@ class TwentyThreeVideoIE(InfoExtractor): photo_id, query = new_query, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photolist_2') - )["photolist_2"]["photos"] - - for photo in photolist_result: - if photo['photo_id'] == photo_id: - return photo['protection_method'] == 'geoblocking' - - return false - - video_query = { 'format': 'json' } - if(is_geo_blocked()): - token = self._download_json( - base_url + '/api/protection/verify', - photo_id, - query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, - transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] - - video_query = { 'format': 'json', token: token} - + )["photolist_2"] + if "photos" in photolist_result: + for photo in photolist_result['photos']: + if photo['photo_id'] == photo_id: + return photo['protection_method'] == 'geoblocking' + + return False + + def get_video_query(): + if is_geo_blocked(): + token = self._download_json( + base_url + '/api/protection/verify', + photo_id, + query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, + transform_source=lambda s: 
self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] + + return { 'format': 'json', 'token': token} + else: + return { 'format': 'json' } + + video_query = get_video_query() photo_data = self._download_json( - base_url + '/api/photo/list?' + query, photo_id, query={ - 'format': 'json', 'token': token - }, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo'] + base_url + '/api/photo/list?' + query, photo_id, + query = video_query, + transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo'] + title = photo_data['title'] formats = [] From 0edd71f0b1184b780685160f722b26dafc649299 Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 16:01:09 +0100 Subject: [PATCH 5/9] Testcase added with md5 and the lot --- youtube_dl/extractor/twentythreevideo.py | 56 +++++++++++++----------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index 9035fe5d2..0e2bb21e3 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -22,67 +22,73 @@ class TwentyThreeVideoIE(InfoExtractor): 'uploader_id': '12258964', 'uploader': 'Rasmus Bysted', } - }, - { + }, { 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', 'only_matching': True, - }, - { + }, { 'url': 'https://video.kglteater.dk/v.ihtml/player.html?source=share&photo%5fid=65098499', - 'only_matching': True, + 'md5': '4e20a33ce86b13ca114ee44a0a8d8efb', + 'info_dict': { + 'id': '65098499', + 'ext': 'mp4', + 'title': 'Askepot', + 'timestamp': 1605173942, + 'upload_date': '20201112', + 'uploader_id': '62151179', + 'uploader': 'jbny', + } }] def _real_extract(self, url): domain, query, photo_id = re.match(self._VALID_URL, url).groups() base_url = 'https://%s' % domain - 
print(base_url + '/api/protection/verify') - + def is_geo_blocked(): # /api/player/settings - playersettings_0 = { 'player_id': 0, 'parameters': 'showDescriptions=0&source=site&photo%5fid=' + photo_id + '&autoPlay=1', '_li': 0, '_bot': 0 } - playersettings_0_param = '/api/player/settings?' + compat_urllib_parse_urlencode(playersettings_0) - + playersettings_0 = {'player_id': 0, 'parameters': 'showDescriptions=0&source=site&photo%5fid=' + photo_id + '&autoPlay=1', '_li': 0, '_bot': 0} + playersettings_0_param = '/api/player/settings?' + compat_urllib_parse_urlencode(playersettings_0) + # /api/live/list - livelist_1 = { 'include_actions_p': 1, 'showDescriptions': 0, 'source': 'site', 'photo_id': photo_id, 'autoPlay': 1, 'upcoming_p' : 1, 'ordering': 'streaming', 'player_id': 0 } - livelist_1_param = '/api/live/list?' + compat_urllib_parse_urlencode(livelist_1) - + livelist_1 = {'include_actions_p': 1, 'showDescriptions': 0, 'source': 'site', 'photo_id': photo_id, 'autoPlay': 1, 'upcoming_p': 1, 'ordering': 'streaming', 'player_id': 0} + livelist_1_param = '/api/live/list?' + compat_urllib_parse_urlencode(livelist_1) + # /api/photo/list - photolist_2 = { 'size':10, 'include_actions_p': 1, 'showDescriptions': 0, 'source': 'site', 'photo_id': photo_id, 'autoPlay':1,'player_id':0 } + photolist_2 = {'size': 10, 'include_actions_p': 1, 'showDescriptions': 0, 'source': 'site', 'photo_id': photo_id, 'autoPlay': 1, 'player_id': 0} photolist_2_param = '/api/photo/list?' 
+ compat_urllib_parse_urlencode(photolist_2) - - new_query = query={ 'format': 'json', 'callback': 'test', 'playersettings_0': playersettings_0_param, 'livelist_1': livelist_1_param, 'photolist_2': photolist_2_param } + + new_query = {'format': 'json', 'callback': 'test', 'playersettings_0': playersettings_0_param, 'livelist_1': livelist_1_param, 'photolist_2': photolist_2_param} photolist_result = self._download_json( base_url + '/api/concatenate', photo_id, - query = new_query, + query=new_query, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photolist_2') )["photolist_2"] - + if "photos" in photolist_result: for photo in photolist_result['photos']: if photo['photo_id'] == photo_id: return photo['protection_method'] == 'geoblocking' return False - + def get_video_query(): if is_geo_blocked(): token = self._download_json( base_url + '/api/protection/verify', photo_id, - query={ 'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1' }, + query={'protection_method': 'geoblocking', 'object_id': photo_id, 'object_type': 'photo', 'format': 'json', 'callback': 'visualplatform_1'}, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'protectedtoken'))['protectedtoken']['protected_token'] - return { 'format': 'json', 'token': token} + return {'format': 'json', 'token': token} else: - return { 'format': 'json' } + return {'format': 'json'} video_query = get_video_query() photo_data = self._download_json( - base_url + '/api/photo/list?' + query, photo_id, - query = video_query, + base_url + '/api/photo/list?' 
+ query, photo_id, + query=video_query, transform_source=lambda s: self._search_regex(r'(?s)({.+})', s, 'photo data'))['photo'] - + title = photo_data['title'] formats = [] From 0ab278e4884a1c885096a422937f041f11640bc2 Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 22:41:59 +0100 Subject: [PATCH 6/9] Added more checks for bonnier --- youtube_dl/extractor/twentythreevideo.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index 0e2bb21e3..e69702af7 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -24,7 +24,16 @@ class TwentyThreeVideoIE(InfoExtractor): } }, { 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', - 'only_matching': True, + 'md5': '772a91f83d129ee5f015b12bea61a78b', + 'info_dict': { + 'id': '36137620', + 'ext': 'mp4', + 'title': 'Photoshop Elements 2019 - Photo Text', + 'timestamp': 1538664032, + 'upload_date': '20181004', + 'uploader_id': '10801356', + 'uploader': 'Kristoffer Engbo', + } }, { 'url': 'https://video.kglteater.dk/v.ihtml/player.html?source=share&photo%5fid=65098499', 'md5': '4e20a33ce86b13ca114ee44a0a8d8efb', From ee9ee5a0b725665bd5adf503f352ce39672420ed Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 22:46:11 +0100 Subject: [PATCH 7/9] Added a test for non-geoblocked content from kglteater.dk --- youtube_dl/extractor/twentythreevideo.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index e69702af7..142f2cf98 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -33,7 +33,7 @@ class TwentyThreeVideoIE(InfoExtractor): 'upload_date': '20181004', 'uploader_id': 
'10801356', 'uploader': 'Kristoffer Engbo', - } + } }, { 'url': 'https://video.kglteater.dk/v.ihtml/player.html?source=share&photo%5fid=65098499', 'md5': '4e20a33ce86b13ca114ee44a0a8d8efb', @@ -46,6 +46,18 @@ class TwentyThreeVideoIE(InfoExtractor): 'uploader_id': '62151179', 'uploader': 'jbny', } + }, { + 'url': 'https://video.kglteater.dk/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=52486482&autoPlay=1', + 'md5': 'c39ffb965079fb4395788e6814ec3cdc', + 'info_dict': { + 'id': '52486482', + 'ext': 'mp4', + 'title': u'N\xf8ddekn\xe6kkeren 2019', + 'timestamp': 1558953133, + 'upload_date': '20190527', + 'uploader_id': '7450690', + 'uploader': 'Tejs Holm', + } }] def _real_extract(self, url): From 1734e55a96c10f223f261289351801402f11b18b Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 22:49:29 +0100 Subject: [PATCH 8/9] Removed unicode --- youtube_dl/extractor/twentythreevideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index 142f2cf98..00c63b3c0 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -52,7 +52,7 @@ class TwentyThreeVideoIE(InfoExtractor): 'info_dict': { 'id': '52486482', 'ext': 'mp4', - 'title': u'N\xf8ddekn\xe6kkeren 2019', + 'title': 'Nøddeknækkeren 2019', 'timestamp': 1558953133, 'upload_date': '20190527', 'uploader_id': '7450690', From 47217e7b547c08a5e20c94aa9e31828183bdb5ca Mon Sep 17 00:00:00 2001 From: Jesenko Mehmedbasic Date: Tue, 22 Dec 2020 22:53:37 +0100 Subject: [PATCH 9/9] Unicode again --- youtube_dl/extractor/twentythreevideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index 00c63b3c0..74c590520 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -52,7 +52,7 @@ class 
TwentyThreeVideoIE(InfoExtractor): 'info_dict': { 'id': '52486482', 'ext': 'mp4', - 'title': 'Nøddeknækkeren 2019', + 'title': 'N\xf8ddekn\xe6kkeren 2019', 'timestamp': 1558953133, 'upload_date': '20190527', 'uploader_id': '7450690',