From 2ffd1b4747fe920af881336c41389d90bd8a9cb4 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Wed, 23 Feb 2022 22:11:51 +0100 Subject: [PATCH 01/16] Rename YourPorn extractor to SxyPrn --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/{yourporn.py => sxyprn.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename youtube_dl/extractor/{yourporn.py => sxyprn.py} (98%) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50b7cb4a0..088967eaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1603,7 +1603,7 @@ from .younow import ( YouNowMomentIE, ) from .youporn import YouPornIE -from .yourporn import YourPornIE +from .sxyprn import SxyPrnIE from .yourupload import YourUploadIE from .youtube import ( YoutubeIE, diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/sxyprn.py similarity index 98% rename from youtube_dl/extractor/yourporn.py rename to youtube_dl/extractor/sxyprn.py index 98347491e..234d74038 100644 --- a/youtube_dl/extractor/yourporn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -8,7 +8,7 @@ from ..utils import ( ) -class YourPornIE(InfoExtractor): +class SxyPrnIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P[^/?#&.]+)' _TESTS = [{ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', From 84b91bbb74ad4f6179bc0ef1bcdf386979258c22 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Wed, 23 Feb 2022 23:34:11 +0100 Subject: [PATCH 02/16] Scrape tags --- youtube_dl/extractor/sxyprn.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 234d74038..70c6f1182 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -20,6 +20,7 @@ class SxyPrnIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 165, 'age_limit': 18, + 'tags': ['porn', 'gratis porno', 'anal', 'free porn videos', 'videos', 'movies'], }, 'params': { 'skip_download': True, @@ -56,6 +57,9 @@ class SxyPrnIE(InfoExtractor): r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', default=None)) + tags = self._search_regex(r' Date: Wed, 23 Feb 2022 23:34:11 +0100 Subject: [PATCH 03/16] Scrape tags modified: youtube_dl/extractor/sxyprn.py --- youtube_dl/extractor/sxyprn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 234d74038..39b0eb722 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -20,6 +20,7 @@ class SxyPrnIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 165, 'age_limit': 18, + 'tags': ['porn', 'gratis porno', 'anal', 'free porn videos', 'videos', 'movies'], }, 'params': { 'skip_download': True, @@ -56,6 +57,8 @@ class SxyPrnIE(InfoExtractor): r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', default=None)) + tags = self._search_regex(r' Date: Fri, 25 Feb 2022 00:15:06 +0100 Subject: [PATCH 04/16] Scrape uploader --- youtube_dl/extractor/sxyprn.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 6aa0b97bc..052702c20 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -21,6 +21,9 @@ class SxyPrnIE(InfoExtractor): 'duration': 165, 'age_limit': 18, 'tags': ['porn', 'gratis porno', 'anal', 'free porn videos', 'videos', 'movies'], + 'uploader': 'PornHot', + 'uploader_id': 'PornHot', + 'uploader_url': 'https://sxyprn.com/blog/porn-hot/0.html', }, 'params': { 'skip_download': True, @@ -56,9 +59,10 @@ class SxyPrnIE(InfoExtractor): duration = parse_duration(self._search_regex( r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', default=None)) - tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader') + uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) + return { 'id': video_id, 'url': video_url, @@ -68,4 +72,7 @@ class SxyPrnIE(InfoExtractor): 'age_limit': 18, 'ext': 'mp4', 'tags': tags, + 'uploader': uploader, + 'uploader_id': uploader, + 'uploader_url': uploader_url, } From 13ed37b7cdbe9c9c9ba32687981a8112d983fa07 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Fri, 25 Feb 2022 19:07:36 +0100 Subject: [PATCH 05/16] Scrape actors --- youtube_dl/extractor/sxyprn.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 052702c20..f6b0eb16a 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +import re from .common import InfoExtractor from ..compat import compat_str @@ -11,25 +12,26 @@ from ..utils import ( class SxyPrnIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P[^/?#&.]+)' _TESTS = [{ - 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', + 'url': 'https://sxyprn.com/post/6217e4ce4c36e.html', 'md5': '6f8682b6464033d87acaa7a8ff0c092e', 'info_dict': { - 'id': '57ffcb2e1179b', + 'id': '6217e4ce4c36e', 'ext': 'mp4', - 'title': 'md5:c9f43630bd968267672651ba905a7d35', + 'title': 'md5:04e5427c36d2e9e229588059dac45a62', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 165, + 'duration': 3813, 'age_limit': 18, - 'tags': ['porn', 'gratis porno', 'anal', 'free porn videos', 'videos', 'movies'], - 'uploader': 'PornHot', - 'uploader_id': 'PornHot', - 'uploader_url': 'https://sxyprn.com/blog/porn-hot/0.html', + 'tags': ['Nicole Love', 'Cindy Shine', 'Anal', 'DoublePenetration', 'GangBang', 'BigTits', 'BigAss', 'Blowjob'], + 'uploader': 'SmokeCrumb', + 'uploader_id': 'SmokeCrumb', + 'uploader_url': 'https://sxyprn.com/blog/608a6b540ee7b/0.html', + 'actors': [{'given_name': 'Nicole Love'}, {'given_name': 'Cindy Shine'}], }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html', + 'url': 'https://sxyprn.com/post/6217e4ce4c36e.html', 'only_matching': True, }] @@ -62,6 +64,12 @@ class SxyPrnIE(InfoExtractor): tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader') uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) + actors_names = re.findall(r'·(?P.+?)', webpage) + actors=[] + for name in actors_names: + actors.append({ + 'given_name': name + }) return { 'id': video_id, @@ -75,4 +83,5 @@ class SxyPrnIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader, 'uploader_url': uploader_url, + 'actors': actors } From 3e92da141db25e80ab7e1651bebbdaab23442b14 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Fri, 25 Feb 2022 19:31:28 +0100 Subject: [PATCH 06/16] Scrape views --- youtube_dl/extractor/sxyprn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index f6b0eb16a..7628ea6d5 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -26,6 +26,7 @@ class SxyPrnIE(InfoExtractor): 'uploader_id': 'SmokeCrumb', 'uploader_url': 'https://sxyprn.com/blog/608a6b540ee7b/0.html', 'actors': [{'given_name': 'Nicole Love'}, {'given_name': 'Cindy Shine'}], + 'views': 0, }, 'params': { 'skip_download': True, @@ -70,6 +71,8 @@ class SxyPrnIE(InfoExtractor): actors.append({ 'given_name': name }) + views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views')) + self._TESTS[0]['info_dict']['views'] = views return { 'id': video_id, @@ -83,5 +86,6 @@ class SxyPrnIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader, 'uploader_url': uploader_url, - 'actors': actors + 'actors': actors, + 'views': views, } From 92fa1c553ec2c3329821f7a077fb982971be2d49 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Fri, 25 Feb 2022 20:16:25 +0100 Subject: [PATCH 07/16] Fixed test --- youtube_dl/extractor/sxyprn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 7628ea6d5..9dba2c211 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -26,7 +26,7 @@ class SxyPrnIE(InfoExtractor): 'uploader_id': 'SmokeCrumb', 'uploader_url': 'https://sxyprn.com/blog/608a6b540ee7b/0.html', 'actors': [{'given_name': 'Nicole Love'}, {'given_name': 'Cindy Shine'}], - 'views': 0, + 'views': int, }, 'params': { 'skip_download': True, @@ -72,7 +72,6 @@ class SxyPrnIE(InfoExtractor): 'given_name': name }) views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views')) - self._TESTS[0]['info_dict']['views'] = views return { 'id': video_id, From 64f9e5f6b1b24ad169613ab668fecf7ebc23d57a Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Fri, 25 Feb 2022 20:26:02 +0100 Subject: [PATCH 08/16] Format code according to coding conventions --- youtube_dl/extractor/sxyprn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 9dba2c211..d6a698061 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -66,7 +66,7 @@ class SxyPrnIE(InfoExtractor): uploader = self._search_regex(r'
.+?(?P.+?)<', webpage, 'uploader', group='uploader') uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) actors_names = re.findall(r'·(?P.+?)', webpage) - actors=[] + actors = [] for name in actors_names: actors.append({ 'given_name': name From 055351fddbc0aae22c977e6e89dc4bb70b8fa231 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 13:28:16 +0100 Subject: [PATCH 09/16] Scrape actors urls --- youtube_dl/extractor/sxyprn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index d6a698061..9c30e6293 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -65,11 +65,12 @@ class SxyPrnIE(InfoExtractor): tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader') uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) - actors_names = re.findall(r'·(?P.+?)', webpage) + actors_data = re.findall(r'.+?)\' class=\'tdn htag_rel_a\'>
·(?P.+?)', webpage) actors = [] - for name in actors_names: + for actor_tuple in actors_data: actors.append({ - 'given_name': name + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]) }) views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views')) From c8c3585ae2b0384590289231ac599ce6f495fd04 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 13:28:16 +0100 Subject: [PATCH 10/16] Scrape actors urls modified: youtube_dl/extractor/sxyprn.py --- youtube_dl/extractor/sxyprn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index d6a698061..5112f9e4d 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -25,7 +25,7 @@ class SxyPrnIE(InfoExtractor): 'uploader': 'SmokeCrumb', 'uploader_id': 'SmokeCrumb', 'uploader_url': 'https://sxyprn.com/blog/608a6b540ee7b/0.html', - 'actors': [{'given_name': 'Nicole Love'}, {'given_name': 'Cindy Shine'}], + 'actors': [{'given_name': 'Nicole Love', 'url': 'https://sxyprn.com/Nicole-Love.html'}, {'given_name': 'Cindy Shine', 'url': 'https://sxyprn.com/Cindy-Shine.html'}], 'views': int, }, 'params': { @@ -65,11 +65,12 @@ class SxyPrnIE(InfoExtractor): tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader') uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) - actors_names = re.findall(r'·(?P.+?)', webpage) + actors_data = re.findall(r'.+?)\' class=\'tdn htag_rel_a\'>
·(?P.+?)', webpage) actors = [] - for name in actors_names: + for actor_tuple in actors_data: actors.append({ - 'given_name': name + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]) }) views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views')) From ec184937c4018c958feeaea9f78113f05ac55753 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:28:55 +0100 Subject: [PATCH 11/16] Rename views to view_count --- youtube_dl/extractor/sxyprn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 5112f9e4d..1fd4ea90c 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -26,7 +26,7 @@ class SxyPrnIE(InfoExtractor): 'uploader_id': 'SmokeCrumb', 'uploader_url': 'https://sxyprn.com/blog/608a6b540ee7b/0.html', 'actors': [{'given_name': 'Nicole Love', 'url': 'https://sxyprn.com/Nicole-Love.html'}, {'given_name': 'Cindy Shine', 'url': 'https://sxyprn.com/Cindy-Shine.html'}], - 'views': int, + 'view_count': int, }, 'params': { 'skip_download': True, @@ -87,5 +87,5 @@ class SxyPrnIE(InfoExtractor): 'uploader_id': uploader, 'uploader_url': uploader_url, 'actors': actors, - 'views': views, + 'view_count': views, } From 042b348adab764e90f6f650fae0e2663e5ae68ce Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:40:17 +0100 Subject: [PATCH 12/16] Check whether actors_data has been found --- youtube_dl/extractor/sxyprn.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 1fd4ea90c..ef2bcdd12 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -67,11 +67,12 @@ class SxyPrnIE(InfoExtractor): uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) actors_data = re.findall(r'.+?)\' class=\'tdn htag_rel_a\'>
·(?P.+?)', webpage) actors = [] - for actor_tuple in actors_data: - actors.append({ - 'given_name': actor_tuple[1], - 'url': urljoin(url, actor_tuple[0]) - }) + if actors_data is not None: + for actor_tuple in actors_data: + actors.append({ + 'given_name': actor_tuple[1], + 'url': urljoin(url, actor_tuple[0]) + }) views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views')) return { From e51db9573d4e188d8a2b5cef3665552be6f63a94 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:42:07 +0100 Subject: [PATCH 13/16] Add default fallback values --- youtube_dl/extractor/sxyprn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index ef2bcdd12..791d7e9cc 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -62,8 +62,8 @@ class SxyPrnIE(InfoExtractor): duration = parse_duration(self._search_regex( r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', default=None)) - tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader') + tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader', default=None) uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) actors_data = re.findall(r'.+?)\' class=\'tdn htag_rel_a\'>
·(?P.+?)', webpage) actors = [] From ff65034e9aaed45b420fd144573910246d5a9554 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 15:42:07 +0100 Subject: [PATCH 14/16] Add default fallback values modified: youtube_dl/extractor/sxyprn.py --- youtube_dl/extractor/sxyprn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index ef2bcdd12..e7556eb9d 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -62,8 +62,8 @@ class SxyPrnIE(InfoExtractor): duration = parse_duration(self._search_regex( r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration', default=None)) - tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader') + tags = self._search_regex(r'.+?(?P.+?)<', webpage, 'uploader', group='uploader', default=None) uploader_url = urljoin(url, self._search_regex(r'
.+?)\'.+?(?P.+?)<', webpage, 'uploader_url', group='uploader_url')) actors_data = re.findall(r'.+?)\' class=\'tdn htag_rel_a\'>
·(?P.+?)', webpage) actors = [] @@ -73,7 +73,7 @@ class SxyPrnIE(InfoExtractor): 'given_name': actor_tuple[1], 'url': urljoin(url, actor_tuple[0]) }) - views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views')) + views = int(self._search_regex(r'
.+? (?P.+) views
', webpage, 'views', group='views', default=0)) return { 'id': video_id, From b87f00dd85c80755123fcc54fe13cecdadc04858 Mon Sep 17 00:00:00 2001 From: DarkFighterLuke Date: Sat, 26 Feb 2022 16:51:31 +0100 Subject: [PATCH 15/16] Fix checks on empty data --- youtube_dl/extractor/sxyprn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sxyprn.py b/youtube_dl/extractor/sxyprn.py index 0a888674c..b4df9f83d 100644 --- a/youtube_dl/extractor/sxyprn.py +++ b/youtube_dl/extractor/sxyprn.py @@ -67,7 +67,7 @@ class SxyPrnIE(InfoExtractor): uploader_url = urljoin(url, self._search_regex(r'