mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-04-14 19:14:51 +09:00
Scrape actors urls
This commit is contained in:
parent
64f9e5f6b1
commit
055351fddb
@ -65,11 +65,12 @@ class SxyPrnIE(InfoExtractor):
|
|||||||
tags = self._search_regex(r'<meta name="keywords".+content="(?P<tags>.+)"', webpage, 'tags', group='tags').split(', ')
|
tags = self._search_regex(r'<meta name="keywords".+content="(?P<tags>.+)"', webpage, 'tags', group='tags').split(', ')
|
||||||
uploader = self._search_regex(r'<div class=\'pes_author_div pes_edit_div transition\'.+?>.+?<span class=\'a_name\'>(?P<uploader>.+?)<', webpage, 'uploader', group='uploader')
|
uploader = self._search_regex(r'<div class=\'pes_author_div pes_edit_div transition\'.+?>.+?<span class=\'a_name\'>(?P<uploader>.+?)<', webpage, 'uploader', group='uploader')
|
||||||
uploader_url = urljoin(url, self._search_regex(r'<div class=\'pes_author_div pes_edit_div transition\'.+?><a href=\'(?P<uploader_url>.+?)\'.+?<span class=\'a_name\'>(?P<uploader>.+?)<', webpage, 'uploader_url', group='uploader_url'))
|
uploader_url = urljoin(url, self._search_regex(r'<div class=\'pes_author_div pes_edit_div transition\'.+?><a href=\'(?P<uploader_url>.+?)\'.+?<span class=\'a_name\'>(?P<uploader>.+?)<', webpage, 'uploader_url', group='uploader_url'))
|
||||||
actors_names = re.findall(r'<span>·</span><b>(?P<actor>.+?)</b>', webpage)
|
actors_data = re.findall(r'<a href=\'(?P<actor_url>.+?)\' class=\'tdn htag_rel_a\'><div class=\'htag_rel\'><span>·</span><b>(?P<actor_name>.+?)</b>', webpage)
|
||||||
actors = []
|
actors = []
|
||||||
for name in actors_names:
|
for actor_tuple in actors_data:
|
||||||
actors.append({
|
actors.append({
|
||||||
'given_name': name
|
'given_name': actor_tuple[1],
|
||||||
|
'url': urljoin(url, actor_tuple[0])
|
||||||
})
|
})
|
||||||
views = int(self._search_regex(r'<div class=\'post_control_time\'>.+?</strong> (?P<views>.+) views</div>', webpage, 'views', group='views'))
|
views = int(self._search_regex(r'<div class=\'post_control_time\'>.+?</strong> (?P<views>.+) views</div>', webpage, 'views', group='views'))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user