Redesigned link extraction process

This commit is contained in:
Thomas Christlieb 2017-02-05 21:13:51 +01:00
parent ee32d5ae0f
commit 63cf1124fa

View File

@ -19,6 +19,7 @@ from ..utils import (
ExtractorError, ExtractorError,
ohdave_rsa_encrypt, ohdave_rsa_encrypt,
remove_start, remove_start,
extract_attributes,
) )
@ -301,10 +302,14 @@ class IqiyiIE(InfoExtractor):
def _extract_playlist(self, webpage): def _extract_playlist(self, webpage):
PAGE_SIZE = 50 PAGE_SIZE = 50
links = re.findall( links = []
r'<a[^>]+href="(http://www\.iqiyi\.com/.+\.html)"[^>]+class="site-piclist_pic_link".*>', for link in re.findall(r'<a[^>]+class="[^"]*site-piclist_pic_link[^"]*"[^>]*>', webpage):
webpage) attribs = extract_attributes(link)
if not links: # It must be a valid url, and links on the playlist page have NO title-Attribute in them
# (links to other videos on the video page have, so beware of that!)
if attribs['href'].startswith('http') and 'title' not in attribs:
links.append(attribs['href'])
if len(links) == 0:
return return
album_id = self._search_regex( album_id = self._search_regex(