[prosiebensat1] fix playlist extraction

This commit is contained in:
Moritz Heinemann 2018-03-01 14:54:10 +01:00
parent 44dc11db61
commit 5140730235

View File

@@ -374,10 +374,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
] ]
_PAGE_TYPE_REGEXES = [ _PAGE_TYPE_REGEXES = [
r'<meta name="page_type" content="([^"]+)">', r'<meta name="page_type" content="([^"]+)"/?>',
r"'itemType'\s*:\s*'([^']*)'", r"'itemType'\s*:\s*'([^']*)'",
] ]
_PLAYLIST_ID_REGEXES = [ _PLAYLIST_ID_REGEXES = [
r'<meta name="node_id" content="([0-9]+)"/?>',
r'content[iI]d=(\d+)', r'content[iI]d=(\d+)',
r"'itemId'\s*:\s*'([^']*)'", r"'itemId'\s*:\s*'([^']*)'",
] ]
@@ -414,11 +415,12 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
playlist = self._parse_json( playlist = self._parse_json(
self._search_regex( self._search_regex(
r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', r'<script id="state" type="text/plain">(.+?)</script>',
webpage, 'playlist'), webpage, 'playlist'),
playlist_id) playlist_id).get('page').get('clips')
entries = [] entries = []
for item in playlist: for playlist_item in playlist:
item = self._parse_json(playlist_item.get('contentResource'), playlist_id)[0]
clip_id = item.get('id') or item.get('upc') clip_id = item.get('id') or item.get('upc')
if not clip_id: if not clip_id:
continue continue