First version of a VPRO regex

This commit is contained in:
Bart Broere 2024-03-05 13:55:59 +01:00
parent 28ba01f1cc
commit eb6e396bfb

View File

@ -4,9 +4,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import ExtractorError
ExtractorError,
)
class NPOIE(InfoExtractor): class NPOIE(InfoExtractor):
@ -189,3 +187,29 @@ class ONIE(NPOIE):
'title': video_id, 'title': video_id,
'formats': formats, 'formats': formats,
} }
class VPROIE(NPOIE):
    """Extractor for vpro.nl pages.

    The page is downloaded and scanned for ``data-media-id`` attributes;
    every id found is handed to ``_download_by_product_id`` (not defined in
    this class -- presumably inherited from NPOIE; verify against the base
    class) and the returned formats are merged into a single result.
    """
    IE_NAME = 'vpro'
    IE_DESC = 'vpro.nl'
    # The dot in the host name is escaped so that only the literal domain
    # "vpro.nl" matches (an unescaped dot would accept any character).
    _VALID_URL = r'https?://(?:www\.)?vpro\.nl/.*'
    _TESTS = [{
        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
    }]

    # Raw string, and the value is restricted to non-quote characters so a
    # match can never run past the closing quote into neighbouring
    # attributes. The id must contain an underscore, as in the original
    # pattern. The closing quote anchors the match, so the attribute may be
    # followed by whitespace or directly by '>'.
    _MEDIA_ID_RE = r'data-media-id="([^"]+_[^"]+)"'

    def _real_extract(self, url):
        """Collect the formats of every media id embedded in the page at *url*.

        The last path segment of the URL is used both as the video id and as
        the title of the returned info dict.

        Raises:
            ExtractorError: if no formats could be collected, i.e. the page
                contains no media id or none of the ids produced a format.
        """
        video_id = url.rstrip('/').split('/')[-1]
        page, _ = self._download_webpage_handle(url, video_id)

        # De-duplicate while keeping page order: the same id can appear in
        # several elements and must not be fetched or listed twice.
        media_ids = []
        for media_id in re.findall(self._MEDIA_ID_RE, page):
            if media_id not in media_ids:
                media_ids.append(media_id)

        formats = []
        for media_id in media_ids:
            formats.extend(self._download_by_product_id(media_id, video_id))

        if not formats:
            raise ExtractorError('Could not find a POMS product id in the provided URL.')

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }