Compare commits

...

3 Commits

Author SHA1 Message Date
Bart Broere
4c90b2f587
Adhere to code style
Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-03-14 13:39:59 +01:00
Bart Broere
bc86c5f73b
Make regex more specific and remove redundant .*
2024-03-14 13:37:41 +01:00
Bart Broere
ad64f3751e
Improve regex
Co-authored-by: Roy <git@rvsit.nl>
2024-03-14 13:34:33 +01:00

View File

@@ -11,7 +11,7 @@ from ..utils import ExtractorError
class NPOIE(InfoExtractor):
IE_NAME = 'npo'
IE_DESC = 'npo.nl'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/.*'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/'
_TESTS = [{
'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
@@ -55,8 +55,7 @@ class NPOIE(InfoExtractor):
slug = url.split('/')[-1]
program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
slug,
query={'slug': slug})
slug, query={'slug': slug})
product_id = program_metadata.get('productId')
images = program_metadata.get('images')
thumbnail = None
@@ -307,7 +306,7 @@ class VPROIE(NPOIE):
def _real_extract(self, url):
video_id = url.rstrip('/').split('/')[-1]
page, _ = self._download_webpage_handle(url, video_id)
results = re.findall(r'data-media-id="(.+_.+)"\s', page)
results = re.findall(r'data-media-id="([a-zA-Z0-9_]+)"\s', page)
formats = []
for result in results:
formats.extend(self._extract_formats_by_product_id(result, video_id))