Encoding suggestion from PR

Re-add Zapp
First version of a VPRO regex
2025-08-07 03:04:14 +09:00 · 2024-03-05 14:11:49 +01:00 · 2024-03-05 14:04:03 +01:00 · 2024-03-05 13:55:59 +01:00 · 2024-03-05 13:43:56 +01:00 · 2024-03-05 12:49:22 +01:00
2 changed files with 84 additions and 9 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -847,7 +847,7 @@ from .nowness import (
    NownessSeriesIE,
 )
 from .noz import NozIE
-from .npo import BNNVaraIE, NPOIE
+from .npo import BNNVaraIE, NPOIE, ONIE, VPROIE
 from .npr import NprIE
 from .nrk import (
    NRKIE,
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
-from ..utils import (
+from ..utils import ExtractorError
    ExtractorError,
 )
 class NPOIE(InfoExtractor):
@ -80,11 +80,9 @@ class NPOIE(InfoExtractor):
                        product_id = entry.get('productId')
                        title = entry.get('title')
                        synopsis = entry.get('synopsis', {})
-                        description = (
+                        description = (synopsis.get('long')
-                                synopsis.get('long')
+                                       or synopsis.get('short')
-                                or synopsis.get('short')
+                                       or synopsis.get('brief'))
                                or synopsis.get('brief')
                        )
                        thumbnails = entry.get('images')
                        for thumbnail_entry in thumbnails:
                            if 'url' in thumbnail_entry:
@ -131,6 +129,9 @@ class BNNVaraIE(NPOIE):
    IE_NAME = 'bnnvara'
    IE_DESC = 'bnnvara.nl'
    _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
    _TESTS = [{
        'url': 'https://www.bnnvara.nl/videos/27455',
    }]
    def _real_extract(self, url):
        url = url.rstrip('/')
@ -161,3 +162,77 @@ class BNNVaraIE(NPOIE):
            'formats': formats,
            'thumbnail': media.get('data', {}).get('player', {}).get('image').get('url'),
        }
 class ONIE(NPOIE):
    IE_NAME = 'on'
    IE_DESC = 'ongehoordnederland.tv'
    _VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
    _TESTS = [{
        'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
    }]
    def _real_extract(self, url):
        video_id = url.rstrip('/').split('/')[-1]
        page, _ = self._download_webpage_handle(url, video_id)
        results = re.findall("page: '(.+)'", page)
        formats = []
        for result in results:
            formats.extend(self._download_by_product_id(result, video_id))
        if not formats:
            raise ExtractorError('Could not find a POMS product id in the provided URL.')
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
 class VPROIE(NPOIE):
    IE_NAME = 'vpro'
    IE_DESC = 'vpro.nl'
    _VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
    _TESTS = [{
        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
    }]
    def _real_extract(self, url):
        video_id = url.rstrip('/').split('/')[-1]
        page, _ = self._download_webpage_handle(url, video_id)
        results = re.findall('data-media-id="(.+_.+)"\s', page)
        formats = []
        for result in results:
            formats.extend(self._download_by_product_id(result, video_id))
            break  # TODO find a better solution, VPRO pages can have multiple videos embedded
        if not formats:
            raise ExtractorError('Could not find a POMS product id in the provided URL.')
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
 class ZAPPIE(NPOIE):
    IE_NAME = 'zapp'
    IE_DESC = 'zapp.nl'
    _VALID_URL = r'https?://(?:www\.)?zapp.nl/.*'
    _TESTS = [{
        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973',
    }]
    def _real_extract(self, url):
        video_id = url.rstrip('/').split('/')[-1]
        formats = self._download_by_product_id(url, video_id)
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
Author	SHA1	Message	Date
Bart Broere	d426a92a60	Encoding suggestion from PR	2024-03-05 14:11:49 +01:00
Bart Broere	d36d50fe5c	Re-add Zapp	2024-03-05 14:04:03 +01:00
Bart Broere	eb6e396bfb	First version of a VPRO regex	2024-03-05 13:55:59 +01:00
Bart Broere	28ba01f1cc	Add Ongehoord Nederland and test URL for BNNVARA	2024-03-05 13:43:56 +01:00
Bart Broere	4fc423845e	Fix lint	2024-03-05 12:49:22 +01:00