mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-08-07 19:24:15 +09:00
Compare commits
No commits in common. "0cbcd1aec656998d44dbffe59cbb0adac4b84b45" and "d426a92a60ba9b6eb01256d3dcad4dcbfecd742c" have entirely different histories.
0cbcd1aec6
...
d426a92a60
@ -847,16 +847,7 @@ from .nowness import (
|
|||||||
NownessSeriesIE,
|
NownessSeriesIE,
|
||||||
)
|
)
|
||||||
from .noz import NozIE
|
from .noz import NozIE
|
||||||
from .npo import (
|
from .npo import BNNVaraIE, NPOIE, ONIE, VPROIE
|
||||||
AndereTijdenIE,
|
|
||||||
BNNVaraIE,
|
|
||||||
NPOIE,
|
|
||||||
ONIE,
|
|
||||||
SchoolTVIE,
|
|
||||||
HetKlokhuisIE,
|
|
||||||
VPROIE,
|
|
||||||
WNLIE,
|
|
||||||
)
|
|
||||||
from .npr import NprIE
|
from .npr import NprIE
|
||||||
from .nrk import (
|
from .nrk import (
|
||||||
NRKIE,
|
NRKIE,
|
||||||
|
@ -11,11 +11,32 @@ from ..utils import ExtractorError
|
|||||||
class NPOIE(InfoExtractor):
|
class NPOIE(InfoExtractor):
|
||||||
IE_NAME = 'npo'
|
IE_NAME = 'npo'
|
||||||
IE_DESC = 'npo.nl'
|
IE_DESC = 'npo.nl'
|
||||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/.*'
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
npo\.nl/(?:[^/]+/)*
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(?P<id>[^/?#]+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
|
'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
|
||||||
# TODO fill in other test attributes
|
# TODO fill in other test attributes
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
|
||||||
|
'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VARA_101191800',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'De Mega Mike & Mega Thomas show: The best of.',
|
||||||
|
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
|
||||||
|
'upload_date': '20090227',
|
||||||
|
'duration': 2400,
|
||||||
|
},
|
||||||
|
'skip': 'Video gone',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
|
'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
|
||||||
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||||
@ -45,21 +66,27 @@ class NPOIE(InfoExtractor):
|
|||||||
url = url[:-10]
|
url = url[:-10]
|
||||||
url = url.rstrip('/')
|
url = url.rstrip('/')
|
||||||
slug = url.split('/')[-1]
|
slug = url.split('/')[-1]
|
||||||
|
page = self._download_webpage(url, slug, 'Finding productId using slug: %s' % slug)
|
||||||
program_metadata = self._download_json('https://npo.nl/start/api/domain/program-detail',
|
# TODO find out what proper HTML parsing utilities are available in youtube-dl
|
||||||
slug,
|
next_data = page.split('<script id="__NEXT_DATA__" type="application/json">')[1].split('</script>')[0]
|
||||||
query={'slug': slug})
|
# TODO The data in this script tag feels like GraphQL, so there might be an easier way
|
||||||
product_id = program_metadata.get('productId')
|
# to get the product id, maybe using a GraphQL endpoint
|
||||||
images = program_metadata.get('images')
|
next_data = self._parse_json(next_data, slug)
|
||||||
thumbnail = None
|
product_id, title, description, thumbnail = None, None, None, None
|
||||||
for image in images:
|
for query in next_data['props']['pageProps']['dehydratedState']['queries']:
|
||||||
thumbnail = image.get('url')
|
if isinstance(query['state']['data'], list):
|
||||||
break
|
for entry in query['state']['data']:
|
||||||
title = program_metadata.get('title')
|
if entry['slug'] == slug:
|
||||||
descriptions = program_metadata.get('description', {})
|
product_id = entry.get('productId')
|
||||||
description = descriptions.get('long') or descriptions.get('short') or descriptions.get('brief')
|
title = entry.get('title')
|
||||||
duration = program_metadata.get('durationInSeconds')
|
synopsis = entry.get('synopsis', {})
|
||||||
|
description = (synopsis.get('long')
|
||||||
|
or synopsis.get('short')
|
||||||
|
or synopsis.get('brief'))
|
||||||
|
thumbnails = entry.get('images')
|
||||||
|
for thumbnail_entry in thumbnails:
|
||||||
|
if 'url' in thumbnail_entry:
|
||||||
|
thumbnail = thumbnail_entry.get('url')
|
||||||
if not product_id:
|
if not product_id:
|
||||||
raise ExtractorError('No productId found for slug: %s' % slug)
|
raise ExtractorError('No productId found for slug: %s' % slug)
|
||||||
|
|
||||||
@ -69,9 +96,9 @@ class NPOIE(InfoExtractor):
|
|||||||
'id': slug,
|
'id': slug,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': title or slug,
|
'title': title or slug,
|
||||||
'description': description or title or slug,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
# TODO fill in other metadata that's available
|
||||||
}
|
}
|
||||||
|
|
||||||
def _download_by_product_id(self, product_id, slug, url=None):
|
def _download_by_product_id(self, product_id, slug, url=None):
|
||||||
@ -79,8 +106,7 @@ class NPOIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for profile in (
|
for profile in (
|
||||||
'dash',
|
'dash',
|
||||||
# 'hls' is available too, but implementing it doesn't add much
|
# 'hls', # TODO test what needs to change for 'hls' support
|
||||||
# As far as I know 'dash' is always available
|
|
||||||
):
|
):
|
||||||
stream_link = self._download_json(
|
stream_link = self._download_json(
|
||||||
'https://prod.npoplayer.nl/stream-link', video_id=slug,
|
'https://prod.npoplayer.nl/stream-link', video_id=slug,
|
||||||
@ -105,7 +131,6 @@ class BNNVaraIE(NPOIE):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
|
_VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bnnvara.nl/videos/27455',
|
'url': 'https://www.bnnvara.nl/videos/27455',
|
||||||
# TODO fill in other test attributes
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -145,7 +170,6 @@ class ONIE(NPOIE):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
|
_VALID_URL = r'https?://(?:www\.)?ongehoordnederland.tv/.*'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
|
'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
|
||||||
# TODO fill in other test attributes
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -157,8 +181,34 @@ class ONIE(NPOIE):
|
|||||||
formats.extend(self._download_by_product_id(result, video_id))
|
formats.extend(self._download_by_product_id(result, video_id))
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Could not find a POMS product id in the provided URL, '
|
raise ExtractorError('Could not find a POMS product id in the provided URL.')
|
||||||
'perhaps because all stream URLs are DRM protected.')
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VPROIE(NPOIE):
|
||||||
|
IE_NAME = 'vpro'
|
||||||
|
IE_DESC = 'vpro.nl'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = url.rstrip('/').split('/')[-1]
|
||||||
|
page, _ = self._download_webpage_handle(url, video_id)
|
||||||
|
results = re.findall('data-media-id="(.+_.+)"\s', page)
|
||||||
|
formats = []
|
||||||
|
for result in results:
|
||||||
|
formats.extend(self._download_by_product_id(result, video_id))
|
||||||
|
break # TODO find a better solution, VPRO pages can have multiple videos embedded
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError('Could not find a POMS product id in the provided URL.')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -174,7 +224,6 @@ class ZAPPIE(NPOIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973',
|
'url': 'https://www.zapp.nl/programmas/zappsport/gemist/AT_300003973',
|
||||||
# TODO fill in other test attributes
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -187,93 +236,3 @@ class ZAPPIE(NPOIE):
|
|||||||
'title': video_id,
|
'title': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class SchoolTVIE(NPOIE):
|
|
||||||
IE_NAME = 'schooltv'
|
|
||||||
IE_DESC = 'schooltv.nl'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?schooltv.nl/item/.*'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://schooltv.nl/item/zapp-music-challenge-2015-zapp-music-challenge-2015',
|
|
||||||
# TODO fill in other test attributes
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = url.rstrip('/').split('/')[-1]
|
|
||||||
|
|
||||||
# TODO Find out how we could obtain this automatically
|
|
||||||
# Otherwise this extractor might break each time SchoolTV deploys a new release
|
|
||||||
build_id = 'b7eHUzAVO7wHXCopYxQhV'
|
|
||||||
|
|
||||||
metadata_url = 'https://schooltv.nl/_next/data/' \
|
|
||||||
+ build_id \
|
|
||||||
+ '/item/' \
|
|
||||||
+ video_id + '.json'
|
|
||||||
|
|
||||||
metadata = self._download_json(metadata_url,
|
|
||||||
video_id).get('pageProps', {}).get('data', {})
|
|
||||||
|
|
||||||
formats = self._download_by_product_id(metadata.get('poms_mid'), video_id)
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
raise ExtractorError('Could not find a POMS product id in the provided URL, '
|
|
||||||
'perhaps because all stream URLs are DRM protected.')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': metadata.get('title', '') + ' - ' + metadata.get('subtitle', ''),
|
|
||||||
'description': metadata.get('description') or metadata.get('short_description'),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class HetKlokhuisIE(NPOIE):
|
|
||||||
...
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
...
|
|
||||||
|
|
||||||
|
|
||||||
class VPROIE(NPOIE):
|
|
||||||
IE_NAME = 'vpro'
|
|
||||||
IE_DESC = 'vpro.nl'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?vpro.nl/.*'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
|
|
||||||
# TODO fill in other test attributes
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = url.rstrip('/').split('/')[-1]
|
|
||||||
page, _ = self._download_webpage_handle(url, video_id)
|
|
||||||
results = re.findall(r'data-media-id="(.+_.+)"\s', page)
|
|
||||||
formats = []
|
|
||||||
for result in results:
|
|
||||||
formats.extend(self._download_by_product_id(result, video_id))
|
|
||||||
break # TODO find a better solution, VPRO pages can have multiple videos embedded
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
raise ExtractorError('Could not find a POMS product id in the provided URL, '
|
|
||||||
'perhaps because all stream URLs are DRM protected.')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class WNLIE(NPOIE):
|
|
||||||
...
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
...
|
|
||||||
|
|
||||||
|
|
||||||
class AndereTijdenIE(NPOIE):
|
|
||||||
...
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
...
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user