Merge 5860937e17a249d80739efd07629dacd2392eba2 into da7223d4aa42ff9fc680b0951d043dd03cec2d30

This commit is contained in:
dirkf 2025-03-22 07:19:37 +08:00 committed by GitHub
commit 6e8837d66f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 65 additions and 0 deletions

View File

@@ -1430,6 +1430,7 @@ from .dlive import (
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
from .uno import UNOIE
from .uol import UOLIE
from .uplynk import (
    UplynkIE,

View File

@@ -0,0 +1,64 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_timestamp,
url_or_none,
)
class UNOIE(InfoExtractor):
    """Extractor for asset pages on media.un.org.

    The asset page is searched for a Kaltura partner ID and entry ID; the
    actual media extraction is then delegated to the Kaltura extractor,
    with extra metadata taken from the page's <meta> tags.
    """

    _VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?P<id>k\d[\w]+)'
    _TESTS = [{
        'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk',
        'md5': '981c41cb283227f079d1e5059fd0d30c',
        'info_dict': {
            'id': '1_r3vy9ikk',
            'ext': 'mp4',
            'title': 'md5:abde2a46d396051535e5e6fd6f627a19',
            'description': 'md5:2cba11ee153ae3e6ae2c629e7c4e39b0',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 5768,
            'timestamp': 1625216872,
            'upload_date': '20210702',
            'uploader_id': 'UNWebTV_New_York',
        },
    }, {
        'url': 'https://media.un.org/en/asset/k12/k12gpkg3qx',
        'md5': '5978503ca886a922a0f00cf5a7e82395',
        'info_dict': {
            'id': '1_vohfjqkj',
            'ext': 'mp4',
            'title': '1851st Meeting, 81st session Committee on the Elimination of Discrimination Against Women (CEDAW)',
            'description': 'Informal meeting with NGOs and human rights institutions - 1851st Meeting, 81st session CEDAW',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3502,
            'timestamp': 1644235332,
            'upload_date': '20220207',
            'uploader_id': 'nathalie.minard@un.org',
        },
    }]

    def _real_extract(self, url):
        """Build a url_transparent result that hands over to Kaltura.

        The partner ID, Kaltura entry ID and title are mandatory: the
        searches for them use the default (fatal) behaviour. All other
        fields are optional page metadata and are looked up non-fatally.
        """
        # The ID from the URL is only used for the page download and log
        # messages; the reported ID is the Kaltura entry ID found below.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        partner_id = self._search_regex(
            r'partnerId\s*:\s*(\d+)\b', webpage, 'Partner ID')
        # partner_id matched (\d+) above, so it is safe to interpolate into
        # the next pattern without escaping.
        video_id = self._search_regex(
            r'/p/%s(?:/\w+)+?/entry_id/(\w+)/' % (partner_id, ),
            webpage, 'Kaltura ID')

        # Prefer the meta title; otherwise use <title>, dropping whatever
        # follows the last '|'.
        title = (
            self._html_search_meta(('title', 'og:title'), webpage)
            or self._html_search_regex(
                r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'
            ).rsplit('|', 1)[0]).strip()

        result = self.url_result(
            'kaltura:%s:%s' % (partner_id, video_id), 'Kaltura',
            video_title=title,
            video_id=video_id)
        result.update({
            '_type': 'url_transparent',
            'description': self._html_search_meta(
                ('description', 'og:description'), webpage, 'description'),
            'creator': self._html_search_meta('author', webpage),
            # Key was previously misspelt 'upoader_id', so the value was
            # never reported.
            # NOTE(review): a non-None value here presumably takes
            # precedence over the uploader_id from the Kaltura extractor --
            # confirm the 'publisher' meta is the intended source.
            'uploader_id': self._html_search_meta('publisher', webpage),
            'thumbnail': url_or_none(self._og_search_thumbnail(webpage)),
            # The timestamp is optional: do not abort extraction if the
            # page has no og:updated_time property.
            'timestamp': unified_timestamp(
                self._og_search_property(
                    'updated_time', webpage, fatal=False)),
        })
        return result