# coding: utf-8
# Module youtube_dl/extractor/nhkRadio.py, as introduced by the patch
# "[NhkRadio] add new extractor".  The same patch registers the extractors in
# youtube_dl/extractor/extractors.py (right after the `.nhk` import) with:
#
#     from .nhkRadio import (
#         NhkRadioIE,
#         NhkRadioProgramIE
#     )
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import ExtractorError, parse_iso8601


class NhkRadioBase(InfoExtractor):
    """Helpers shared by the NHK radio on-demand extractors."""

    # Metadata endpoint; filled with (program_id, "<program_id>_<corner_id>").
    _API_URL_TEMPLATE = "https://www.nhk.or.jp/radioondemand/json/%s/bangumi_%s.json"

    def _get_json_meta(self, program_id, corner_id):
        """Download the metadata JSON of one program corner.

        Raises an expected ExtractorError when the server answers 404;
        every other download failure is propagated unchanged.
        """
        program_corner_id = "%s_%s" % (program_id, corner_id)
        try:
            return self._download_json(
                self._API_URL_TEMPLATE % (program_id, program_corner_id),
                program_corner_id)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                raise ExtractorError("The invalid url", expected=True)
            # Do not swallow other errors: falling through here used to end
            # in an UnboundLocalError on the never-assigned result.
            raise

    def _extract_program(self, info, program_corner_id):
        """Build the info dict for one element of the "detail_list" array.

        The resulting id is "<program_corner_id>_<headline_id>".  Only the
        first element of the entry's "file_list" is used.

        Raises ExtractorError when the entry carries no "headline_id" or no
        "file_list" element.
        """
        headline_id = info.get("headline_id")
        file_list = info.get("file_list")
        if not headline_id or not file_list:
            raise ExtractorError("Unable to extract audio file information")

        audio_id = "%s_%s" % (program_corner_id, headline_id)
        audio = file_list[0]
        formats = self._extract_m3u8_formats(
            audio.get("file_name"), audio_id, "m4a",
            entry_protocol="m3u8_native")
        self._sort_formats(formats)
        return {
            "id": audio_id,
            "title": audio.get("file_title"),
            "formats": formats,
            "timestamp": parse_iso8601(audio.get("close_time")),
        }


class NhkRadioIE(NhkRadioBase):
    """Extractor for a single on-demand player URL (?p=<program>_<corner>_<headline>)."""

    _VALID_URL = r"https?://www\.nhk\.or\.jp/radio/player/ondemand\.html\?p=(?P<program_id>\d+)_(?P<corner_id>\d+)_(?P<headline_id>\d+)"

    _TESTS = [
        {
            "url": "https://www.nhk.or.jp/radio/player/ondemand.html?p=4812_01_2898188",
            "info_dict": {
                "id": "4812_01_2898188",
                "ext": "m4a",
                "title": "世界へ発信!ニュースで英語術 #209▽“首相長男から接待” 総務省11人を処分",
                "upload_date": str,
                "timestamp": int,
            },
        },
        {
            "url": "https://www.nhk.or.jp/radio/player/ondemand.html?p=0444_01_2890944",
            "info_dict": {
                "ext": "m4a",
                "id": "0444_01_2890944",
                "title": "歌謡スクランブル 春色コレクション(3) ▽尾崎亜美",
                "upload_date": str,
                "timestamp": int,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict of the entry whose headline_id matches the URL.

        Raises an expected ExtractorError when no entry of "detail_list"
        has the requested headline_id.
        """
        program_id, corner_id, headline_id = re.match(self._VALID_URL, url).groups()
        # Go through the shared helper so a 404 is reported the same way as
        # in the playlist extractor.
        data = self._get_json_meta(program_id, corner_id)
        program_corner_id = "%s_%s" % (program_id, corner_id)
        for detail in data["main"]["detail_list"]:
            if detail.get("headline_id") == headline_id:
                return self._extract_program(detail, program_corner_id)
        raise ExtractorError("The program not found", expected=True)


class NhkRadioProgramIE(NhkRadioBase):
    """Playlist extractor for a program detail URL (?p=<program>_<corner>)."""

    _VALID_URL = r"https?://www\.nhk\.or\.jp/radio/ondemand/detail\.html\?p=(?P<program_id>\d+)_(?P<corner_id>\d+)"

    _TESTS = [
        {
            "url": "https://www.nhk.or.jp/radio/ondemand/detail.html?p=0164_01",
            "info_dict": {"title": "青春アドベンチャー", "id": "0164_01"},
            "playlist_mincount": 5,
        },
        {
            "url": "https://www.nhk.or.jp/radio/ondemand/detail.html?p=0455_01",
            "info_dict": {"id": "0455_01", "title": "弾き語りフォーユー"},
            "playlist_mincount": 5,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist holding every entry of the corner's "detail_list"."""
        program_id, corner_id = re.match(self._VALID_URL, url).groups()
        program_corner_id = "%s_%s" % (program_id, corner_id)
        main = self._get_json_meta(program_id, corner_id)["main"]
        entries = [
            self._extract_program(detail, program_corner_id)
            for detail in main["detail_list"]
        ]
        return self.playlist_result(
            entries, program_corner_id, main["program_name"])