[NhkRadio] add new extractor

This commit is contained in:
guredora 2021-02-21 03:46:49 +09:00
parent cf2dbec630
commit 2bbe77d484
2 changed files with 110 additions and 0 deletions

View File

@@ -770,6 +770,10 @@ from .nhk import (
     NhkVodIE,
     NhkVodProgramIE,
 )
+from .nhkRadio import (
+    NhkRadioIE,
+    NhkRadioProgramIE
+)
 from .nhl import NHLIE
 from .nick import (
     NickIE,

View File

@@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError, parse_iso8601
from ..compat import compat_HTTPError
import re
class NhkRadioBase(InfoExtractor):
    """Shared helpers for the NHK radio on-demand extractors."""

    def _get_json_meta(self, program_id, corner_id):
        """Download the "bangumi" JSON document for a program/corner pair.

        The document is fetched from
        ``https://www.nhk.or.jp/radioondemand/json/<program_id>/bangumi_<program_id>_<corner_id>.json``
        and ``<program_id>_<corner_id>`` is used as the id passed to
        ``_download_json``.

        Returns the parsed JSON as returned by ``_download_json``.

        Raises ExtractorError: with ``expected=True`` when the server answers
        with HTTP 404; any other download failure is re-raised unchanged.
        """
        program_corner_id = program_id + "_" + corner_id
        try:
            return self._download_json(
                "https://www.nhk.or.jp/radioondemand/json/"
                + program_id
                + "/bangumi_"
                + program_corner_id
                + ".json",
                program_corner_id,
            )
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                raise ExtractorError("The invalid url", expected=True)
            # Anything that is not a 404 must propagate; swallowing it would
            # leave this method without a value to return.
            raise

    def _extract_program(self, info, program_corner_id):
        """Build an info dict from one entry of the JSON ``detail_list``.

        ``info`` is read for ``headline_id`` and ``file_list``; only the first
        element of ``file_list`` is used (``file_name``, ``file_title`` and
        ``close_time``). The resulting id is
        ``<program_corner_id>_<headline_id>``.

        Raises ExtractorError: when ``headline_id`` is missing or
        ``file_list`` is missing/empty.
        """
        headline_id = info.get("headline_id")
        file_list = info.get("file_list")
        if headline_id is None or not file_list:
            # Fail with a clear message instead of a bare TypeError/IndexError.
            raise ExtractorError(
                "Unable to extract headline_id or file_list for " + program_corner_id
            )

        video_id = program_corner_id + "_" + headline_id
        first_file = file_list[0]
        # "file_name" is handed to the m3u8 extractor as the manifest URL.
        formats = self._extract_m3u8_formats(
            first_file.get("file_name"), video_id, "m4a",
            entry_protocol="m3u8_native")
        self._sort_formats(formats)
        return {
            "id": video_id,
            "title": first_file.get("file_title"),
            "formats": formats,
            # NOTE(review): "close_time" is used as the timestamp; confirm that
            # this field is the intended date for the episode.
            "timestamp": parse_iso8601(first_file.get("close_time")),
        }
class NhkRadioIE(NhkRadioBase):
    """Extractor for a single episode on the NHK radio on-demand player page.

    The ``p`` query parameter carries ``<program_id>_<corner_id>_<headline_id>``.
    """

    _VALID_URL = r"https?://www\.nhk\.or\.jp/radio/player/ondemand\.html\?p=(?P<program_id>\d+)_(?P<corner_id>\d+)_(?P<headline_id>\d+)"
    _TESTS = [
        {
            "url": "https://www.nhk.or.jp/radio/player/ondemand.html?p=4812_01_2898188",
            "info_dict": {
                "id": "4812_01_2898188",
                "ext": "m4a",
                "title": "世界へ発信!ニュースで英語術 #209▽“首相長男から接待” 総務省11人を処分",
                "upload_date": str,
                "timestamp": int,
            },
        },
        {
            "url": "https://www.nhk.or.jp/radio/player/ondemand.html?p=0444_01_2890944",
            "info_dict": {
                "ext": "m4a",
                "id": "0444_01_2890944",
                "title": "歌謡スクランブル 春色コレクション(3) ▽尾崎亜美",
                "upload_date": str,
                "timestamp": int,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the episode whose headline id is in the URL.

        Downloads the program/corner JSON and looks through
        ``main.detail_list`` for the entry with the matching ``headline_id``.

        Raises ExtractorError: with ``expected=True`` when no entry matches.
        """
        mobj = re.match(self._VALID_URL, url)
        program_id = mobj.group("program_id")
        corner_id = mobj.group("corner_id")
        headline_id = mobj.group("headline_id")

        program_corner_id = program_id + "_" + corner_id
        json_url = (
            "https://www.nhk.or.jp/radioondemand/json/"
            + program_id
            + "/bangumi_"
            + program_corner_id
            + ".json"
        )
        data = self._download_json(json_url, program_corner_id)

        # Lazily pick the first entry whose headline id equals the requested one.
        wanted = next(
            (
                detail
                for detail in data["main"]["detail_list"]
                if headline_id == detail.get("headline_id")
            ),
            None,
        )
        if wanted is None:
            raise ExtractorError("The program not found", expected=True)
        return self._extract_program(wanted, program_corner_id)
class NhkRadioProgramIE(NhkRadioBase):
    """Playlist extractor for an NHK radio on-demand program detail page.

    The ``p`` query parameter carries ``<program_id>_<corner_id>``.
    """

    _VALID_URL = r"https?://www\.nhk\.or\.jp/radio/ondemand/detail\.html\?p=(?P<program_id>\d+)_(?P<corner_id>\d+)"
    _TESTS = [
        {
            "url": "https://www.nhk.or.jp/radio/ondemand/detail.html?p=0164_01",
            "info_dict": {"title": "青春アドベンチャー", "id": "0164_01"},
            "playlist_mincount": 5,
        },
        {
            "url": "https://www.nhk.or.jp/radio/ondemand/detail.html?p=0455_01",
            "info_dict": {"id": "0455_01", "title": "弾き語りフォーユー"},
            "playlist_mincount": 5,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist holding every entry of ``main.detail_list``.

        The playlist id is ``<program_id>_<corner_id>`` and its title is taken
        from ``main.program_name`` of the downloaded JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        program_id, corner_id = mobj.group("program_id", "corner_id")

        data = self._get_json_meta(program_id, corner_id)
        playlist_id = program_id + "_" + corner_id

        # Each detail entry is fully extracted (formats included) up front.
        entries = [
            self._extract_program(detail, playlist_id)
            for detail in data["main"]["detail_list"]
        ]
        return self.playlist_result(
            entries, playlist_id, data["main"]["program_name"]
        )