Compare commits

...

15 Commits

Author SHA1 Message Date
dt-rush
f19cb8efbf
Merge 00d8a07eb6 into e1b3fa242c 2024-07-25 10:40:27 -04:00
dirkf
e1b3fa242c [Youtube] Find n function name in player 3400486c
Fixes #32877
2024-07-25 00:16:00 +01:00
dirkf
451046d62a [Youtube] Make n-sig throttling diagnostic up-to-date 2024-07-24 14:33:34 +01:00
dirkf
00d8a07eb6
Update youtube_dl/extractor/bandlab.py 2024-01-23 04:02:26 +00:00
dirkf
f7c7ffbd09
Apply suggestions from code review 2024-01-23 04:00:35 +00:00
dirkf
2d7b8c95e3
Update with traverse_obj() 2024-01-23 03:56:46 +00:00
npayne
d699bafdfc Merge branch 'bandlabextractor' of github.com:dt-rush/youtube-dl into bandlabextractor 2023-01-28 18:13:38 -05:00
npayne
d6b19dca91 bandlab nits 2023-01-28 18:13:12 -05:00
dirkf
b5676df6da
Linted? 2023-01-28 19:47:16 +00:00
npayne
8dced09ed0 fix some errors in bandlab extractor - test now passes 2023-01-28 11:33:00 -05:00
npayne
c673485d4e use try_get in bandlab extractor 2023-01-28 11:23:41 -05:00
npayne
06400d1d0f merge bandlab playlist and album extractors 2023-01-28 11:17:51 -05:00
dirkf
6dd45b8d43
Add Unicode compatibility header 2022-08-29 19:53:51 +01:00
Nick Payne
0201ee1082 remove utf8 song name in test for bandlab playlist extractor 2022-08-29 12:04:58 -04:00
Nick Payne
e24c976035 [bandlabextractor] added new bandlab extractor for track, album, and playlist 2022-08-28 13:39:40 -04:00
4 changed files with 190 additions and 4 deletions

View File

@ -166,6 +166,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
), ),
(
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
),
(
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
),
] ]

View File

@ -0,0 +1,170 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
merge_dicts,
T,
traverse_obj,
txt_or_none,
url_or_none,
)
class BandlabIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bandlab\.com/post/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://www.bandlab.com/post/f5f04998635a44ea819cacdba7ae2076_f8d8574c3bdaec11b6562818783151a1',
'info_dict': {
'id': 'f5f04998635a44ea819cacdba7ae2076_f8d8574c3bdaec11b6562818783151a1',
'ext': 'm4a',
'title': 'ON MY OWN (unreleased)',
'artist': 'Michael MacDonald',
},
}]
def _real_extract(self, url):
track_id = self._match_id(url)
config = self._download_json(
'http://www.bandlab.com/api/v1.3/posts/%s' % track_id, track_id)
track_url = traverse_obj(config, ('track', 'sample', 'audioUrl', T(url_or_none)))
if not track_url:
raise ExtractorError(
'[%s] No video formats found!' % (self.ie_key(), ),
video_id=track_id, expected=True)
title = config['track']['name']
return {
'id': track_id,
'title': title,
'url': track_url,
'artist': traverse_obj(config, ('creator', 'name', T(txt_or_none))),
}
class BandlabAlbumOrPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bandlab\.com/[^/]+/(?P<kind>albums|collections)/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://www.bandlab.com/sbsdasani/albums/dc26e307-e51f-ed11-95d7-002248452390',
'playlist': [
{
'info_dict': {
'id': '91feeb36-8e10-4c91-ae57-ffac0a98c6b4',
'title': 'How\'d I Lose You? (Intro)',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': 'd87c50a2-70cb-4937-9b97-3ae8646ca3aa',
'title': 'Money $$$',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': 'ff2909ff-348f-448d-9d2c-7edbf2f0ec5e',
'title': 'You\'ll Be Mine',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': '53786c38-1f3c-4921-9271-793a64af7186',
'title': 'Who You Are',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': '0394bb27-113f-4d19-b902-f9c1fe6ba8a8',
'title': 'In Your Eyes',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': '2aa44689-c7fa-4d0e-b28a-e0d1dd570372',
'title': 'The Same',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': '281fe589-a559-4260-802d-78a6c7a973d8',
'title': 'Fall In Love',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': '83a9dc0a-702f-40fd-82f6-ab847f6a7b46',
'title': 'Tried So Hard',
'ext': 'm4a',
},
},
{
'info_dict': {
'id': '3f6a4a1c-eb73-449f-bd45-f7958d6f2de1',
'title': 'The End Of Everything',
'ext': 'm4a',
},
}
],
'info_dict': {
'id': 'dc26e307-e51f-ed11-95d7-002248452390',
'album': 'ENDLESS SUMMER',
'artist': 'Michael MacDonald'
},
}, {
'url': 'https://www.bandlab.com/hexatetrahedronx/collections/8fb1041c-e865-eb11-9889-0050f28a2802',
'playlist': [
{
'info_dict': {
'id': '8f37e4aa-92c4-eb11-a7ad-0050f280467f',
'title': 'psych ward',
'ext': 'm4a'
}
}
],
'info_dict': {
'id': '8fb1041c-e865-eb11-9889-0050f28a2802',
'album': 'DOOMTAPE',
'artist': '12days'
}
}]
def _real_extract(self, url):
resource_id, kind = self._match_valid_url(url).group('id', 'kind')
config = self._download_json(
'http://www.bandlab.com/api/v1.3/%s/%s' % (kind, resource_id), resource_id)
entries = []
for track in traverse_obj(config, ('posts', Ellipsis)):
url, name = (traverse_obj(track, ('track', {
'url': ('sample', 'audioUrl', T(url_or_none)),
'name': ('name', T(txt_or_none)),
}), ('revision', {
'url': ('mixdown', 'file', T(url_or_none)),
'name': ('song', 'name', T(txt_or_none)),
})).get(x) for x in ('url', 'name'))
if not (url and name):
continue
track_id = self._search_regex(
r'/([^/]+)\.m4a$', url, 'track id', default=None)
if not track_id:
continue
entries.append({
'url': url,
'id': track_id,
'title': name,
})
return merge_dicts(
self.playlist_result(entries, playlist_id=resource_id),
traverse_obj(config, {
'album': ('name', T(txt_or_none)),
'artist': ('creator', 'name', T(txt_or_none)),
}))

View File

@ -98,6 +98,10 @@ from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bandaichannel import BandaiChannelIE from .bandaichannel import BandaiChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bandlab import (
BandlabIE,
BandlabAlbumOrPlaylistIE
)
from .bbc import ( from .bbc import (
BBCCoUkIE, BBCCoUkIE,
BBCCoUkArticleIE, BBCCoUkArticleIE,

View File

@ -1647,7 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except JSInterpreter.Exception as e: except JSInterpreter.Exception as e:
self.report_warning( self.report_warning(
'%s (%s %s)' % ( '%s (%s %s)' % (
'Unable to decode n-parameter: download likely to be throttled', 'Unable to decode n-parameter: expect download to be blocked or throttled',
error_to_compat_str(e), error_to_compat_str(e),
traceback.format_exc()), traceback.format_exc()),
video_id=video_id) video_id=video_id)
@ -1659,18 +1659,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
# old: .get("n"))&&(b=nfunc[idx](b) # old: .get("n"))&&(b=nfunc[idx](b)
# older: .get("n"))&&(b=nfunc(b) # older: .get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
(?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
\.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
"n+"\[\s*\+?s*[\w$.]+\s*]
)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
\.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx: if not idx:
return func_name return func_name
return self._parse_json(self._search_regex( return self._parse_json(self._search_regex(
r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode, r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)), 'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
func_name, transform_source=js_to_json)[int(idx)] func_name, transform_source=js_to_json)[int(idx)]