Compare commits


12 Commits

Author SHA1 Message Date
dirkf 9d17948b5a
[myvideoge] Add new extractor (#31360)
NB download tests on CI servers blocked 

Co-authored-by: Alfonso Solbes <fonk666@gmail.com>
2023-02-02 23:25:44 +00:00
afterdelight f316f5d4e3
[xhamster] add support for new domain xhvid.com (#31370) 2023-02-02 23:20:14 +00:00
dirkf bc6f94e459
[FIFA] Back-port extractor from yt-dlp (#31385) 2023-02-02 23:19:03 +00:00
Epsilonator be3392a0d4
[Blerp] Add new extractor (#31398)
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:33:09 +00:00
zhangeric-15 6d829d8119
[YouTube] Fix not finding videos listed under a channel's "shorts" subpage. (#31409)
Resolves #31336

Co-authored-by: Jouni Järvinen <rautamiekka@users.noreply.github.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:26:31 +00:00
Ruowang Sun 98b0cf1cd0
[Callin] Add new extractor (#31414)
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:21:05 +00:00
Leon Etienne e9611a2a36
[pr0gramm] implement InfoExtractor, Resolves #31433 (#31434)
* [pr0gramm] implement infoextractor

* [pr0gramm] remove misplaced comment, uncapture regex-group

* [pr0gramm]: specify utf-8 coding

* [pr0gramm]: add trailing comma to lists for maintainability

* [pr0gramm]: ie only sets upload_date attribute

* [pr0gramm]: add video_id to title

* [pr0gramm]: more forgiving _valid_url regex

* [pr0gramm]: add uploader to title, if set

* Discriminate URL pattern

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:13:39 +00:00
JChris246 807e593a32
[cammodels] fix and improve extractor (#31453)
Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:12:36 +00:00
Rodrigo Dias 297fbff23b
[doc] Fixed typo appearing to promise an example (#31489)
Resolves #31425 

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 17:10:09 +00:00
Brian Marks 37cbdfa0e7
[americastestkitchen] Add support for downloading entire series (#31493)
Also
* support new sites and URL patterns
* back-port from yt-dlp

Co-authored-by: dirkf <fieldhouse@gmx.net>
2023-02-02 16:58:21 +00:00
dirkf 295736c9cb [jsinterp] Improve parsing
* support subset `... else if ...`
* support `while`
* add `RegExp` class
* generalise `new` support
* limited more debug strings
* matching test changes
2023-02-02 16:31:49 +00:00
pukkandan 14ef89a8da Support `if` statements
Fix for yt-dlp/yt_dlp#6131
Closes #31509
2023-02-02 13:12:46 +00:00
14 changed files with 883 additions and 109 deletions

View File

@@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
 The current default template is `%(title)s-%(id)s.%(ext)s`.
 
-In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
 
 #### Output template and Windows batch files
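For embedders, the same switch is available through the Python API; a minimal sketch (the `outtmpl` and `restrictfilenames` option names are youtube-dl's own, the URL is its usual test video):

import youtube_dl

# 'restrictfilenames' is the API twin of --restrict-filenames: it keeps
# output filenames ASCII-only and avoids spaces and '&'.
opts = {
    'outtmpl': '%(title)s-%(id)s.%(ext)s',
    'restrictfilenames': True,
}
with youtube_dl.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])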

View File

@@ -11,8 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.compat import compat_re_Pattern
-
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
@@ -140,15 +138,23 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertTrue(math.isnan(jsi.call_function('x')))
 
     def test_Date(self):
         jsi = JSInterpreter('''
         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
         ''')
         self.assertEqual(jsi.call_function('x'), 86000)
 
         jsi = JSInterpreter('''
         function x(dt) { return new Date(dt) - 0; }
         ''')
         self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
 
+        # date format m/d/y
+        jsi = JSInterpreter('''
+        function x() { return new Date('12/31/1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
@@ -158,6 +164,57 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('z'), 5)
         self.assertEqual(jsi.call_function('y'), 2)
 
+    def test_if(self):
+        jsi = JSInterpreter('''
+        function x() {
+            let a = 9;
+            if (0==0) {a++}
+            return a
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0==0) {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        """ # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        """
+
+    def test_elseif(self):
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) {return 1}
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
+        """ # Unsupported
+        jsi = JSInterpreter('''
+        function x() {
+            if (0!=0) return 1;
+            else if (1==0) {return 2}
+            else {return 10}
+        }''')
+        self.assertEqual(jsi.call_function('x'), 10)
+        # etc
+        """
+
     def test_for_loop(self):
         # function x() { a=0; for (i=0; i-10; i++) {a++} a }
         jsi = JSInterpreter('''
@@ -165,6 +222,13 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 10)
 
+    def test_while_loop(self):
+        # function x() { a=0; while (a<10) {a++} a }
+        jsi = JSInterpreter('''
+        function x() { a=0; while (a<10) {a++} return a }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 10)
+
     def test_switch(self):
         jsi = JSInterpreter('''
         function x(f) { switch(f){
@@ -383,13 +447,28 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/; return a; }
         ''')
-        self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
+        attrs = set(('findall', 'finditer', 'flags', 'groupindex',
+                     'groups', 'match', 'pattern', 'scanner',
+                     'search', 'split', 'sub', 'subn'))
+        self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
 
         jsi = JSInterpreter('''
         function x() { let a=/,,[/,913,/](,)}/i; return a; }
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+        jsi = JSInterpreter(r'''
+        function x() { let a=[/[)\\]/]; return a[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
+
+        """ # fails
+        jsi = JSInterpreter(r'''
+        function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+        """
+
     def test_char_code_at(self):
         jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
         self.assertEqual(jsi.call_function('x', 0), 116)
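Taken together with the interpreter changes in jsinterp.py at the end of this compare, control flow like the following now evaluates end-to-end; a quick sketch in the style of the tests above (only the brace-delimited `if`/`else if` subset is supported):

from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('''
function x(n) {
    a = 0;
    while (a < n) { a++ }
    if (a == 0) { return -1 }
    else if (a == 10) { return a }
    else { return 0 }
}''')
assert jsi.call_function('x', 10) == 10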

View File

@@ -135,6 +135,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
         'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
     ),
+    (
+        'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
+        'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
+    ),
 ]

View File

@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
         'md5': 'b861c3e365ac38ad319cfd509c30577f',
@@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5b400b9ee338f922cb06450c',
             'title': 'Japanese Suppers',
             'ext': 'mp4',
+            'display_id': 'weeknight-japanese-suppers',
             'description': 'md5:64e606bfee910627efc4b5f050de92b3',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1523318400,
-            'upload_date': '20180410',
-            'release_date': '20180410',
+            'timestamp': 1523304000,
+            'upload_date': '20180409',
+            'release_date': '20180409',
             'series': "America's Test Kitchen",
+            'season': 'Season 18',
             'season_number': 18,
             'episode': 'Japanese Suppers',
             'episode_number': 15,
+            'duration': 1376,
+            'thumbnail': r're:^https?://',
+            'average_rating': 0,
+            'view_count': int,
         },
         'params': {
             'skip_download': True,
@@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
             'id': '5fbe8c61bda2010001c6763b',
             'title': 'Simple Chicken Dinner',
             'ext': 'mp4',
+            'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
             'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
-            'thumbnail': r're:^https?://',
-            'timestamp': 1610755200,
-            'upload_date': '20210116',
-            'release_date': '20210116',
+            'timestamp': 1610737200,
+            'upload_date': '20210115',
+            'release_date': '20210115',
             'series': "America's Test Kitchen",
+            'season': 'Season 21',
             'season_number': 21,
             'episode': 'Simple Chicken Dinner',
             'episode_number': 3,
+            'duration': 1397,
+            'thumbnail': r're:^https?://',
+            'view_count': int,
+            'average_rating': 0,
         },
         'params': {
             'skip_download': True,
@@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
     }, {
         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
         'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
+        'only_matching': True,
     }, {
         'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
         'only_matching': True,
@@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
 
 
 class AmericasTestKitchenSeasonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
     _TESTS = [{
         # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
@@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
         'playlist_count': 13,
     }, {
         # Cooks Country Season
-        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
         'info_dict': {
             'id': 'season_12',
             'title': 'Season 12',
         },
         'playlist_count': 13,
+    }, {
+        # America's Test Kitchen Series
+        'url': 'https://www.americastestkitchen.com/',
+        'info_dict': {
+            'id': 'americastestkitchen',
+            'title': 'America\'s Test Kitchen',
+        },
+        'playlist_count': 558,
+    }, {
+        # Cooks Country Series
+        'url': 'https://www.americastestkitchen.com/cookscountry',
+        'info_dict': {
+            'id': 'cookscountry',
+            'title': 'Cook\'s Country',
+        },
+        'playlist_count': 199,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cookscountry/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cookscountry.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.americastestkitchen.com/cooksillustrated/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.cooksillustrated.com',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        show_name, season_number = re.match(self._VALID_URL, url).groups()
-        season_number = int(season_number)
+        match = re.match(self._VALID_URL, url).groupdict()
+        show = match.get('show2')
+        show_path = ('/' + show) if show else ''
+        show = show or match['show']
+        season_number = int_or_none(match.get('season'))
 
-        slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
+        slug, title = {
+            'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
+            'cookscountry': ('cco', 'Cook\'s Country'),
+            'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
+        }[show]
 
-        season = 'Season %d' % season_number
+        facet_filters = [
+            'search_document_klass:episode',
+            'search_show_slug:' + slug,
+        ]
+
+        if season_number:
+            playlist_id = 'season_%d' % season_number
+            playlist_title = 'Season %d' % season_number
+            facet_filters.append('search_season_list:' + playlist_title)
+        else:
+            playlist_id = show
+            playlist_title = title
 
         season_search = self._download_json(
             'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
-            season, headers={
-                'Origin': 'https://www.%s.com' % show_name,
+            playlist_id, headers={
+                'Origin': 'https://www.americastestkitchen.com',
                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                 'X-Algolia-Application-Id': 'Y1FNZXUI30',
             }, query={
-                'facetFilters': json.dumps([
-                    'search_season_list:' + season,
-                    'search_document_klass:episode',
-                    'search_show_slug:' + slug,
-                ]),
-                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
+                'facetFilters': json.dumps(facet_filters),
+                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
                'attributesToHighlight': '',
                 'hitsPerPage': 1000,
             })
 
         def entries():
             for episode in (season_search.get('hits') or []):
-                search_url = episode.get('search_url')
+                search_url = episode.get('search_url')  # always formatted like '/episode/123-title-of-episode'
                 if not search_url:
                     continue
                 yield {
                     '_type': 'url',
-                    'url': 'https://www.%s.com%s' % (show_name, search_url),
-                    'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
+                    'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
+                    'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
                     'title': episode.get('title'),
                     'description': episode.get('description'),
                     'timestamp': unified_timestamp(episode.get('search_document_date')),
@@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
             }
 
         return self.playlist_result(
-            entries(), 'season_%d' % season_number, season)
+            entries(), playlist_id, playlist_title)
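The season/series split above comes down to the Algolia request; a sketch of the payload the new code builds (the values here are illustrative):

import json

slug = 'cco'  # 'atk', 'cco' or 'cio', from the show slug table above
facet_filters = [
    'search_document_klass:episode',
    'search_show_slug:' + slug,
    # appended only when the URL named a season:
    'search_season_list:Season 12',
]
query = {
    'facetFilters': json.dumps(facet_filters),
    'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
    'attributesToHighlight': '',
    'hitsPerPage': 1000,
}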

View File

@@ -0,0 +1,173 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from ..utils import (
strip_or_none,
traverse_obj,
)
from .common import InfoExtractor
class BlerpIE(InfoExtractor):
IE_NAME = 'blerp'
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
'info_dict': {
'id': '6320fe8745636cb4dd677a5a',
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
'uploader': 'luminousaj',
'uploader_id': '5fb81e51aa66ae000c395478',
'ext': 'mp3',
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
}
}, {
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
'info_dict': {
'id': '5bc94ef4796001000498429f',
'title': 'Yee',
'uploader': '179617322678353920',
'uploader_id': '5ba99cf71386730004552c42',
'ext': 'mp3',
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
}
}]
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
_GRAPHQL_QUERY = (
'''query webBitePageGetBite($_id: MongoID!) {
web {
biteById(_id: $_id) {
...bitePageFrag
__typename
}
__typename
}
}
fragment bitePageFrag on Bite {
_id
title
userKeywords
keywords
color
visibility
isPremium
owned
price
extraReview
isAudioExists
image {
filename
original {
url
__typename
}
__typename
}
userReactions {
_id
reactions
createdAt
__typename
}
topReactions
totalSaveCount
saved
blerpLibraryType
license
licenseMetaData
playCount
totalShareCount
totalFavoriteCount
totalAddedToBoardCount
userCategory
userAudioQuality
audioCreationState
transcription
userTranscription
description
createdAt
updatedAt
author
listingType
ownerObject {
_id
username
profileImage {
filename
original {
url
__typename
}
__typename
}
__typename
}
transcription
favorited
visibility
isCurated
sourceUrl
audienceRating
strictAudienceRating
ownerId
reportObject {
reportedContentStatus
__typename
}
giphy {
mp4
gif
__typename
}
audio {
filename
original {
url
__typename
}
mp3 {
url
__typename
}
__typename
}
__typename
}
''')
def _real_extract(self, url):
audio_id = self._match_id(url)
data = {
'operationName': self._GRAPHQL_OPERATIONNAME,
'query': self._GRAPHQL_QUERY,
'variables': {
'_id': audio_id
}
}
headers = {
'Content-Type': 'application/json'
}
json_result = self._download_json('https://api.blerp.com/graphql',
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
bite_json = json_result['data']['web']['biteById']
info_dict = {
'id': bite_json['_id'],
'url': bite_json['audio']['mp3']['url'],
'title': bite_json['title'],
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
'ext': 'mp3',
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
}
return info_dict
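Like any new extractor, Blerp can be exercised through the embedding API; a small usage sketch (URL taken from the _TESTS block above):

import youtube_dl

with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
    info = ydl.extract_info('https://blerp.com/soundbites/6320fe8745636cb4dd677a5a')
    print(info['id'], info['title'], info['uploader'])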

View File

@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
traverse_obj,
try_get,
)
class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
_TESTS = [{
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
'md5': '14ede27ee2c957b7e4db93140fc0745c',
'info_dict': {
'id': 'PrumRdSQJW',
'ext': 'mp4',
'title': 'FCC Commissioner Brendan Carr on Elons Starlink',
'description': 'Or, why the government doesnt like SpaceX',
'channel': 'The Pull Request',
'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
}
}, {
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
'md5': '16f704ddbf82a27e3930533b12062f07',
'info_dict': {
'id': 'lzxMidUnjA',
'ext': 'mp4',
'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
'description': 'Lets talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
'channel': 'The DEBRIEF With Briahna Joy Gray',
'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
}
}]
def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id)
episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
if not episode:
raise ExtractorError('Failed to find episode data')
title = episode.get('title') or self._og_search_title(webpage)
description = episode.get('description') or self._og_search_description(webpage)
formats = []
formats.extend(self._extract_m3u8_formats(
episode.get('m3u8'), video_id, 'mp4',
entry_protocol='m3u8_native', fatal=False))
self._sort_formats(formats)
channel = try_get(episode, lambda x: x['show']['title'], compat_str)
channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'channel': channel,
'channel_url': channel_url,
}
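The `_search_nextjs_data` helper above works because Next.js sites embed their page props as JSON in a `__NEXT_DATA__` script tag; the technique in isolation (the sample markup is illustrative):

import json
import re

webpage = ('<script id="__NEXT_DATA__" type="application/json">'
           '{"props": {"pageProps": {"episode": {"title": "demo"}}}}</script>')
m = re.search(
    r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', webpage)
next_data = json.loads(m.group(1))
print(next_data['props']['pageProps']['episode']['title'])  # demo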

View File

@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     int_or_none,
     url_or_none,
 )
@@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            url, user_id, headers=self.geo_verification_headers())
-
-        manifest_root = self._html_search_regex(
-            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
-
-        if not manifest_root:
-            ERRORS = (
-                ("I'm offline, but let's stay connected", 'This user is currently offline'),
-                ('in a private show', 'This user is in a private show'),
-                ('is currently performing LIVE', 'This model is currently performing live'),
-            )
-            for pattern, message in ERRORS:
-                if pattern in webpage:
-                    error = message
-                    expected = True
-                    break
-            else:
-                error = 'Unable to find manifest URL root'
-                expected = False
-            raise ExtractorError(error, expected=expected)
-
         manifest = self._download_json(
-            '%s%s.json' % (manifest_root, user_id), user_id)
+            'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
 
         formats = []
+        thumbnails = []
         for format_id, format_dict in manifest['formats'].items():
             if not isinstance(format_dict, dict):
                 continue
@@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
                         'preference': -1,
                     })
                 else:
+                    if format_id == 'jpeg':
+                        thumbnails.append({
+                            'url': f['url'],
+                            'width': f['width'],
+                            'height': f['height'],
+                            'format_id': f['format_id'],
+                        })
                     continue
                 formats.append(f)
         self._sort_formats(formats)
@@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
         return {
             'id': user_id,
             'title': self._live_title(user_id),
+            'thumbnails': thumbnails,
             'is_live': True,
             'formats': formats,
             'age_limit': 18

View File

@@ -138,6 +138,7 @@ from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
 )
+from .blerp import BlerpIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE
@@ -158,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .callin import CallinIE
 from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE
@@ -374,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
     FilmOnChannelIE,
@@ -725,6 +728,7 @@ from .myvi import (
     MyviIE,
     MyviEmbedIE,
 )
+from .myvideoge import MyVideoGeIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
@@ -1667,3 +1671,7 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)

View File

@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
unified_timestamp,
)
if not callable(getattr(InfoExtractor, '_match_valid_url', None)):
BaseInfoExtractor = InfoExtractor
import re
class InfoExtractor(BaseInfoExtractor):
@classmethod
def _match_valid_url(cls, url):
return re.match(cls._VALID_URL, url)
class FifaIE(InfoExtractor):
_VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
'info_dict': {
'id': '7on10qPcnyLajDDU3ntg6y',
'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
'ext': 'mp4',
'categories': ['FIFA Tournaments'],
'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
'duration': 8165,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
'info_dict': {
'id': '1cg5r5Qt6Qt12ilkDgb1sV',
'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
'description': 'md5:d908c74ee66322b804ae2e521b02a855',
'ext': 'mp4',
'categories': ['FIFA Tournaments', 'Highlights'],
'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
'duration': 902,
'release_timestamp': 1404777600,
'release_date': '20140708',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
'info_dict': {
'id': '3C6gQH9C2DLwzNx7BMRQdp',
'title': 'Josimar goal against Northern Ireland | Classic Goals',
'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
'ext': 'mp4',
'categories': ['FIFA Tournaments', 'Goal'],
'duration': 28,
'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id, locale = self._match_valid_url(url).group('id', 'locale')
webpage = self._download_webpage(url, video_id)
preconnect_link = self._search_regex(
r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
video_details = self._download_json(
'{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False)
preplay_parameters = self._download_json(
'{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters']
content_data = self._download_json(
# 1. query string is expected to be sent as-is
# 2. `sig` must be appended
# 3. if absent, the call appears to work but the manifest is bad (404)
'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
video_id, 'Downloading Content Data')
# formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
self._sort_formats(formats)
return {
'id': video_id,
'title': video_details['title'],
'description': video_details.get('description'),
'duration': int_or_none(video_details.get('duration')),
'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
'formats': formats,
'subtitles': subtitles,
}
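The three numbered comments above pin down the Uplynk preplay contract; a standalone sketch (all values are placeholders, the real ones come from the `preplayParameters` of the videoPlayerData response):

preplay_parameters = {
    'contentId': 'abc123',      # placeholder
    'queryStr': 'v=2&cid=xyz',  # sent as-is (comment 1)
    'signature': 'deadbeef',    # appended last (comment 2)
}
url = ('https://content.uplynk.com/preplay/{contentId}/multiple.json'
       '?{queryStr}&sig={signature}').format(**preplay_parameters)
# omitting &sig= appears to succeed but yields a manifest that 404s (comment 3)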

View File

@@ -0,0 +1,87 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_id,
get_element_by_class,
int_or_none,
js_to_json,
MONTH_NAMES,
qualities,
unified_strdate,
)
class MyVideoGeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://www.myvideo.ge/v/3941048',
'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
'info_dict': {
'id': '3941048',
'ext': 'mp4',
'title': 'The best prikol',
'upload_date': '20200611',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'chixa33',
'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
},
# working from local dev system
'skip': 'site blocks CI servers',
}
_MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']
_quality = staticmethod(qualities(('SD', 'HD')))
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = (
self._og_search_title(webpage, default=None)
or clean_html(get_element_by_class('my_video_title', webpage))
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))
jwplayer_sources = self._parse_json(
self._search_regex(
r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
or '',
video_id, transform_source=js_to_json, fatal=False)
formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
for f in formats or []:
f['preference'] = self._quality(f['format_id'])
self._sort_formats(formats)
description = (
self._og_search_description(webpage)
or get_element_by_id('long_desc_holder', webpage)
or self._html_search_meta('description', webpage))
uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
upload_date = get_element_by_class('mv_vid_upl_date', webpage)
# as ka locale may not be present roll a local date conversion
upload_date = (unified_strdate(
# translate any ka month to an en one
re.sub('|'.join(self._MONTH_NAMES_KA),
lambda m: MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(m.group(0))],
upload_date, re.I))
if upload_date else None)
return {
'id': video_id,
'title': title,
'description': description,
'uploader': uploader,
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': upload_date,
'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
}
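The Georgian month shim above can be tried on its own; a sketch reusing the same tables (the date string is illustrative):

import re

from youtube_dl.utils import MONTH_NAMES, unified_strdate

MONTH_NAMES_KA = [
    'იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი',
    'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']

date_ka = '11 ივნისი 2020'
date_en = re.sub(
    '|'.join(MONTH_NAMES_KA),
    lambda m: MONTH_NAMES['en'][MONTH_NAMES_KA.index(m.group(0))],
    date_ka)
print(unified_strdate(date_en))  # 20200611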

View File

@@ -0,0 +1,105 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
from ..utils import (
merge_dicts,
)
class Pr0grammStaticIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/static/5466437
_VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://pr0gramm.com/static/5466437',
'md5': '52fa540d70d3edc286846f8ca85938aa',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# Fetch media sources
entries = self._parse_html5_media_entries(url, webpage, video_id)
media_info = entries[0]
# this raises if there are no formats
self._sort_formats(media_info.get('formats') or [])
# Fetch author
uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')
# Fetch approx upload timestamp from filename
# Have None-defaults in case the extraction fails
uploadDay = None
uploadMon = None
uploadYear = None
uploadTimestr = None
# (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
if (m):
# Up to a day of accuracy should suffice...
uploadDay = m.groupdict().get('day')
uploadMon = m.groupdict().get('mon')
uploadYear = m.groupdict().get('year')
uploadTimestr = uploadYear + uploadMon + uploadDay
return merge_dicts({
'id': video_id,
'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
'uploader': uploader,
'upload_date': uploadTimestr
}, media_info)
# This extractor is for the primary url (used for sharing, and appears in the
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
# video information here. So let's redirect to a compatibility version of
# the site, which does contain the <video>-element by itself, without requiring
# js to be ran.
class Pr0grammIE(InfoExtractor):
# Possible urls:
# https://pr0gramm.com/new/546637
# https://pr0gramm.com/new/video/546637
# https://pr0gramm.com/top/546637
# https://pr0gramm.com/top/video/546637
# https://pr0gramm.com/user/g11st/uploads/5466437
# https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
# https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
# https://pr0gramm.com/user/froschler/1elf/5232030
# https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
# https://pr0gramm.com/top/fruher war alles damals/5498175
_VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
_TEST = {
'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': {
'id': '5466437',
'ext': 'mp4',
'title': 'pr0gramm-5466437 by g11st',
'uploader': 'g11st',
'upload_date': '20221221',
}
}
def _generic_title():
return "oof"
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'https://pr0gramm.com/static/' + video_id,
video_id=video_id,
ie=Pr0grammStaticIE.ie_key())

View File

@@ -24,7 +24,7 @@ from ..utils import (
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -123,6 +123,9 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -433,6 +436,9 @@ class XHamsterUserIE(InfoExtractor):
     }, {
         'url': 'https://xhday.com/users/mobhunter',
         'only_matching': True,
+    }, {
+        'url': 'https://xhvid.com/users/pelushe21',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):

View File

@@ -315,7 +315,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         title = try_get(
             renderer,
             (lambda x: x['title']['runs'][0]['text'],
-             lambda x: x['title']['simpleText']), compat_str)
+             lambda x: x['title']['simpleText'],
+             lambda x: x['headline']['simpleText']), compat_str)
         description = try_get(
             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
             compat_str)
@@ -2207,6 +2208,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:tab'
 
     _TESTS = [{
+        # Shorts
+        'url': 'https://www.youtube.com/@SuperCooperShorts/shorts',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'description': 'Short clips from Super Cooper Sundays!',
+            'id': 'UCKMA8kHZ8bPYpnMNaUSxfEQ',
+            'title': 'Super Cooper Shorts - Shorts',
+        }
+    }, {
+        # Channel that does not have a Shorts tab. Test should just download videos on Home tab instead
+        'url': 'https://www.youtube.com/@emergencyawesome/shorts',
+        'info_dict': {
+            'description': 'md5:592c080c06fef4de3c902c4a8eecd850',
+            'id': 'UCDiFRMQWpcp8_KD4vwIVicw',
+            'title': 'Emergency Awesome - Home',
+        },
+        'playlist_mincount': 5,
+    }, {
         # playlists, multipage
         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
         'playlist_mincount': 94,
@@ -2680,7 +2699,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     def _rich_grid_entries(self, contents):
         for content in contents:
-            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
+            video_renderer = try_get(
+                content,
+                (lambda x: x['richItemRenderer']['content']['videoRenderer'],
+                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
+                dict)
             if video_renderer:
                 entry = self._video_entry(video_renderer)
                 if entry:
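The Shorts fix leans on `try_get` accepting a tuple of getter callables and returning the first hit of the expected type; a tiny sketch (the sample dict is illustrative):

from youtube_dl.utils import try_get

# Shorts live under reelItemRenderer instead of videoRenderer
content = {'richItemRenderer': {'content': {'reelItemRenderer': {'videoId': 'xyz'}}}}
renderer = try_get(
    content,
    (lambda x: x['richItemRenderer']['content']['videoRenderer'],
     lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
    dict)
assert renderer == {'videoId': 'xyz'}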

View File

@@ -187,19 +187,6 @@ class LocalNameSpace(ChainMap):
 
 class JSInterpreter(object):
     __named_object_counter = 0
 
-    _RE_FLAGS = {
-        # special knowledge: Python's re flags are bitmask values, current max 128
-        # invent new bitmask values well above that for literal parsing
-        # TODO: new pattern class to execute matches with these flags
-        'd': 1024,  # Generate indices for substring matches
-        'g': 2048,  # Global search
-        'i': re.I,  # Case-insensitive search
-        'm': re.M,  # Multi-line search
-        's': re.S,  # Allows . to match newline characters
-        'u': re.U,  # Treat a pattern as a sequence of unicode code points
-        'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
-    }
-
     _OBJ_NAME = '__youtube_dl_jsinterp_obj'
 
     OP_CHARS = None
@@ -214,12 +201,51 @@ class JSInterpreter(object):
         def __init__(self, msg, *args, **kwargs):
             expr = kwargs.pop('expr', None)
             if expr is not None:
-                msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
+                msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
             super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
 
+    class JS_RegExp(object):
+        _RE_FLAGS = {
+            # special knowledge: Python's re flags are bitmask values, current max 128
+            # invent new bitmask values well above that for literal parsing
+            # TODO: new pattern class to execute matches with these flags
+            'd': 1024,  # Generate indices for substring matches
+            'g': 2048,  # Global search
+            'i': re.I,  # Case-insensitive search
+            'm': re.M,  # Multi-line search
+            's': re.S,  # Allows . to match newline characters
+            'u': re.U,  # Treat a pattern as a sequence of unicode code points
+            'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
+        }
+
+        def __init__(self, pattern_txt, flags=''):
+            if isinstance(flags, compat_str):
+                flags, _ = self.regex_flags(flags)
+            # Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
+            # First, avoid https://github.com/python/cpython/issues/74534
+            self.__self = re.compile(pattern_txt.replace('[[', r'[\['), flags)
+            for name in dir(self.__self):
+                # Only these? Obviously __class__, __init__.
+                # PyPy creates a __weakref__ attribute with value None
+                # that can't be setattr'd but also can't need to be copied.
+                if name in ('__class__', '__init__', '__weakref__'):
+                    continue
+                setattr(self, name, getattr(self.__self, name))
+
+        @classmethod
+        def regex_flags(cls, expr):
+            flags = 0
+            if not expr:
+                return flags, expr
+            for idx, ch in enumerate(expr):
+                if ch not in cls._RE_FLAGS:
+                    break
+                flags |= cls._RE_FLAGS[ch]
+            return flags, expr[idx + 1:]
+
     @classmethod
     def __op_chars(cls):
-        op_chars = set(';,')
+        op_chars = set(';,[')
         for op in cls._all_operators():
             for c in op[0]:
                 op_chars.add(c)
@@ -231,17 +257,6 @@ class JSInterpreter(object):
             namespace[name] = obj
         return name
 
-    @classmethod
-    def _regex_flags(cls, expr):
-        flags = 0
-        if not expr:
-            return flags, expr
-        for idx, ch in enumerate(expr):
-            if ch not in cls._RE_FLAGS:
-                break
-            flags |= cls._RE_FLAGS[ch]
-        return flags, expr[idx + 1:]
-
     @classmethod
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
@@ -268,7 +283,7 @@ class JSInterpreter(object):
             elif in_quote == '/' and char in '[]':
                 in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op))
+            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -301,7 +316,7 @@ class JSInterpreter(object):
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
-            raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals()))
+            raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals()))
         return separated[0][1:].strip(), separated[1].strip()
 
     @staticmethod
@@ -328,7 +343,7 @@ class JSInterpreter(object):
         try:
             return opfunc(left_val, right_val)
         except Exception as e:
-            raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
+            raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
 
     def _index(self, obj, idx, allow_undefined=False):
         if idx == 'length':
@@ -338,7 +353,7 @@ class JSInterpreter(object):
         except Exception as e:
             if allow_undefined:
                 return JS_Undefined
-            raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
+            raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
 
     def _dump(self, obj, namespace):
         try:
@@ -352,6 +367,7 @@ class JSInterpreter(object):
         allow_recursion -= 1
 
         should_return = False
+        # fails on (eg) if (...) stmt1; else stmt2;
         sub_statements = list(self._separate(stmt, ';')) or ['']
         expr = stmt = sub_statements.pop().strip()
         for sub_stmt in sub_statements:
@@ -371,25 +387,30 @@ class JSInterpreter(object):
             if expr[0] in _QUOTES:
                 inner, outer = self._separate(expr, expr[0], 1)
                 if expr[0] == '/':
-                    flags, outer = self._regex_flags(outer)
-                    inner = re.compile(inner[1:], flags=flags)  # , strict=True))
+                    flags, outer = self.JS_RegExp.regex_flags(outer)
+                    inner = self.JS_RegExp(inner[1:], flags=flags)
                 else:
                     inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True))
                 if not outer:
                     return inner, should_return
                 expr = self._named_object(local_vars, inner) + outer
 
-        if expr.startswith('new '):
-            obj = expr[4:]
-            if obj.startswith('Date('):
-                left, right = self._separate_at_paren(obj[4:])
-                expr = unified_timestamp(
-                    self.interpret_expression(left, local_vars, allow_recursion), False)
+        new_kw, _, obj = expr.partition('new ')
+        if not new_kw:
+            for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
+                                  ('RegExp', self.JS_RegExp),
+                                  ('Error', self.Exception)):
+                if not obj.startswith(klass + '('):
+                    continue
+                left, right = self._separate_at_paren(obj[len(klass):])
+                argvals = self.interpret_iter(left, local_vars, allow_recursion)
+                expr = konstr(*argvals)
                 if not expr:
-                    raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
-                expr = self._dump(int(expr * 1000), local_vars) + right
+                    raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
+                expr = self._dump(expr, local_vars) + right
+                break
             else:
-                raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr)
+                raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
 
         if expr.startswith('void '):
             left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
@@ -428,11 +449,47 @@ class JSInterpreter(object):
 
         m = re.match(r'''(?x)
             (?P<try>try)\s*\{|
+            (?P<if>if)\s*\(|
             (?P<switch>switch)\s*\(|
-            (?P<for>for)\s*\(
+            (?P<for>for)\s*\(|
+            (?P<while>while)\s*\(
             ''', expr)
         md = m.groupdict() if m else {}
 
-        if md.get('try'):
+        if md.get('if'):
+            cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
+            if expr.startswith('{'):
+                if_expr, expr = self._separate_at_paren(expr)
+            else:
+                # may lose ... else ... because of ll.368-374
+                if_expr, expr = self._separate_at_paren(expr, delim=';')
+            else_expr = None
+            m = re.match(r'else\s*(?P<block>\{)?', expr)
+            if m:
+                if m.group('block'):
+                    else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                else:
+                    # handle subset ... else if (...) {...} else ...
+                    # TODO: make interpret_statement do this properly, if possible
+                    exprs = list(self._separate(expr[m.end():], delim='}', max_split=2))
+                    if len(exprs) > 1:
+                        if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]):
+                            else_expr = exprs[0] + '}' + exprs[1]
+                            expr = (exprs[2] + '}') if len(exprs) == 3 else None
+                        else:
+                            else_expr = exprs[0]
+                            exprs.append('')
+                            expr = '}'.join(exprs[1:])
+                    else:
+                        else_expr = exprs[0]
+                        expr = None
+                    else_expr = else_expr.lstrip() + '}'
+            cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
+            ret, should_abort = self.interpret_statement(
+                if_expr if cndn else else_expr, local_vars, allow_recursion)
+            if should_abort:
+                return ret, True
+
+        elif md.get('try'):
             try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
             err = None
             try:
@@ -469,8 +526,8 @@ class JSInterpreter(object):
             if err:
                 raise err
 
-        elif md.get('for'):
-            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
+        elif md.get('for') or md.get('while'):
+            init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:])
             if remaining.startswith('{'):
                 body, expr = self._separate_at_paren(remaining)
             else:
@@ -481,11 +538,12 @@ class JSInterpreter(object):
                 body = 'switch(%s){%s}' % (switch_val, body)
             else:
                 body, expr = remaining, ''
-            start, cndn, increment = self._separate(constructor, ';')
-            self.interpret_expression(start, local_vars, allow_recursion)
-            while True:
-                if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
-                    break
+            if md.get('for'):
+                start, cndn, increment = self._separate(init_or_cond, ';')
+                self.interpret_expression(start, local_vars, allow_recursion)
+            else:
+                cndn, increment = init_or_cond, None
+            while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                 try:
                     ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                     if should_abort:
@@ -494,7 +552,8 @@ class JSInterpreter(object):
                         break
                 except JS_Continue:
                     pass
-                self.interpret_expression(increment, local_vars, allow_recursion)
+                if increment:
+                    self.interpret_expression(increment, local_vars, allow_recursion)
 
         elif md.get('switch'):
             switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
@@ -749,6 +808,10 @@ class JSInterpreter(object):
                 if idx >= len(obj):
                     return None
                 return ord(obj[idx])
+            elif member == 'replace':
+                assertion(isinstance(obj, compat_str), 'must be applied on a string')
+                assertion(len(argvals) == 2, 'takes exactly two arguments')
+                return re.sub(argvals[0], argvals[1], obj)
 
             idx = int(member) if isinstance(obj, list) else member
             return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -780,6 +843,10 @@ class JSInterpreter(object):
             raise self.Exception('Cannot return from an expression', expr)
         return ret
 
+    def interpret_iter(self, list_txt, local_vars, allow_recursion):
+        for v in self._separate(list_txt):
+            yield self.interpret_expression(v, local_vars, allow_recursion)
+
     def extract_object(self, objname):
         _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
         obj = {}
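With the JS_RegExp class above, JS regex literals come back as pattern-like objects exposing the underlying Python pattern's attributes; a closing sketch in the spirit of the updated tests:

import re

from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('function x() { return /ab+c/i; }')
pat = jsi.call_function('x')
assert pat.pattern == 'ab+c'
assert pat.flags & re.I  # the JS 'i' flag maps onto Python's re.I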