mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-09-28 04:18:36 +09:00
Compare commits
176 Commits
2015.10.23
...
2015.11.13
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a1ec9a7553 | ||
![]() |
91d644b5ba | ||
![]() |
5d6c3d6a66 | ||
![]() |
1ebb4717df | ||
![]() |
cf5881fc4d | ||
![]() |
fcd817a326 | ||
![]() |
031ec536f0 | ||
![]() |
668db403f9 | ||
![]() |
b9ad101926 | ||
![]() |
435911029f | ||
![]() |
699ed30cee | ||
![]() |
9eab37dca0 | ||
![]() |
9a8a12b7d8 | ||
![]() |
a4c2ab35c1 | ||
![]() |
3d9c4bf09a | ||
![]() |
8b8a39e279 | ||
![]() |
82393e2bb2 | ||
![]() |
2eb99a4b98 | ||
![]() |
6abce58a12 | ||
![]() |
990e6e8fa3 | ||
![]() |
bfd88516eb | ||
![]() |
d8b7e80d29 | ||
![]() |
37120974dc | ||
![]() |
42fc93c709 | ||
![]() |
a625e56543 | ||
![]() |
9b738b2caa | ||
![]() |
37ca7b22b5 | ||
![]() |
50f84a9ae1 | ||
![]() |
ff29bf81f8 | ||
![]() |
b25f753397 | ||
![]() |
6a5d6de1e3 | ||
![]() |
1c31a5b0e0 | ||
![]() |
4f5cdf7c9b | ||
![]() |
f09a767d31 | ||
![]() |
cc8034cc4c | ||
![]() |
50506cb607 | ||
![]() |
aa8d2d5be6 | ||
![]() |
114e6025b0 | ||
![]() |
fda2717ef9 | ||
![]() |
937511dfc0 | ||
![]() |
d5c181a14e | ||
![]() |
e8ce2375e0 | ||
![]() |
6fdb39ded1 | ||
![]() |
8e3a2bd620 | ||
![]() |
a06bf87a2c | ||
![]() |
ee4337d100 | ||
![]() |
cff551c0b0 | ||
![]() |
6d02b9a392 | ||
![]() |
2c740cf28d | ||
![]() |
5214f1e31d | ||
![]() |
5d0f84d32c | ||
![]() |
ee223abb88 | ||
![]() |
21d0c33ecd | ||
![]() |
8b6d9406db | ||
![]() |
686f98816e | ||
![]() |
0fa6b17dcc | ||
![]() |
472404953a | ||
![]() |
ae4ddf9efa | ||
![]() |
ea8ed40b2f | ||
![]() |
71bb016160 | ||
![]() |
179ffab69c | ||
![]() |
deb85c32bb | ||
![]() |
92366d189e | ||
![]() |
81413c0165 | ||
![]() |
1e2eb4b40d | ||
![]() |
01003d072c | ||
![]() |
5003e4283b | ||
![]() |
123c781044 | ||
![]() |
e68dd1921a | ||
![]() |
6953d8e95a | ||
![]() |
b3613d36da | ||
![]() |
53472df857 | ||
![]() |
2549e113b8 | ||
![]() |
b15c44cd36 | ||
![]() |
f93ded9852 | ||
![]() |
89ea063eeb | ||
![]() |
44b2264fea | ||
![]() |
cb5a470635 | ||
![]() |
17d1900581 | ||
![]() |
5d501a0901 | ||
![]() |
c13722480b | ||
![]() |
e7d34c03f2 | ||
![]() |
264cd00fff | ||
![]() |
a4a6b7b80f | ||
![]() |
aebb42d32b | ||
![]() |
b4ef6a0038 | ||
![]() |
5d235ca7f6 | ||
![]() |
c3459d24f1 | ||
![]() |
e3778cce0e | ||
![]() |
ad607563a2 | ||
![]() |
236cb2131b | ||
![]() |
66d041f250 | ||
![]() |
f3cb54e6d9 | ||
![]() |
0aeb9a106e | ||
![]() |
fd8102820c | ||
![]() |
bfdf891fd3 | ||
![]() |
3fa3ff1bc3 | ||
![]() |
0a0110fc6b | ||
![]() |
852fad922f | ||
![]() |
fc68d52bb9 | ||
![]() |
dde9fe9788 | ||
![]() |
a230068ff7 | ||
![]() |
6a75040278 | ||
![]() |
c514b0ec65 | ||
![]() |
eb97f46e8b | ||
![]() |
c90d16cf36 | ||
![]() |
ab6ca04802 | ||
![]() |
999079b454 | ||
![]() |
8a06999ba0 | ||
![]() |
80dcee5cd5 | ||
![]() |
30eecc6a04 | ||
![]() |
dbd82a1d4f | ||
![]() |
76f0c50d3d | ||
![]() |
dc519b5421 | ||
![]() |
ae12bc3ebb | ||
![]() |
e327b736ca | ||
![]() |
82b69a5cbb | ||
![]() |
11465da702 | ||
![]() |
578c074575 | ||
![]() |
8cdb5c8453 | ||
![]() |
2b1b2d83ca | ||
![]() |
c3040bd00a | ||
![]() |
8c1aa28c27 | ||
![]() |
78d7ee19dc | ||
![]() |
892015b088 | ||
![]() |
47f2d01a5a | ||
![]() |
33a513faf7 | ||
![]() |
6722ebd437 | ||
![]() |
721f5a277c | ||
![]() |
6fb8ace671 | ||
![]() |
ae37338e68 | ||
![]() |
03c2c162f9 | ||
![]() |
52c3a6e49d | ||
![]() |
4e16c1f80b | ||
![]() |
7ccb2b84dd | ||
![]() |
0a192fbea7 | ||
![]() |
a526167d40 | ||
![]() |
f78546272c | ||
![]() |
c137cc0d33 | ||
![]() |
6e4b8b2891 | ||
![]() |
5dadae079b | ||
![]() |
cd08d806b1 | ||
![]() |
5f9f87c06f | ||
![]() |
387db16a78 | ||
![]() |
36e6f62cd0 | ||
![]() |
755ff8d22c | ||
![]() |
7b3a19e533 | ||
![]() |
4f13f8f798 | ||
![]() |
feb7711cf5 | ||
![]() |
589c33dade | ||
![]() |
e572a1010b | ||
![]() |
7e0dc61334 | ||
![]() |
8e82ecfe8f | ||
![]() |
ec29539e06 | ||
![]() |
8cd9614abf | ||
![]() |
324ac0a243 | ||
![]() |
3711304510 | ||
![]() |
50b936936d | ||
![]() |
d97da29da2 | ||
![]() |
7687b354c5 | ||
![]() |
36d7281037 | ||
![]() |
865d1fbafc | ||
![]() |
ac21e71968 | ||
![]() |
943a1e24b8 | ||
![]() |
50f01302d3 | ||
![]() |
0198807ef9 | ||
![]() |
6856139705 | ||
![]() |
c93153852f | ||
![]() |
ab9c7214ee | ||
![]() |
dae69640d0 | ||
![]() |
edeb3e7cb1 | ||
![]() |
5c43afd40f | ||
![]() |
9170ca5b16 | ||
![]() |
65d49afa48 | ||
![]() |
eb08081330 | ||
![]() |
f870544302 |
2
AUTHORS
2
AUTHORS
@@ -144,3 +144,5 @@ Lee Jenkins
|
||||
Anssi Hannula
|
||||
Lukáš Lalinský
|
||||
Qijiang Fan
|
||||
Rémy Léone
|
||||
Marco Ferragina
|
||||
|
@@ -1,6 +1,6 @@
|
||||
**Please include the full output of youtube-dl when run with `-v`**.
|
||||
|
||||
The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
|
@@ -795,7 +795,7 @@ Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/i
|
||||
|
||||
**Please include the full output of youtube-dl when run with `-v`**.
|
||||
|
||||
The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
|
@@ -93,6 +93,7 @@
|
||||
- **Clipsyndicate**
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNET**
|
||||
- **CNN**
|
||||
@@ -122,6 +123,7 @@
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Discovery**
|
||||
- **Dotsub**
|
||||
@@ -194,10 +196,10 @@
|
||||
- **Giga**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **GodTube**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com
|
||||
- **Goshgay**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
@@ -281,7 +283,7 @@
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **Malemotion**
|
||||
- **MDR**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
@@ -616,7 +618,6 @@
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videolectures.net**
|
||||
- **VideoMega**
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube
|
||||
@@ -626,6 +627,7 @@
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
- **Viewster**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
- **viki:channel**
|
||||
- **vimeo**
|
||||
@@ -668,6 +670,7 @@
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XMinus**
|
||||
|
2
setup.py
2
setup.py
@@ -28,7 +28,7 @@ py2exe_options = {
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe'],
|
||||
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
|
||||
}
|
||||
|
||||
py2exe_console = [{
|
||||
|
@@ -13,8 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from youtube_dl.utils import get_filesystem_encoding
|
||||
from youtube_dl.compat import (
|
||||
compat_getenv,
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_shlex_split,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
@@ -71,5 +73,20 @@ class TestCompat(unittest.TestCase):
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
<root foo="bar" spam="中文">
|
||||
<normal>foo</normal>
|
||||
<chinese>中文</chinese>
|
||||
<foo><bar>spam</bar></foo>
|
||||
</root>
|
||||
'''
|
||||
doc = compat_etree_fromstring(xml.encode('utf-8'))
|
||||
self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
|
||||
self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
|
||||
self.assertTrue(isinstance(doc.find('normal').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -102,7 +102,7 @@ def generator(test_case):
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', True)
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
params.setdefault('skip_download', True)
|
||||
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
|
@@ -19,6 +19,9 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function x3(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x3'), 42)
|
||||
|
||||
jsi = JSInterpreter('var x5 = function(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x5'), 42)
|
||||
|
||||
def test_calc(self):
|
||||
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
||||
self.assertEqual(jsi.call_function('x4', 3), 7)
|
||||
|
@@ -28,6 +28,7 @@ from youtube_dl.extractor import (
|
||||
ThePlatformFeedIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
DemocracynowIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -346,5 +347,25 @@ class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||
|
||||
|
||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
    """Subtitle tests for DemocracynowIE, run against two different URLs."""

    url = 'http://www.democracynow.org/shows/2015/7/3'
    IE = DemocracynowIE

    def _assert_only_english_subtitles(self):
        """Request all subtitles and check that exactly the 'en' track comes back.

        The track content is compared through its md5 digest.
        """
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        found = self.getSubtitles()
        self.assertEqual(set(found.keys()), set(['en']))
        self.assertEqual(md5(found['en']), 'acaca989e24a9e45a6719c9b3d60815c')

    def test_allsubtitles(self):
        # Uses the class-level URL.
        self._assert_only_english_subtitles()

    def test_subtitles_in_page(self):
        # Same expectations, but for a different page URL.
        self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
        self._assert_only_english_subtitles()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -68,6 +68,9 @@ from youtube_dl.utils import (
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
|
||||
|
||||
class TestUtil(unittest.TestCase):
|
||||
@@ -233,6 +236,7 @@ class TestUtil(unittest.TestCase):
|
||||
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@@ -242,7 +246,7 @@ class TestUtil(unittest.TestCase):
|
||||
<node x="b" y="d" />
|
||||
<node x="" />
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
|
||||
@@ -263,7 +267,7 @@ class TestUtil(unittest.TestCase):
|
||||
<url>http://server.com/download.mp3</url>
|
||||
</media:song>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
|
||||
self.assertTrue(find('media:song') is not None)
|
||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||
@@ -275,9 +279,16 @@ class TestUtil(unittest.TestCase):
|
||||
p = xml.etree.ElementTree.SubElement(div, 'p')
|
||||
p.text = 'Foo'
|
||||
self.assertEqual(xpath_element(doc, 'div/p'), p)
|
||||
self.assertEqual(xpath_element(doc, ['div/p']), p)
|
||||
self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p)
|
||||
self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
|
||||
self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default')
|
||||
self.assertTrue(xpath_element(doc, 'div/bar') is None)
|
||||
self.assertTrue(xpath_element(doc, ['div/bar']) is None)
|
||||
self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None)
|
||||
self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
|
||||
self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True)
|
||||
self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True)
|
||||
|
||||
def test_xpath_text(self):
|
||||
testxml = '''<root>
|
||||
@@ -285,7 +296,7 @@ class TestUtil(unittest.TestCase):
|
||||
<p>Foo</p>
|
||||
</div>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||
self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
|
||||
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||
@@ -297,7 +308,7 @@ class TestUtil(unittest.TestCase):
|
||||
<p x="a">Foo</p>
|
||||
</div>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
doc = compat_etree_fromstring(testxml)
|
||||
self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
|
||||
self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
|
||||
self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
|
||||
@@ -425,6 +436,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251)
|
||||
self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None)
|
||||
|
||||
def test_strip_jsonp(self):
|
||||
stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
|
||||
|
@@ -572,7 +572,7 @@ class YoutubeDL(object):
|
||||
if v is not None)
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
@@ -580,7 +580,7 @@ class YoutubeDL(object):
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return filename
|
||||
return sanitize_path(filename)
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
@@ -14,6 +14,7 @@ import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import itertools
|
||||
import xml.etree.ElementTree
|
||||
|
||||
|
||||
try:
|
||||
@@ -212,6 +213,43 @@ try:
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
if sys.version_info[0] >= 3:
    # No workaround is applied on Python 3: the stdlib parser is used as-is.
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    etree = xml.etree.ElementTree

    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield root followed by all of its descendants in document order.

            Element.iter() starts with the element itself, so the fallback
            must do the same; otherwise the root's own text would be skipped
            by the decoding loop in compat_etree_fromstring.
            """
            yield root
            for child in root.findall('*'):
                for sub in _etree_iter(child):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Feed text to parser (a default XMLParser if none is given) and return the result of close()."""
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Build an Element whose byte-string attribute values are decoded as UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse XML text and return the root Element.

        Attribute values are decoded by _element_factory while the tree is
        built; element text that is still a byte string afterwards is decoded
        as UTF-8 here.
        """
        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            # isinstance() is already False for None, no separate check needed
            if isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
|
||||
|
||||
try:
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
@@ -507,6 +545,7 @@ __all__ = [
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_etree_fromstring',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
|
@@ -5,12 +5,13 @@ import io
|
||||
import itertools
|
||||
import os
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
@@ -285,9 +286,11 @@ class F4mFD(FragmentFD):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
urlh = self.ydl.urlopen(man_url)
|
||||
man_url = urlh.geturl()
|
||||
manifest = urlh.read()
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
@@ -329,20 +332,25 @@ class F4mFD(FragmentFD):
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
base_url_parsed = compat_urllib_parse_urlparse(base_url)
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
frags_filenames = []
|
||||
while fragments_list:
|
||||
seg_i, frag_i = fragments_list.pop(0)
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
url = base_url + name
|
||||
query = []
|
||||
if base_url_parsed.query:
|
||||
query.append(base_url_parsed.query)
|
||||
if akamai_pv:
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
query.append(akamai_pv.strip(';'))
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
url += info_dict.get('extra_param_to_segment_url')
|
||||
query.append(info_dict['extra_param_to_segment_url'])
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': url})
|
||||
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
|
||||
if not success:
|
||||
return False
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
|
@@ -90,6 +90,7 @@ from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnet import CNETIE
|
||||
from .cnn import (
|
||||
@@ -123,6 +124,7 @@ from .dbtv import DBTVIE
|
||||
from .dcn import DCNIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .dotsub import DotsubIE
|
||||
@@ -210,13 +212,15 @@ from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globo import GloboIE
|
||||
from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
@@ -719,7 +723,6 @@ from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
@@ -729,6 +732,7 @@ from .vidme import VidmeIE
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
from .viidea import ViideaIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoAlbumIE,
|
||||
@@ -781,6 +785,7 @@ from .wrzuta import WrzutaIE
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
@@ -36,6 +36,18 @@ class ABCIE(InfoExtractor):
|
||||
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
|
||||
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
|
||||
'info_dict': {
|
||||
'id': '6880080',
|
||||
'ext': 'mp3',
|
||||
'title': 'NAB lifts interest rates, following Westpac and CBA',
|
||||
'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -43,7 +55,7 @@ class ABCIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
@@ -60,11 +72,13 @@ class ABCIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'url': url_info['url'],
|
||||
'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
|
||||
'width': int_or_none(url_info.get('width')),
|
||||
'height': int_or_none(url_info.get('height')),
|
||||
'tbr': int_or_none(url_info.get('bitrate')),
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
} for url_info in urls_info]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -26,8 +26,8 @@ class AnitubeIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
key = self._html_search_regex(
|
||||
r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
|
||||
key = self._search_regex(
|
||||
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
|
||||
|
||||
config_xml = self._download_xml(
|
||||
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
|
||||
|
@@ -14,8 +14,8 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
parse_xml,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
@@ -161,7 +161,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = parse_xml(webpage)
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -14,7 +13,10 @@ from ..utils import (
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
)
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
@@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
|
||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
@@ -33,6 +33,8 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)
|
||||
if not height:
|
||||
|
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
import itertools
|
||||
import json
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
@@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
lq_doc = ET.fromstring(lq_page)
|
||||
lq_doc = compat_etree_fromstring(lq_page)
|
||||
lq_durls = lq_doc.findall('./durl')
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
|
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
@@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
object_str = fix_xml_ampersands(object_str)
|
||||
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
|
@@ -67,9 +67,12 @@ class CBSNewsIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
}
|
||||
if uri.startswith('rtmp'):
|
||||
play_path = re.sub(
|
||||
r'{slistFilePath}', '',
|
||||
uri.split('<break>')[-1].split('{break}')[-1])
|
||||
fmt.update({
|
||||
'app': 'ondemand?auth=cbs',
|
||||
'play_path': 'mp4:' + uri.split('<break>')[-1],
|
||||
'play_path': 'mp4:' + play_path,
|
||||
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
|
||||
'page_url': 'http://www.cbsnews.com',
|
||||
'ext': 'flv',
|
||||
|
57
youtube_dl/extractor/clyp.py
Normal file
57
youtube_dl/extractor/clyp.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ClypIE(InfoExtractor):
    """Extractor for clyp.it audio pages.

    The id is taken from the page URL and the metadata is fetched as JSON
    from https://api.clyp.it/<id>.
    """

    _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
    _TEST = {
        'url': 'https://clyp.it/ojz2wfah',
        'md5': '1d4961036c41247ecfdcc439c0cddcbb',
        'info_dict': {
            'id': 'ojz2wfah',
            'ext': 'mp3',
            'title': 'Krisson80 - bits wip wip',
            'description': '#Krisson80BitsWipWip #chiptune\n#wip',
            'duration': 263.21,
            'timestamp': 1443515251,
            'upload_date': '20150929',
        },
    }

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        metadata = self._download_json(
            'https://api.clyp.it/%s' % audio_id, audio_id)

        # Candidate ids, in lookup order: Ogg, Mp3, SecureOgg, SecureMp3.
        # Each one maps to a '<format_id>Url' key in the metadata.
        candidate_ids = [
            '%s%s' % (prefix, codec)
            for prefix in ('', 'Secure')
            for codec in ('Ogg', 'Mp3')]

        formats = []
        for format_id in candidate_ids:
            format_url = metadata.get('%sUrl' % format_id)
            if not format_url:
                # Missing or empty URL: this variant is not offered.
                continue
            formats.append({
                'url': format_url,
                'format_id': format_id,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        # 'Title' is the only mandatory metadata field; the rest may be absent.
        return {
            'id': audio_id,
            'title': metadata['Title'],
            'description': metadata.get('Description'),
            'duration': float_or_none(metadata.get('Duration')),
            'timestamp': parse_iso8601(metadata.get('DateCreated')),
            'formats': formats,
        }
|
@@ -4,7 +4,7 @@ from .mtv import MTVIE
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -16,4 +16,7 @@ class CMTIE(MTVIE):
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
@@ -10,13 +10,11 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookies,
|
||||
compat_getpass,
|
||||
compat_HTTPError,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
@@ -24,6 +22,7 @@ from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_etree_fromstring,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
@@ -311,11 +310,11 @@ class InfoExtractor(object):
|
||||
@classmethod
|
||||
def ie_key(cls):
|
||||
"""A string for getting the InfoExtractor with get_info_extractor"""
|
||||
return cls.__name__[:-2]
|
||||
return compat_str(cls.__name__[:-2])
|
||||
|
||||
@property
|
||||
def IE_NAME(self):
|
||||
return type(self).__name__[:-2]
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
""" Returns the response handle """
|
||||
@@ -462,7 +461,7 @@ class InfoExtractor(object):
|
||||
return xml_string
|
||||
if transform_source:
|
||||
xml_string = transform_source(xml_string)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
||||
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note='Downloading JSON metadata',
|
||||
@@ -842,7 +841,7 @@ class InfoExtractor(object):
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||
return False
|
||||
@@ -944,13 +943,15 @@ class InfoExtractor(object):
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
m3u8_doc = self._download_webpage(
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
errnote=errnote or 'Failed to download m3u8 information',
|
||||
fatal=fatal)
|
||||
if m3u8_doc is False:
|
||||
return m3u8_doc
|
||||
if res is False:
|
||||
return res
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
|
@@ -5,12 +5,12 @@ import re
|
||||
import json
|
||||
import base64
|
||||
import zlib
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
@@ -104,7 +105,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'id': '589804',
|
||||
'ext': 'flv',
|
||||
'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
|
||||
'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
|
||||
'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Danny Choo Network',
|
||||
'upload_date': '20120213',
|
||||
@@ -234,7 +235,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
return output
|
||||
|
||||
def _extract_subtitles(self, subtitle):
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
sub_root = compat_etree_fromstring(subtitle)
|
||||
return [{
|
||||
'ext': 'srt',
|
||||
'data': self._convert_subtitles_to_srt(sub_root),
|
||||
@@ -287,11 +288,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
||||
if not video_description:
|
||||
video_description = None
|
||||
video_description = self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
|
||||
webpage, 'description', default=None)
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
||||
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
||||
|
@@ -141,9 +141,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
ext = determine_ext(media_url)
|
||||
if type_ == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
elif type_ == 'application/f4m' or ext == 'f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False)
|
||||
if f4m_formats:
|
||||
formats.extend(f4m_formats)
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
|
88
youtube_dl/extractor/democracynow.py
Normal file
88
youtube_dl/extractor/democracynow.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class DemocracynowIE(InfoExtractor):
    """Extractor for democracynow.org show and story pages."""

    # The dot before "org" is escaped so that it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^?]*)'
    IE_NAME = 'democracynow'
    _TESTS = [{
        'url': 'http://www.democracynow.org/shows/2015/7/3',
        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': 'July 03, 2015 - Democracy Now!',
            'description': 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs',
        },
    }, {
        'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
        'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d',
        'info_dict': {
            'id': '2015-0703-001',
            'ext': 'mp4',
            'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
            'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the JSON blob embedded in the page.

        The page id taken from the URL is only used as a display id; the
        final id is derived from the basename of the first media URL found
        (with a leading 'dn' removed) and falls back to the display id.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        description = self._og_search_description(webpage)

        json_data = self._parse_json(self._search_regex(
            r'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'),
            display_id)
        video_id = None
        formats = []

        default_lang = 'en'

        subtitles = {}

        def add_subtitle_item(lang, info_dict):
            # Group subtitle entries per language code.
            subtitles.setdefault(lang, []).append(info_dict)

        # chapter_file are not subtitles
        # Only use caption_file when it actually holds a value: joining an
        # empty value onto the page URL would yield the page URL itself.
        caption_file = json_data.get('caption_file')
        if caption_file:
            add_subtitle_item(default_lang, {
                'url': compat_urlparse.urljoin(url, caption_file),
            })

        # Tolerate a missing/null captions list and entries without a URL.
        for subtitle_item in json_data.get('captions') or []:
            subtitle_url = subtitle_item.get('url')
            if not subtitle_url:
                continue
            lang = (subtitle_item.get('language') or '').lower() or default_lang
            add_subtitle_item(lang, {
                'url': compat_urlparse.urljoin(url, subtitle_url),
            })

        for key in ('file', 'audio', 'video'):
            media_url = json_data.get(key, '')
            if not media_url:
                continue
            # Resolve relative URLs against the page and drop the query string.
            media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
            video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
            formats.append({
                'url': media_url,
            })

        self._sort_formats(formats)

        return {
            'id': video_id or display_id,
            'title': json_data['title'],
            'description': description,
            'subtitles': subtitles,
            'formats': formats,
        }
|
@@ -1,39 +1,92 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
IE_NAME = 'eitb.tv'
|
||||
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?eitb\.tv/(?:eu/bideoa|es/video)/[^/]+/\d+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'add_ie': ['Brightcove'],
|
||||
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/4104995148001/4090227752001/lasa-y-zabala-30-anos/',
|
||||
'md5': 'edf4436247185adee3ea18ce64c47998',
|
||||
'info_dict': {
|
||||
'id': '2743577154001',
|
||||
'id': '4090227752001',
|
||||
'ext': 'mp4',
|
||||
'title': '60 minutos (Lasa y Zabala, 30 años)',
|
||||
# All videos from eitb has this description in the brightcove info
|
||||
'description': '.',
|
||||
'uploader': 'Euskal Telebista',
|
||||
'description': 'Programa de reportajes de actualidad.',
|
||||
'duration': 3996.76,
|
||||
'timestamp': 1381789200,
|
||||
'upload_date': '20131014',
|
||||
'tags': list,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
chapter_id = mobj.group('chapter_id')
|
||||
webpage = self._download_webpage(url, chapter_id)
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is None:
|
||||
raise ExtractorError('Could not extract the Brightcove url')
|
||||
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||
# it manually
|
||||
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||
return self.url_result(bc_url, BrightcoveIE.ie_key())
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
media = video['web_media'][0]
|
||||
|
||||
formats = []
|
||||
for rendition in media['RENDITIONS']:
|
||||
video_url = rendition.get('PMD_URL')
|
||||
if not video_url:
|
||||
continue
|
||||
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
||||
format_id = 'http'
|
||||
if tbr:
|
||||
format_id += '-%d' % int(tbr)
|
||||
formats.append({
|
||||
'url': rendition['PMD_URL'],
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
||||
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
hls_url = media.get('HLS_SURL')
|
||||
if hls_url:
|
||||
request = compat_urllib_request.Request(
|
||||
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
|
||||
headers={'Referer': url})
|
||||
token_data = self._download_json(
|
||||
request, video_id, 'Downloading auth token', fatal=False)
|
||||
if token_data:
|
||||
token = token_data.get('token')
|
||||
if token:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
'%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
hds_url = media.get('HDS_SURL')
|
||||
if hds_url:
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
'%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
|
||||
video_id, f4m_id='hds', fatal=False)
|
||||
if f4m_formats:
|
||||
formats.extend(f4m_formats)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
|
||||
'description': media.get('SHORT_DESC_ES') or video.get('desc_group') or media.get('SHORT_DESC_EU'),
|
||||
'thumbnail': media.get('STILL_URL') or media.get('THUMBNAIL_URL'),
|
||||
'duration': float_or_none(media.get('LENGTH'), 1000),
|
||||
'timestamp': parse_iso8601(media.get('BROADCST_DATE'), ' '),
|
||||
'tags': media.get('TAGS'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,23 +3,20 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
qualities,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'info_dict': {
|
||||
'id': '652431',
|
||||
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'ext': 'mp4',
|
||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||
'uploader': 'unknown',
|
||||
@@ -29,12 +26,16 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.extremetube.com/gay/video/abcde-1234',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.extremetube.com/video/652431',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
@@ -49,20 +50,36 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
flash_vars = compat_parse_qs(self._search_regex(
|
||||
r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
|
||||
flash_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
|
||||
for k, vals in flash_vars.items():
|
||||
m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
|
||||
if m is not None:
|
||||
formats.append({
|
||||
'format_id': m.group('quality'),
|
||||
'quality': quality(m.group('quality')),
|
||||
'url': vals[0],
|
||||
for quality_key, video_url in flash_vars.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
|
||||
if not height:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(
|
||||
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
|
||||
else:
|
||||
f.update({
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -83,6 +83,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
subtitles = {}
|
||||
subtitles_list = [{
|
||||
'url': subformat['url'],
|
||||
'ext': subformat.get('format'),
|
||||
} for subformat in info.get('subtitles', []) if subformat.get('url')]
|
||||
if subtitles_list:
|
||||
subtitles['fr'] = subtitles_list
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -91,20 +99,27 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'pluzz.francetv.fr'
|
||||
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
|
||||
_VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
|
||||
|
||||
# Can't use tests, videos expire in 7 days
|
||||
|
||||
def _real_extract(self, url):
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion="(\d+)"', webpage, 'ID')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'id_video', webpage, 'video id', default=None)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion=["\'](\d+)', webpage, 'video id')
|
||||
|
||||
return self._extract_video(video_id, 'Pluzz')
|
||||
|
||||
|
||||
@@ -120,6 +135,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'title': 'Soir 3',
|
||||
'upload_date': '20130826',
|
||||
'timestamp': 1377548400,
|
||||
'subtitles': {
|
||||
'fr': 'mincount:2',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
|
@@ -45,11 +45,20 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
|
||||
bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
|
||||
bitrates.sort()
|
||||
m3u8_url = self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8)\1',
|
||||
webpage, 'm3u8 url', default=None, group='url')
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
|
@@ -9,6 +9,7 @@ import sys
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -21,7 +22,6 @@ from ..utils import (
|
||||
HEADRequest,
|
||||
is_html,
|
||||
orderedSet,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -141,6 +141,7 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Automatics, robotics and biocybernetics',
|
||||
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
|
||||
'upload_date': '20130627',
|
||||
'formats': 'mincount:16',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
@@ -1237,7 +1238,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
@@ -1671,8 +1672,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
|
||||
|
@@ -14,79 +14,58 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GloboIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
||||
_VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id="(\d+)"',
|
||||
r'\bdata-player-videosids="(\d+)"',
|
||||
r'<div[^>]+\bid="(\d+)"',
|
||||
]
|
||||
|
||||
_RESIGN_EXPIRATION = 86400
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/',
|
||||
'md5': '03ebf41cb7ade43581608b7d9b71fab0',
|
||||
'info_dict': {
|
||||
'id': '3654973',
|
||||
'ext': 'mp4',
|
||||
'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão',
|
||||
'duration': 251.585,
|
||||
'uploader': 'SporTV',
|
||||
'uploader_id': 698,
|
||||
'like_count': int,
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
||||
'info_dict': {
|
||||
'id': '3607726',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||
'duration': 103.204,
|
||||
'uploader': 'Globo.com',
|
||||
'uploader_id': '265',
|
||||
},
|
||||
{
|
||||
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
||||
'info_dict': {
|
||||
'id': '3607726',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||
'duration': 103.204,
|
||||
'uploader': 'Globo.com',
|
||||
'uploader_id': 265,
|
||||
'like_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://globoplay.globo.com/v/4581987/',
|
||||
'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
|
||||
'info_dict': {
|
||||
'id': '4581987',
|
||||
'ext': 'mp4',
|
||||
'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
|
||||
'duration': 137.973,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
},
|
||||
{
|
||||
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||
'info_dict': {
|
||||
'id': '3652183',
|
||||
'ext': 'mp4',
|
||||
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||
'duration': 110.711,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': 196,
|
||||
'like_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'md5': 'c1defca721ce25b2354e927d3e4b3dec',
|
||||
'info_dict': {
|
||||
'id': '3928201',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
|
||||
'duration': 1472.906,
|
||||
'uploader': 'Canal Brasil',
|
||||
'uploader_id': 705,
|
||||
'like_count': int,
|
||||
}
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
class MD5():
|
||||
class MD5:
|
||||
HEX_FORMAT_LOWERCASE = 0
|
||||
HEX_FORMAT_UPPERCASE = 1
|
||||
BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
|
||||
@@ -353,9 +332,6 @@ class GloboIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||
|
||||
video = self._download_json(
|
||||
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
||||
|
||||
@@ -364,7 +340,7 @@ class GloboIE(InfoExtractor):
|
||||
formats = []
|
||||
for resource in video['resources']:
|
||||
resource_id = resource.get('_id')
|
||||
if not resource_id:
|
||||
if not resource_id or resource_id.endswith('manifest'):
|
||||
continue
|
||||
|
||||
security = self._download_json(
|
||||
@@ -393,20 +369,23 @@ class GloboIE(InfoExtractor):
|
||||
resource_url = resource['url']
|
||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': signed_url,
|
||||
'format_id': resource_id,
|
||||
'height': resource.get('height'),
|
||||
'format_id': 'http-%s' % resource_id,
|
||||
'height': int_or_none(resource.get('height')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = float_or_none(video.get('duration'), 1000)
|
||||
like_count = int_or_none(video.get('likes'))
|
||||
uploader = video.get('channel')
|
||||
uploader_id = video.get('channel_id')
|
||||
uploader_id = str_or_none(video.get('channel_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -414,6 +393,46 @@ class GloboIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
    """Extractor for globo.com article (.html) pages that embed a video.

    It only locates the numeric video id in the page and delegates the
    actual extraction to the 'Globo' extractor through a ``globo:<id>`` URL.
    """

    # Raw string: the pattern contains '\.' which is not a valid escape
    # sequence in a regular string literal.
    _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'

    # Patterns tried against the page to find the embedded video id.
    _VIDEOID_REGEXES = [
        r'\bdata-video-id=["\'](\d{7,})',
        r'\bdata-player-videosids=["\'](\d{7,})',
        r'\bvideosIDs\s*:\s*["\'](\d{7,})',
        r'\bdata-id=["\'](\d{7,})',
        r'<div[^>]+\bid=["\'](\d{7,})',
    ]

    _TESTS = [{
        'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
        'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
        'info_dict': {
            'id': '3652183',
            'ext': 'mp4',
            'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
            'duration': 110.711,
            'uploader': 'Rede Globo',
            'uploader_id': '196',
        }
    }, {
        'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
        'only_matching': True,
    }, {
        'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs GloboIE accepts, else the default check."""
        if GloboIE.suitable(url):
            return False
        return super(GloboArticleIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the embedded video id and hand it over to the Globo extractor."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
        return self.url_result('globo:%s' % video_id, 'Globo')
|
||||
|
@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
} for width, height, video_url in re.findall(
|
||||
r'\d+,(\d+),(\d+),"(https?://redirector\.googlevideo\.com.*?)"', webpage)]
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -16,7 +16,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?:
|
||||
kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
|
||||
https?://
|
||||
(:?(?:www|cdnapisec)\.)?kaltura\.com/
|
||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
|
@@ -82,6 +82,11 @@ class LyndaBaseIE(InfoExtractor):
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _logout(self):
    """Request the lynda.com logout endpoint; a failure is not fatal."""
    logout_url = 'http://www.lynda.com/ajax/logout.aspx'
    self._download_webpage(
        logout_url, None,
        note='Logging out', errnote='Unable to log out', fatal=False)
|
||||
|
||||
|
||||
class LyndaIE(LyndaBaseIE):
|
||||
IE_NAME = 'lynda'
|
||||
@@ -108,51 +113,47 @@ class LyndaIE(LyndaBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
page = self._download_webpage(
|
||||
video = self._download_json(
|
||||
'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(page)
|
||||
|
||||
if 'Status' in video_json:
|
||||
if 'Status' in video:
|
||||
raise ExtractorError(
|
||||
'lynda returned error: %s' % video_json['Message'], expected=True)
|
||||
'lynda returned error: %s' % video['Message'], expected=True)
|
||||
|
||||
if video_json['HasAccess'] is False:
|
||||
if video.get('HasAccess') is False:
|
||||
self.raise_login_required('Video %s is only available for members' % video_id)
|
||||
|
||||
video_id = compat_str(video_json['ID'])
|
||||
duration = video_json['DurationInSeconds']
|
||||
title = video_json['Title']
|
||||
video_id = compat_str(video.get('ID') or video_id)
|
||||
duration = int_or_none(video.get('DurationInSeconds'))
|
||||
title = video['Title']
|
||||
|
||||
formats = []
|
||||
|
||||
fmts = video_json.get('Formats')
|
||||
fmts = video.get('Formats')
|
||||
if fmts:
|
||||
formats.extend([
|
||||
{
|
||||
'url': fmt['Url'],
|
||||
'ext': fmt['Extension'],
|
||||
'width': fmt['Width'],
|
||||
'height': fmt['Height'],
|
||||
'filesize': fmt['FileSize'],
|
||||
'format_id': str(fmt['Resolution'])
|
||||
} for fmt in fmts])
|
||||
formats.extend([{
|
||||
'url': f['Url'],
|
||||
'ext': f.get('Extension'),
|
||||
'width': int_or_none(f.get('Width')),
|
||||
'height': int_or_none(f.get('Height')),
|
||||
'filesize': int_or_none(f.get('FileSize')),
|
||||
'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None,
|
||||
} for f in fmts if f.get('Url')])
|
||||
|
||||
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||
prioritized_streams = video.get('PrioritizedStreams')
|
||||
if prioritized_streams:
|
||||
for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
|
||||
formats.extend([
|
||||
{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': '%s-%s' % (prioritized_stream_id, format_id),
|
||||
} for format_id, video_url in prioritized_stream.items()
|
||||
])
|
||||
formats.extend([{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': '%s-%s' % (prioritized_stream_id, format_id),
|
||||
} for format_id, video_url in prioritized_stream.items()])
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, page)
|
||||
subtitles = self.extract_subtitles(video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -183,7 +184,7 @@ class LyndaIE(LyndaBaseIE):
|
||||
if srt:
|
||||
return srt
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id):
|
||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
subs = self._download_json(url, None, False)
|
||||
if subs:
|
||||
@@ -205,12 +206,13 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
course_path = mobj.group('coursepath')
|
||||
course_id = mobj.group('courseid')
|
||||
|
||||
page = self._download_webpage(
|
||||
course = self._download_json(
|
||||
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||
course_id, 'Downloading course JSON')
|
||||
course_json = json.loads(page)
|
||||
|
||||
if 'Status' in course_json and course_json['Status'] == 'NotFound':
|
||||
self._logout()
|
||||
|
||||
if course.get('Status') == 'NotFound':
|
||||
raise ExtractorError(
|
||||
'Course %s does not exist' % course_id, expected=True)
|
||||
|
||||
@@ -220,12 +222,13 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||
# by single video API anymore
|
||||
|
||||
for chapter in course_json['Chapters']:
|
||||
for video in chapter['Videos']:
|
||||
if video['HasAccess'] is False:
|
||||
for chapter in course['Chapters']:
|
||||
for video in chapter.get('Videos', []):
|
||||
if video.get('HasAccess') is False:
|
||||
unaccessible_videos += 1
|
||||
continue
|
||||
videos.append(video['ID'])
|
||||
if video.get('ID'):
|
||||
videos.append(video['ID'])
|
||||
|
||||
if unaccessible_videos > 0:
|
||||
self._downloader.report_warning(
|
||||
@@ -238,6 +241,6 @@ class LyndaCourseIE(LyndaBaseIE):
|
||||
'Lynda')
|
||||
for video_id in videos]
|
||||
|
||||
course_title = course_json['Title']
|
||||
course_title = course.get('Title')
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
||||
|
@@ -1,64 +1,169 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
# No tests, MDR regularily deletes its videos
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# MDR regularily deletes its videos
|
||||
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
|
||||
'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
|
||||
'info_dict': {
|
||||
'id': '1312272',
|
||||
'ext': 'mp3',
|
||||
'title': 'Feuilleton vom 30. Oktober 2015',
|
||||
'duration': 250,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
|
||||
'md5': '4930515e36b06c111213e80d1e4aad0e',
|
||||
'info_dict': {
|
||||
'id': '19636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Baumhaus vom 30. Oktober 2015',
|
||||
'duration': 134,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
|
||||
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
|
||||
'info_dict': {
|
||||
'id': '8182',
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1419047100,
|
||||
'upload_date': '20141220',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('video_id')
|
||||
domain = m.group('domain')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# determine title and media streams from webpage
|
||||
html = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
|
||||
xmlurl = self._search_regex(
|
||||
r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
|
||||
data_url = self._search_regex(
|
||||
r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', group='url')
|
||||
|
||||
doc = self._download_xml(
|
||||
compat_urlparse.urljoin(url, data_url), video_id)
|
||||
|
||||
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
|
||||
|
||||
doc = self._download_xml(domain + xmlurl, video_id)
|
||||
formats = []
|
||||
for a in doc.findall('./assets/asset'):
|
||||
url_el = a.find('./progressiveDownloadUrl')
|
||||
if url_el is None:
|
||||
continue
|
||||
abr = int(a.find('bitrateAudio').text) // 1000
|
||||
media_type = a.find('mediaType').text
|
||||
format = {
|
||||
'abr': abr,
|
||||
'filesize': int(a.find('fileSize').text),
|
||||
'url': url_el.text,
|
||||
}
|
||||
processed_urls = []
|
||||
for asset in doc.findall('./assets/asset'):
|
||||
for source in (
|
||||
'progressiveDownload',
|
||||
'dynamicHttpStreamingRedirector',
|
||||
'adaptiveHttpStreamingRedirector'):
|
||||
url_el = asset.find('./%sUrl' % source)
|
||||
if url_el is None:
|
||||
continue
|
||||
|
||||
video_url = url_el.text
|
||||
if video_url in processed_urls:
|
||||
continue
|
||||
|
||||
processed_urls.append(video_url)
|
||||
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
|
||||
ext = determine_ext(url_el.text)
|
||||
if ext == 'm3u8':
|
||||
url_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=0, m3u8_id='HLS', fatal=False)
|
||||
elif ext == 'f4m':
|
||||
url_formats = self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||
preference=0, f4m_id='HDS', fatal=False)
|
||||
else:
|
||||
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
|
||||
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%s-%d' % (media_type, vbr or abr),
|
||||
'filesize': filesize,
|
||||
'abr': abr,
|
||||
'preference': 1,
|
||||
}
|
||||
|
||||
if vbr:
|
||||
width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
|
||||
height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
|
||||
f.update({
|
||||
'vbr': vbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
url_formats = [f]
|
||||
|
||||
if not url_formats:
|
||||
continue
|
||||
|
||||
if not vbr:
|
||||
for f in url_formats:
|
||||
abr = f.get('tbr') or abr
|
||||
if 'tbr' in f:
|
||||
del f['tbr']
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
formats.extend(url_formats)
|
||||
|
||||
vbr_el = a.find('bitrateVideo')
|
||||
if vbr_el is None:
|
||||
format.update({
|
||||
'vcodec': 'none',
|
||||
'format_id': '%s-%d' % (media_type, abr),
|
||||
})
|
||||
else:
|
||||
vbr = int(vbr_el.text) // 1000
|
||||
format.update({
|
||||
'vbr': vbr,
|
||||
'width': int(a.find('frameWidth').text),
|
||||
'height': int(a.find('frameHeight').text),
|
||||
'format_id': '%s-%d' % (media_type, vbr),
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
|
||||
timestamp = parse_iso8601(
|
||||
xpath_text(
|
||||
doc, [
|
||||
'./broadcast/broadcastDate',
|
||||
'./broadcast/broadcastStartDate',
|
||||
'./broadcast/broadcastEndDate'],
|
||||
'timestamp', default=None))
|
||||
duration = parse_duration(xpath_text(doc, './duration', 'duration'))
|
||||
uploader = xpath_text(doc, './rights', 'uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
@@ -51,6 +52,8 @@ class MioMioIE(InfoExtractor):
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||
|
||||
xml_config = self._search_regex(
|
||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||
webpage, 'xml config')
|
||||
@@ -60,14 +63,12 @@ class MioMioIE(InfoExtractor):
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
|
||||
video_id)
|
||||
|
||||
# the following xml contains the actual configuration information on the video file(s)
|
||||
vid_config = self._download_xml(
|
||||
vid_config_request = compat_urllib_request.Request(
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
|
||||
video_id)
|
||||
headers=http_headers)
|
||||
|
||||
http_headers = {
|
||||
'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
|
||||
}
|
||||
# the following xml contains the actual configuration information on the video file(s)
|
||||
vid_config = self._download_xml(vid_config_request, video_id)
|
||||
|
||||
if not int_or_none(xpath_text(vid_config, 'timelength')):
|
||||
raise ExtractorError('Unable to load videos!', expected=True)
|
||||
|
@@ -86,7 +86,7 @@ class MITIE(TechTVMITIE):
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe .*?src="(.+?)"', webpage, 'embed url')
|
||||
return self.url_result(embed_url, ie='TechTVMIT')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
get_element_by_attribute,
|
||||
@@ -15,7 +18,7 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
'md5': 'ace7635b2a0b286aaa37d3ff192d2a8a',
|
||||
'md5': '0ff1a13aebb35d9bc14081ff633dd324',
|
||||
'info_dict': {
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
@@ -34,6 +37,7 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
config_url = self._search_regex(
|
||||
r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
|
||||
config_url = compat_urlparse.urljoin(url, config_url)
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id, 'Downloading config JSON')
|
||||
@@ -56,7 +60,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'sta': '0',
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data)).encode('utf-8')),
|
||||
'%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data))),
|
||||
display_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
class MonikerIE(InfoExtractor):
|
||||
IE_DESC = 'allmyvideos.net and vidspot.net'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://allmyvideos.net/jih3nce3x6wn',
|
||||
@@ -46,6 +46,18 @@ class MonikerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidspot.net/2/v-ywDf99',
|
||||
'md5': '5f8254ce12df30479428b0152fb8e7ba',
|
||||
'info_dict': {
|
||||
'id': 'ywDf99',
|
||||
'ext': 'mp4',
|
||||
'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
|
||||
'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://allmyvideos.net/v/v-HXZm5t',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -64,18 +76,30 @@ class MonikerIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
builtin_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
|
||||
orig_webpage, 'builtin URL', default=None, group='url')
|
||||
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
if builtin_url:
|
||||
req = compat_urllib_request.Request(builtin_url)
|
||||
req.add_header('Referer', url)
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
|
||||
title = self._og_search_title(orig_webpage).strip()
|
||||
description = self._og_search_description(orig_webpage).strip()
|
||||
else:
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
description = None
|
||||
|
||||
# Could be several links with different quality
|
||||
links = re.findall(r'"file" : "?(.+?)",', webpage)
|
||||
@@ -89,5 +113,6 @@ class MonikerIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,80 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
|
||||
_VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
|
||||
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5',
|
||||
'info_dict': {
|
||||
'id': 'Wy7ZU',
|
||||
'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
|
||||
'id': 'pKIGmG83AqD9',
|
||||
'display_id': 'warcraft-trailer-1-561180739597',
|
||||
'ext': 'mp4',
|
||||
'title': 'My Week with Marilyn - Do You Love Me?',
|
||||
'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
|
||||
'title': 'Warcraft Trailer 1',
|
||||
'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
show_id = display_id or video_id
|
||||
display_id = self._match_id(url)
|
||||
|
||||
config = self._download_xml(
|
||||
'http://config.movieclips.com/player/config/%s' % video_id,
|
||||
show_id, 'Downloading player config')
|
||||
|
||||
if config.find('./country-region').text == 'false':
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)
|
||||
|
||||
properties = config.find('./video/properties')
|
||||
smil_file = properties.attrib['smil_file']
|
||||
|
||||
smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
|
||||
base_url = smil.find('./head/meta').attrib['base']
|
||||
|
||||
formats = []
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
vbr = int(video.attrib['system-bitrate']) / 1000
|
||||
src = video.attrib['src']
|
||||
formats.append({
|
||||
'url': base_url,
|
||||
'play_path': src,
|
||||
'ext': src.split(':')[0],
|
||||
'vbr': vbr,
|
||||
'format_id': '%dk' % vbr,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
|
||||
description = clean_html(compat_str(properties.attrib['clip_description']))
|
||||
thumbnail = properties.attrib['image']
|
||||
categories = properties.attrib['clip_categories'].split(',')
|
||||
req = compat_urllib_request.Request(url)
|
||||
# it doesn't work if it thinks the browser it's too old
|
||||
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link')
|
||||
title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com</title>', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'_type': 'url_transparent',
|
||||
'url': theplatform_link,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -14,7 +14,8 @@ from ..utils import (
|
||||
|
||||
class NDRBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = next(group for group in mobj.groups() if group)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self._extract_embed(webpage, display_id)
|
||||
|
||||
@@ -22,7 +23,7 @@ class NDRBaseIE(InfoExtractor):
|
||||
class NDRIE(NDRBaseIE):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
@@ -77,6 +78,9 @@ class NDRIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
@@ -101,7 +105,7 @@ class NDRIE(NDRBaseIE):
|
||||
class NJoyIE(NDRBaseIE):
|
||||
IE_NAME = 'njoy'
|
||||
IE_DESC = 'N-JOY'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
|
||||
@@ -136,6 +140,9 @@ class NJoyIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
@@ -231,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
|
||||
class NDREmbedIE(NDREmbedBaseIE):
|
||||
IE_NAME = 'ndr:embed'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||
_VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||
@@ -325,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
|
||||
class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
IE_NAME = 'njoy:embed'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
|
||||
_VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo
|
||||
'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
|
||||
|
@@ -4,10 +4,14 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
encode_dict,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -38,19 +42,40 @@ class NovaMovIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
page = self._download_webpage(
|
||||
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
|
||||
url = 'http://%s/video/%s' % (self._HOST, video_id)
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, page) is not None:
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading video page')
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
|
||||
def extract_filekey(default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
self._FILEKEY_REGEX, webpage, 'filekey', default=default)
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
|
||||
filekey = extract_filekey(default=None)
|
||||
|
||||
if not filekey:
|
||||
fields = self._hidden_inputs(webpage)
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
|
||||
'post url', default=url, group='url')
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(url, post_url)
|
||||
request = compat_urllib_request.Request(
|
||||
post_url, urlencode_postdata(encode_dict(fields)))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('Referer', post_url)
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading continue to the video page')
|
||||
|
||||
filekey = extract_filekey()
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False)
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
|
||||
|
||||
api_response = self._download_webpage(
|
||||
'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
|
||||
|
@@ -7,9 +7,9 @@ class NowVideoIE(NovaMovIE):
|
||||
IE_NAME = 'nowvideo'
|
||||
IE_DESC = 'NowVideo'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'}
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
|
||||
|
||||
_HOST = 'www.nowvideo.ch'
|
||||
_HOST = 'www.nowvideo.to'
|
||||
|
||||
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||
_FILEKEY_REGEX = r'var fkzd="([^"]+)";'
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
@@ -153,6 +154,22 @@ class PBSIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
# Frontline video embedded via flp2012.js
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists',
|
||||
'info_dict': {
|
||||
'id': '2070868960',
|
||||
'display_id': 'the-atomic-artists',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - The Atomic Artists',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'duration': 723,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
}
|
||||
]
|
||||
_ERRORS = {
|
||||
@@ -191,9 +208,30 @@ class PBSIE(InfoExtractor):
|
||||
if media_id:
|
||||
return media_id, presumptive_id, upload_date
|
||||
|
||||
url = self._search_regex(
|
||||
r'(?s)<iframe[^>]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
|
||||
webpage, 'player URL')
|
||||
# Fronline video embedded via flp
|
||||
video_id = self._search_regex(
|
||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
||||
if video_id:
|
||||
# pkg_id calculation is reverse engineered from
|
||||
# http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js
|
||||
prg_id = self._search_regex(
|
||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:]
|
||||
if 'q' in prg_id:
|
||||
prg_id = prg_id.split('q')[1]
|
||||
prg_id = int(prg_id, 16)
|
||||
getdir = self._download_json(
|
||||
'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
|
||||
presumptive_id, 'Downloading getdir JSON',
|
||||
transform_source=strip_jsonp)
|
||||
return getdir['mid'], presumptive_id, upload_date
|
||||
|
||||
for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
|
||||
url = self._search_regex(
|
||||
r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe,
|
||||
'player URL', default=None, group='url')
|
||||
if url:
|
||||
break
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
player_id = mobj.group('player_id')
|
||||
|
@@ -12,7 +12,8 @@ from ..utils import parse_iso8601
|
||||
class PeriscopeIE(InfoExtractor):
|
||||
IE_DESC = 'Periscope'
|
||||
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
# Alive example URLs can be found here http://onperiscope.com/
|
||||
_TESTS = [{
|
||||
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
|
||||
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
|
||||
'info_dict': {
|
||||
@@ -25,11 +26,15 @@ class PeriscopeIE(InfoExtractor):
|
||||
'uploader_id': '1465763',
|
||||
},
|
||||
'skip': 'Expires in 24 hours',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, method, token):
|
||||
def _call_api(self, method, value):
|
||||
attribute = 'token' if len(value) > 13 else 'broadcast_id'
|
||||
return self._download_json(
|
||||
'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token)
|
||||
'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
|
||||
|
||||
def _real_extract(self, url):
|
||||
token = self._match_id(url)
|
||||
|
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
class ProSiebenSat1IE(InfoExtractor):
|
||||
IE_NAME = 'prosiebensat1'
|
||||
IE_DESC = 'ProSiebenSat.1 Digital'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
|
@@ -9,8 +9,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class RTBFIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.rtbf.be/video/[^\?]+\?id=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
||||
'md5': '799f334ddf2c0a582ba80c44655be570',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,14 @@ class RTBFIE(InfoExtractor):
|
||||
'title': 'Les Diables au coeur (épisode 2)',
|
||||
'duration': 3099,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_QUALITIES = [
|
||||
('mobile', 'mobile'),
|
||||
|
@@ -57,16 +57,21 @@ class RuutuIE(InfoExtractor):
|
||||
extract_formats(child)
|
||||
elif child.tag.endswith('File'):
|
||||
video_url = child.text
|
||||
if not video_url or video_url in processed_urls or 'NOT_USED' in video_url:
|
||||
if (not video_url or video_url in processed_urls or
|
||||
any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
|
||||
return
|
||||
processed_urls.append(video_url)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False)
|
||||
if f4m_formats:
|
||||
formats.extend(f4m_formats)
|
||||
else:
|
||||
proto = compat_urllib_parse_urlparse(video_url).scheme
|
||||
if not child.tag.startswith('HTTP') and proto != 'rtmp':
|
||||
|
@@ -121,9 +121,9 @@ class SenateISVPIE(InfoExtractor):
|
||||
'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
|
||||
}]
|
||||
else:
|
||||
hdcore_sign = '?hdcore=3.1.0'
|
||||
hdcore_sign = 'hdcore=3.1.0'
|
||||
url_params = (domain, video_id, stream_num)
|
||||
f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign
|
||||
f4m_url = '%s/z/%s_1@%s/manifest.f4m?' % url_params + hdcore_sign
|
||||
m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
|
||||
for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
|
||||
# URLs without the extra param induce a 404 error
|
||||
|
@@ -77,17 +77,21 @@ class SpiegeltvIE(InfoExtractor):
|
||||
'rtmp_live': True,
|
||||
})
|
||||
elif determine_ext(endpoint) == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
endpoint.replace('[video]', play_path),
|
||||
video_id, 'm4v',
|
||||
preference=1, # Prefer hls since it allows to workaround georestriction
|
||||
m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats is not False:
|
||||
formats.extend(m3u8_formats)
|
||||
formats.append({
|
||||
'url': endpoint.replace('[video]', play_path),
|
||||
'ext': 'm4v',
|
||||
'format_id': 'hls', # Prefer hls since it allows to workaround georestriction
|
||||
'protocol': 'm3u8',
|
||||
'preference': 1,
|
||||
'http_headers': {
|
||||
'Accept-Encoding': 'deflate', # gzip causes trouble on the server side
|
||||
},
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': endpoint,
|
||||
})
|
||||
self._check_formats(formats, video_id)
|
||||
|
||||
thumbnails = []
|
||||
for image in media_json['images']:
|
||||
|
@@ -64,7 +64,7 @@ class StitcherIE(InfoExtractor):
|
||||
'url': episode[episode_key],
|
||||
'ext': determine_ext(episode[episode_key]) or 'mp3',
|
||||
'vcodec': 'none',
|
||||
} for episode_key in ('origEpisodeURL', 'episodeURL') if episode.get(episode_key)]
|
||||
} for episode_key in ('episodeURL',) if episode.get(episode_key)]
|
||||
description = self._search_regex(
|
||||
r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False)
|
||||
duration = int_or_none(episode.get('duration'))
|
||||
|
@@ -10,10 +10,10 @@ class TutvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tu.tv/videos/robots-futbolistas',
|
||||
'md5': '627c7c124ac2a9b5ab6addb94e0e65f7',
|
||||
'md5': '0cd9e28ad270488911b0d2a72323395d',
|
||||
'info_dict': {
|
||||
'id': '2973058',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Robots futbolistas',
|
||||
},
|
||||
}
|
||||
|
@@ -9,6 +9,8 @@ from ..utils import (
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
remove_end,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -120,7 +122,7 @@ class TwitterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
|
||||
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
|
||||
'md5': '31cd83a116fc41f99ae3d909d4caf6a0',
|
||||
'info_dict': {
|
||||
@@ -133,7 +135,19 @@ class TwitterIE(InfoExtractor):
|
||||
'uploader': 'FREE THE NIPPLE',
|
||||
'uploader_id': 'freethenipple',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
||||
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
|
||||
'info_dict': {
|
||||
'id': '657991469417025536',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
|
||||
'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
|
||||
'thumbnail': 're:^https?://.*\.png',
|
||||
'uploader': 'Gifs',
|
||||
'uploader_id': 'giphz',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -150,17 +164,41 @@ class TwitterIE(InfoExtractor):
|
||||
mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
|
||||
title, short_url = mobj.groups()
|
||||
|
||||
card_id = self._search_regex(
|
||||
r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url')
|
||||
card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'TwitterCard',
|
||||
info = {
|
||||
'uploader_id': user_id,
|
||||
'uploader': username,
|
||||
'url': card_url,
|
||||
'webpage_url': url,
|
||||
'description': '%s on Twitter: "%s %s"' % (username, title, short_url),
|
||||
'title': username + ' - ' + title,
|
||||
}
|
||||
|
||||
card_id = self._search_regex(
|
||||
r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
|
||||
if card_id:
|
||||
card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'TwitterCard',
|
||||
'url': card_url,
|
||||
})
|
||||
return info
|
||||
|
||||
mobj = re.search(r'''(?x)
|
||||
<video[^>]+class="animated-gif"[^>]+
|
||||
(?:data-height="(?P<height>\d+)")?[^>]+
|
||||
(?:data-width="(?P<width>\d+)")?[^>]+
|
||||
(?:poster="(?P<poster>[^"]+)")?[^>]*>\s*
|
||||
<source[^>]+video-src="(?P<url>[^"]+)"
|
||||
''', webpage)
|
||||
|
||||
if mobj:
|
||||
info.update({
|
||||
'id': twid,
|
||||
'url': mobj.group('url'),
|
||||
'height': int_or_none(mobj.group('height')),
|
||||
'width': int_or_none(mobj.group('width')),
|
||||
'thumbnail': mobj.group('poster'),
|
||||
})
|
||||
return info
|
||||
|
||||
raise ExtractorError('There\'s not video in this tweet.')
|
||||
|
@@ -1,10 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -97,7 +97,7 @@ class VevoIE(InfoExtractor):
|
||||
if last_version['version'] == -1:
|
||||
raise ExtractorError('Unable to extract last version of the video')
|
||||
|
||||
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
||||
renditions = compat_etree_fromstring(last_version['data'])
|
||||
formats = []
|
||||
# Already sorted from worst to best quality
|
||||
for rend in renditions.findall('rendition'):
|
||||
@@ -114,7 +114,7 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
def _formats_from_smil(self, smil_xml):
|
||||
formats = []
|
||||
smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
|
||||
smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
|
||||
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
for el in els:
|
||||
src = el.attrib['src']
|
||||
|
@@ -2,8 +2,8 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -18,33 +18,35 @@ class VideofyMeIE(InfoExtractor):
|
||||
'id': '1100701',
|
||||
'ext': 'mp4',
|
||||
'title': 'This is VideofyMe',
|
||||
'description': None,
|
||||
'description': '',
|
||||
'upload_date': '20130326',
|
||||
'timestamp': 1364288959,
|
||||
'uploader': 'VideofyMe',
|
||||
'uploader_id': 'thisisvideofyme',
|
||||
'view_count': int,
|
||||
'likes': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||
video_id)
|
||||
video = config.find('video')
|
||||
sources = video.find('sources')
|
||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||
for key in ['on', 'av', 'off']] if node is not None)
|
||||
video_url = url_node.find('url').text
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'([0-9]+)', video.find('views').text, 'view count', fatal=False))
|
||||
|
||||
config = self._download_json('http://vf-player-info-loader.herokuapp.com/%s.json' % video_id, video_id)['videoinfo']
|
||||
|
||||
video = config.get('video')
|
||||
blog = config.get('blog', {})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video.find('title').text,
|
||||
'url': video_url,
|
||||
'thumbnail': video.find('thumb').text,
|
||||
'description': video.find('description').text,
|
||||
'uploader': config.find('blog/name').text,
|
||||
'uploader_id': video.find('identifier').text,
|
||||
'view_count': view_count,
|
||||
'title': video['title'],
|
||||
'url': video['sources']['source']['url'],
|
||||
'thumbnail': video.get('thumb'),
|
||||
'description': video.get('description'),
|
||||
'timestamp': parse_iso8601(video.get('date')),
|
||||
'uploader': blog.get('name'),
|
||||
'uploader_id': blog.get('identifier'),
|
||||
'view_count': int_or_none(self._search_regex(r'([0-9]+)', video.get('views'), 'view count', fatal=False)),
|
||||
'likes': int_or_none(video.get('likes')),
|
||||
'comment_count': int_or_none(video.get('nrOfComments')),
|
||||
}
|
||||
|
@@ -1,82 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class VideoLecturesNetIE(InfoExtractor):
    """Extractor for single lectures and event playlists on videolectures.net."""

    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
    IE_NAME = 'videolectures.net'

    _TESTS = [{
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'upload_date': '20130627',
            'duration': 565,
            'thumbnail': 're:http://.*\.jpg',
        },
    }, {
        # video with invalid direct format links (HTTP 403)
        'url': 'http://videolectures.net/russir2010_filippova_nlp/',
        'info_dict': {
            'id': 'russir2010_filippova_nlp',
            'ext': 'flv',
            'title': 'NLP at Google',
            'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3',
            'duration': 5352,
            'thumbnail': 're:http://.*\.jpg',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://videolectures.net/deeplearning2015_montreal/',
        'info_dict': {
            'id': 'deeplearning2015_montreal',
            'title': 'Deep Learning Summer School, Montreal 2015',
            'description': 'md5:90121a40cc6926df1bf04dcd8563ed3b',
        },
        'playlist_count': 30,
    }]

    def _real_extract(self, url):
        """Extract a lecture, or a playlist when the page has no SMIL manifest.

        The SMIL manifest for part 1 of the lecture is requested first.  An
        HTTP 404 on that request is taken to mean the page is a playlist, in
        which case the lecture links found on the page are returned as
        playlist entries.  Any other download error is propagated unchanged.
        """
        video_id = self._match_id(url)

        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id

        try:
            smil = self._download_smil(smil_url, video_id)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                # Probably a playlist
                webpage = self._download_webpage(url, video_id)
                entries = [
                    self.url_result(compat_urlparse.urljoin(url, video_url), 'VideoLecturesNet')
                    for _, video_url in re.findall(r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', webpage)]
                playlist_title = self._html_search_meta('title', webpage, 'title', fatal=True)
                playlist_description = self._html_search_meta('description', webpage, 'description')
                return self.playlist_result(entries, video_id, playlist_title, playlist_description)
            # Not a 404: do not swallow the error.  Falling through would hit
            # the code below with ``smil`` unbound and hide the real failure.
            raise

        info = self._parse_smil(smil, smil_url, video_id)

        info['id'] = video_id

        # The <switch> element's "dur" attribute, when present, is used as
        # the lecture duration.
        switch = smil.find('.//switch')
        if switch is not None:
            info['duration'] = parse_duration(switch.attrib.get('dur'))

        return info
|
@@ -101,6 +101,10 @@ class VidmeIE(InfoExtractor):
|
||||
# suspended
|
||||
'url': 'https://vid.me/Ox3G',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# deleted
|
||||
'url': 'https://vid.me/KTPm',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# no formats in the API response
|
||||
'url': 'https://vid.me/e5g',
|
||||
@@ -143,6 +147,11 @@ class VidmeIE(InfoExtractor):
|
||||
|
||||
video = response['video']
|
||||
|
||||
if video.get('state') == 'deleted':
|
||||
raise ExtractorError(
|
||||
'Vidme said: Sorry, this video has been deleted.',
|
||||
expected=True)
|
||||
|
||||
if video.get('state') in ('user-disabled', 'suspended'):
|
||||
raise ExtractorError(
|
||||
'Vidme said: This video has been suspended either due to a copyright claim, '
|
||||
|
@@ -20,8 +20,14 @@ class VidziIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url')
|
||||
video_host = self._html_search_regex(
|
||||
r'id=\'vplayer\'><img src="http://(.*?)/i', webpage,
|
||||
'video host')
|
||||
video_hash = self._html_search_regex(
|
||||
r'\|([a-z0-9]+)\|hls\|type', webpage, 'video_hash')
|
||||
ext = self._html_search_regex(
|
||||
r'\|tracks\|([a-z0-9]+)\|', webpage, 'video ext')
|
||||
video_url = 'http://' + video_host + '/' + video_hash + '/v.' + ext
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
|
188
youtube_dl/extractor/viidea.py
Normal file
188
youtube_dl/extractor/viidea.py
Normal file
@@ -0,0 +1,188 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ViideaIE(InfoExtractor):
    """Extractor for lecture sites matched by ``_VALID_URL`` (videolectures.net and others).

    A URL may resolve to a single video, a multi-part lecture, or an event
    page that lists further lectures.
    """

    # Verbose-mode pattern: whitespace and newlines inside it are ignored.
    # Every dot in a host name is escaped so that it only matches a literal
    # '.' rather than any character.
    _VALID_URL = r'''(?x)http://(?:www\.)?(?:
            videolectures\.net|
            flexilearn\.viidea\.net|
            presentations\.ocwconsortium\.org|
            video\.travel-zoom\.si|
            video\.pomp-forum\.si|
            tv\.nil\.si|
            video\.hekovnik\.com|
            video\.szko\.si|
            kpk\.viidea\.com|
            inside\.viidea\.net|
            video\.kiberpipa\.org|
            bvvideo\.si|
            kongres\.viidea\.net|
            edemokracija\.viidea\.com
        )(?:/lecture)?/(?P<id>[^/]+)(?:/video/(?P<part>\d+))?/*(?:[#?].*)?$'''

    _TESTS = [{
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': '20171',
            'display_id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'thumbnail': 're:http://.*\.jpg',
            'timestamp': 1372349289,
            'upload_date': '20130627',
            'duration': 565,
        },
    }, {
        # video with invalid direct format links (HTTP 403)
        'url': 'http://videolectures.net/russir2010_filippova_nlp/',
        'info_dict': {
            'id': '14891',
            'display_id': 'russir2010_filippova_nlp',
            'ext': 'flv',
            'title': 'NLP at Google',
            'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3',
            'thumbnail': 're:http://.*\.jpg',
            'timestamp': 1284375600,
            'upload_date': '20100913',
            'duration': 5352,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # event playlist
        'url': 'http://videolectures.net/deeplearning2015_montreal/',
        'info_dict': {
            'id': '23181',
            'title': 'Deep Learning Summer School, Montreal 2015',
            'description': 'md5:0533a85e4bd918df52a01f0e1ebe87b7',
            'thumbnail': 're:http://.*\.jpg',
            'timestamp': 1438560000,
        },
        'playlist_count': 30,
    }, {
        # multi part lecture
        'url': 'http://videolectures.net/mlss09uk_bishop_ibi/',
        'info_dict': {
            'id': '9737',
            'display_id': 'mlss09uk_bishop_ibi',
            'title': 'Introduction To Bayesian Inference',
            'thumbnail': 're:http://.*\.jpg',
            'timestamp': 1251622800,
        },
        'playlist': [{
            'info_dict': {
                'id': '9737_part1',
                'display_id': 'mlss09uk_bishop_ibi_part1',
                'ext': 'wmv',
                'title': 'Introduction To Bayesian Inference (Part 1)',
                'thumbnail': 're:http://.*\.jpg',
                'duration': 4622,
                'timestamp': 1251622800,
                'upload_date': '20090830',
            },
        }, {
            'info_dict': {
                'id': '9737_part2',
                'display_id': 'mlss09uk_bishop_ibi_part2',
                'ext': 'wmv',
                'title': 'Introduction To Bayesian Inference (Part 2)',
                'thumbnail': 're:http://.*\.jpg',
                'duration': 5641,
                'timestamp': 1251622800,
                'upload_date': '20090830',
            },
        }],
        'playlist_count': 2,
    }]

    def _real_extract(self, url):
        """Extract a single part, a multi-part lecture, or an event playlist.

        Returns the info dict of one part when a part is requested in the URL
        or the lecture has exactly one video; a ``multi_video`` result when
        the lecture has several parts; and a playlist when the lecture has no
        videos of its own or its type is ``'evt'``.
        """
        lecture_slug, explicit_part_id = re.match(self._VALID_URL, url).groups()

        webpage = self._download_webpage(url, lecture_slug)

        # The page embeds a JavaScript object literal under the key "cfg";
        # js_to_json converts it so it can be parsed as JSON.
        cfg = self._parse_json(self._search_regex(
            [r'cfg\s*:\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*:\s*\(?\s*function',
             r'cfg\s*:\s*({[^}]+})'],
            webpage, 'cfg'), lecture_slug, js_to_json)

        lecture_id = compat_str(cfg['obj_id'])

        # cfg['livepipe'] is used as the base for both the API and the SMIL
        # URLs; a protocol-relative value is completed with "http:".
        base_url = self._proto_relative_url(cfg['livepipe'], 'http:')

        lecture_data = self._download_json(
            '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
            lecture_id)['lecture'][0]

        # Metadata shared by every result produced for this lecture.
        lecture_info = {
            'id': lecture_id,
            'display_id': lecture_slug,
            'title': lecture_data['title'],
            'timestamp': parse_iso8601(lecture_data.get('time')),
            'description': lecture_data.get('description_wiki'),
            'thumbnail': lecture_data.get('thumb'),
        }

        playlist_entries = []
        lecture_type = lecture_data.get('type')
        parts = [compat_str(video) for video in cfg.get('videos', [])]
        if parts:
            multipart = len(parts) > 1

            def extract_part(part_id):
                """Build the info dict for one part from its SMIL manifest."""
                smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
                smil = self._download_smil(smil_url, lecture_id)
                info = self._parse_smil(smil, smil_url, lecture_id)
                # Parts of a multi-part lecture get distinct ids and titles.
                info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
                info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
                if multipart:
                    info['title'] += ' (Part %s)' % part_id
                switch = smil.find('.//switch')
                if switch is not None:
                    info['duration'] = parse_duration(switch.attrib.get('dur'))
                # Start from the lecture-level metadata and let the values
                # parsed from the SMIL take precedence.
                item_info = lecture_info.copy()
                item_info.update(info)
                return item_info

            if explicit_part_id or not multipart:
                result = extract_part(explicit_part_id or parts[0])
            else:
                result = {
                    '_type': 'multi_video',
                    'entries': [extract_part(part) for part in parts],
                }
                result.update(lecture_info)

            # Immediately return explicitly requested part or non event item
            if explicit_part_id or lecture_type != 'evt':
                return result

            # An event that also has its own video: keep that video as the
            # first entry of the playlist built below.
            playlist_entries.append(result)

        # It's probably a playlist
        if not parts or lecture_type == 'evt':
            playlist_webpage = self._download_webpage(
                '%s/site/ajax/drilldown/?id=%s' % (base_url, lecture_id), lecture_id)
            entries = [
                self.url_result(compat_urlparse.urljoin(url, video_url), 'Viidea')
                for _, video_url in re.findall(
                    r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', playlist_webpage)]
            playlist_entries.extend(entries)

        playlist = self.playlist_result(playlist_entries, lecture_id)
        playlist.update(lecture_info)
        return playlist
|
@@ -8,11 +8,11 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
@@ -40,17 +40,17 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
self.report_login()
|
||||
webpage = self._download_webpage(self._LOGIN_URL, None, False)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
data = urlencode_postdata(encode_dict({
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
}))
|
||||
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'vuid=%s' % vuid)
|
||||
login_request.add_header('Referer', self._LOGIN_URL)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _extract_xsrft_and_vuid(self, webpage):
|
||||
@@ -62,6 +62,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
webpage, 'vuid', group='vuid')
|
||||
return xsrft, vuid
|
||||
|
||||
def _set_vimeo_cookie(self, name, value):
|
||||
self._set_cookie('vimeo.com', name, value)
|
||||
|
||||
|
||||
class VimeoIE(VimeoBaseInfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
@@ -208,17 +211,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
data = urlencode_postdata({
|
||||
data = urlencode_postdata(encode_dict({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
}))
|
||||
if url.startswith('http://'):
|
||||
# vimeo only supports https now, but the user can give an http url
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
|
||||
password_request.add_header('Referer', url)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
@@ -227,7 +230,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
||||
data = compat_urllib_parse.urlencode({'password': password})
|
||||
data = urlencode_postdata(encode_dict({'password': password}))
|
||||
pass_url = url + '/check-password'
|
||||
password_request = compat_urllib_request.Request(pass_url, data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
@@ -384,47 +387,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
like_count = None
|
||||
comment_count = None
|
||||
|
||||
# Vimeo specific: extract request signature and timestamp
|
||||
sig = config['request']['signature']
|
||||
timestamp = config['request']['timestamp']
|
||||
|
||||
# Vimeo specific: extract video codec and quality information
|
||||
# First consider quality, then codecs, then take everything
|
||||
codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
|
||||
files = {'hd': [], 'sd': [], 'other': []}
|
||||
config_files = config["video"].get("files") or config["request"].get("files")
|
||||
for codec_name, codec_extension in codecs:
|
||||
for quality in config_files.get(codec_name, []):
|
||||
format_id = '-'.join((codec_name, quality)).lower()
|
||||
key = quality if quality in files else 'other'
|
||||
video_url = None
|
||||
if isinstance(config_files[codec_name], dict):
|
||||
file_info = config_files[codec_name][quality]
|
||||
video_url = file_info.get('url')
|
||||
else:
|
||||
file_info = {}
|
||||
if video_url is None:
|
||||
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
|
||||
% (video_id, sig, timestamp, quality, codec_name.upper())
|
||||
|
||||
files[key].append({
|
||||
'ext': codec_extension,
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(file_info.get('width')),
|
||||
'height': int_or_none(file_info.get('height')),
|
||||
'tbr': int_or_none(file_info.get('bitrate')),
|
||||
})
|
||||
formats = []
|
||||
m3u8_url = config_files.get('hls', {}).get('all')
|
||||
config_files = config['video'].get('files') or config['request'].get('files', {})
|
||||
for f in config_files.get('progressive', []):
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'http-%s' % f.get('quality'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'fps': int_or_none(f.get('fps')),
|
||||
'tbr': int_or_none(f.get('bitrate')),
|
||||
})
|
||||
m3u8_url = config_files.get('hls', {}).get('url')
|
||||
if m3u8_url:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
for key in ('other', 'sd', 'hd'):
|
||||
formats += files[key]
|
||||
self._sort_formats(formats)
|
||||
# Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
|
||||
# at the same time without actual units specified. This leads to wrong sorting.
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = config['request'].get('text_tracks')
|
||||
@@ -488,14 +473,14 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
post = urlencode_postdata(fields)
|
||||
post = urlencode_postdata(encode_dict(fields))
|
||||
password_path = self._search_regex(
|
||||
r'action="([^"]+)"', login_form, 'password URL')
|
||||
password_url = compat_urlparse.urljoin(page_url, password_path)
|
||||
password_request = compat_urllib_request.Request(password_url, post)
|
||||
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'vuid=%s' % vuid)
|
||||
self._set_cookie('vimeo.com', 'xsrft', token)
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._set_vimeo_cookie('xsrft', token)
|
||||
|
||||
return self._download_webpage(
|
||||
password_request, list_id,
|
||||
|
@@ -281,9 +281,13 @@ class VKIE(InfoExtractor):
|
||||
mobj.group(1) + ' ' + mobj.group(2)
|
||||
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'"mv_views_count_number"[^>]*>([\d,.]+) views<',
|
||||
info_page, 'view count', fatal=False))
|
||||
view_count = None
|
||||
views = self._html_search_regex(
|
||||
r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
|
||||
info_page, 'view count', fatal=False)
|
||||
if views:
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'([\d,.]+)', views, 'view count', fatal=False))
|
||||
|
||||
formats = [{
|
||||
'format_id': k,
|
||||
|
@@ -84,6 +84,5 @@ class WSJIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'categories': categories,
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -15,11 +15,11 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class GorillaVidIE(InfoExtractor):
|
||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net and filehoot.com'
|
||||
class XFileShareIE(InfoExtractor):
|
||||
IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<host>(?:www\.)?
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com))/
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto.\me))/
|
||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||
'''
|
||||
|
||||
@@ -76,6 +76,13 @@ class GorillaVidIE(InfoExtractor):
|
||||
'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vidto.me/ku5glz52nqe1.html',
|
||||
'info_dict': {
|
||||
'id': 'ku5glz52nqe1',
|
||||
'ext': 'mp4',
|
||||
'title': 'test'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -104,13 +111,18 @@ class GorillaVidIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||
|
||||
title = self._search_regex(
|
||||
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'>Watch (.+) '],
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
title = (self._search_regex(
|
||||
[r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
r'<td nowrap>([^<]+)</td>',
|
||||
r'>Watch (.+) ',
|
||||
r'<h2 class="video-page-head">([^<]+)</h2>'],
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
|
||||
video_url = self._search_regex(
|
||||
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
||||
[r'file\s*:\s*["\'](http[^"\']+)["\'],',
|
||||
r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'],
|
||||
webpage, 'file url')
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
|
||||
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
@@ -1,121 +1,171 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
from ..aes import aes_decrypt_text
|
||||
|
||||
|
||||
class YouPornIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
|
||||
'info_dict': {
|
||||
'id': '505835',
|
||||
'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20101221',
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'upload_date': '20101221',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Anonymous User uploader
|
||||
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
||||
'info_dict': {
|
||||
'id': '561726',
|
||||
'display_id': 'big-tits-awesome-brunette-on-amazing-webcam-show',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Tits Awesome Brunette On amazing webcam show',
|
||||
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Anonymous User',
|
||||
'upload_date': '20111125',
|
||||
'average_rating': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
url = mobj.group('proto') + 'www.' + mobj.group('url')
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
request = compat_urllib_request.Request(url)
|
||||
request.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
|
||||
# Get JSON parameters
|
||||
json_params = self._search_regex(
|
||||
[r'videoJa?son\s*=\s*({.+})',
|
||||
r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
|
||||
webpage, 'JSON parameters')
|
||||
try:
|
||||
params = json.loads(json_params)
|
||||
except ValueError:
|
||||
raise ExtractorError('Invalid JSON')
|
||||
title = self._search_regex(
|
||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1',
|
||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'],
|
||||
webpage, 'title', group='title')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
try:
|
||||
video_title = params['title']
|
||||
upload_date = unified_strdate(params['release_date_f'])
|
||||
video_description = params['description']
|
||||
video_uploader = params['submitted_by']
|
||||
thumbnail = params['thumbnails'][0]['image']
|
||||
except KeyError:
|
||||
raise ExtractorError('Missing JSON parameter: ' + sys.exc_info()[1])
|
||||
links = []
|
||||
|
||||
# Get all of the links from the page
|
||||
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
||||
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
||||
webpage, 'download list').strip()
|
||||
LINK_RE = r'<a href="([^"]+)">'
|
||||
links = re.findall(LINK_RE, download_list_html)
|
||||
sources = self._search_regex(
|
||||
r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
|
||||
if sources:
|
||||
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
|
||||
links.append(link)
|
||||
|
||||
# Get all encrypted links
|
||||
encrypted_links = re.findall(r'var encryptedQuality[0-9]{3}URL = \'([a-zA-Z0-9+/]+={0,2})\';', webpage)
|
||||
for encrypted_link in encrypted_links:
|
||||
link = aes_decrypt_text(encrypted_link, video_title, 32).decode('utf-8')
|
||||
# Fallback #1
|
||||
for _, link in re.findall(
|
||||
r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #3, encrypted links
|
||||
for _, encrypted_link in re.findall(
|
||||
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
|
||||
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
|
||||
|
||||
formats = []
|
||||
for link in links:
|
||||
# A link looks like this:
|
||||
# http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
|
||||
# A path looks like this:
|
||||
# /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
|
||||
video_url = unescapeHTML(link)
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
format_parts = path.split('/')[4].split('_')[:2]
|
||||
|
||||
dn = compat_urllib_parse_urlparse(video_url).netloc.partition('.')[0]
|
||||
|
||||
resolution = format_parts[0]
|
||||
height = int(resolution[:-len('p')])
|
||||
bitrate = int(format_parts[1][:-len('k')])
|
||||
format = '-'.join(format_parts) + '-' + dn
|
||||
|
||||
formats.append({
|
||||
for video_url in set(unescapeHTML(link) for link in links):
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
'resolution': resolution,
|
||||
})
|
||||
|
||||
}
|
||||
# Video URL's path looks like this:
|
||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# We will benefit from it by extracting some metadata
|
||||
mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('ERROR: no known formats available for video')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']video-description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
average_rating = int_or_none(self._search_regex(
|
||||
r'<div[^>]+class=["\']videoInfoRating["\'][^>]*>\s*<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
|
||||
webpage, 'average rating', fatal=False))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._search_regex(
|
||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
def extract_tag_box(title):
|
||||
tag_box = self._search_regex(
|
||||
(r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*'
|
||||
'<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title),
|
||||
webpage, '%s tag box' % title, default=None)
|
||||
if not tag_box:
|
||||
return []
|
||||
return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
|
||||
|
||||
categories = extract_tag_box('Category')
|
||||
tags = extract_tag_box('Tags')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'description': video_description,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'average_rating': average_rating,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'tags': tags,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -703,7 +703,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
if not id_m:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
@@ -1107,6 +1107,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not video_info:
|
||||
video_info = get_video_info
|
||||
if 'token' in get_video_info:
|
||||
# Different get_video_info requests may report different results, e.g.
|
||||
# some may report video unavailability, but some may serve it without
|
||||
# any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
|
||||
# the original webpage as well as el=info and el=embedded get_video_info
|
||||
# requests report video unavailability due to geo restriction while
|
||||
# el=detailpage succeeds and returns valid data). This is probably
|
||||
# due to YouTube measures against IP ranges of hosting providers.
|
||||
# Working around by preferring the first succeeded video_info containing
|
||||
# the token if no such video_info yet was found.
|
||||
if 'token' not in video_info:
|
||||
video_info = get_video_info
|
||||
break
|
||||
if 'token' not in video_info:
|
||||
if 'reason' in video_info:
|
||||
@@ -1332,7 +1343,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
@@ -1644,8 +1655,18 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'playlist_mincount': 91,
|
||||
'info_dict': {
|
||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'title': 'Uploads from lex will',
|
||||
}
|
||||
}, {
|
||||
'note': 'Age restricted channel',
|
||||
# from https://www.youtube.com/user/DeusExOfficial
|
||||
'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
|
||||
'playlist_mincount': 64,
|
||||
'info_dict': {
|
||||
'id': 'UUs0ifCMCm1icqRbqhUINa0w',
|
||||
'title': 'Uploads from Deus Ex',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -1666,7 +1687,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
'channelId', channel_page, 'channel id', default=None)
|
||||
if not channel_playlist_id:
|
||||
channel_playlist_id = self._search_regex(
|
||||
r'data-channel-external-id="([^"]+)"',
|
||||
r'data-(?:channel-external-|yt)id="([^"]+)"',
|
||||
channel_page, 'channel id', default=None)
|
||||
if channel_playlist_id and channel_playlist_id.startswith('UC'):
|
||||
playlist_id = 'UU' + channel_playlist_id[2:]
|
||||
|
@@ -232,10 +232,10 @@ class JSInterpreter(object):
|
||||
def extract_function(self, funcname):
|
||||
func_m = re.search(
|
||||
r'''(?x)
|
||||
(?:function\s+%s|[{;]%s\s*=\s*function)\s*
|
||||
(?:function\s+%s|[{;]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
|
||||
\((?P<args>[^)]*)\)\s*
|
||||
\{(?P<code>[^}]+)\}''' % (
|
||||
re.escape(funcname), re.escape(funcname)),
|
||||
re.escape(funcname), re.escape(funcname), re.escape(funcname)),
|
||||
self.code)
|
||||
if func_m is None:
|
||||
raise ExtractorError('Could not find JS function %r' % funcname)
|
||||
|
@@ -272,7 +272,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
return [], information
|
||||
|
||||
try:
|
||||
self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
|
||||
self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
|
||||
self.run_ffmpeg(path, new_path, acodec, more_opts)
|
||||
except AudioConversionError as e:
|
||||
raise PostProcessingError(
|
||||
|
@@ -36,6 +36,7 @@ import zlib
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_etree_fromstring,
|
||||
compat_html_entities,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
@@ -178,10 +179,19 @@ def xpath_with_ns(path, ns_map):
|
||||
|
||||
|
||||
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
|
||||
if sys.version_info < (2, 7): # Crazy 2.6
|
||||
xpath = xpath.encode('ascii')
|
||||
def _find_xpath(xpath):
|
||||
if sys.version_info < (2, 7): # Crazy 2.6
|
||||
xpath = xpath.encode('ascii')
|
||||
return node.find(xpath)
|
||||
|
||||
if isinstance(xpath, (str, compat_str)):
|
||||
n = _find_xpath(xpath)
|
||||
else:
|
||||
for xp in xpath:
|
||||
n = _find_xpath(xp)
|
||||
if n is not None:
|
||||
break
|
||||
|
||||
n = node.find(xpath)
|
||||
if n is None:
|
||||
if default is not NO_DEFAULT:
|
||||
return default
|
||||
@@ -356,7 +366,7 @@ def sanitize_path(s):
|
||||
if drive_or_unc:
|
||||
norm_path.pop(0)
|
||||
sanitized_path = [
|
||||
path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
|
||||
path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)
|
||||
for path_part in norm_path]
|
||||
if drive_or_unc:
|
||||
sanitized_path.insert(0, drive_or_unc + os.path.sep)
|
||||
@@ -814,9 +824,11 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||
if date_str is None:
|
||||
return None
|
||||
|
||||
date_str = re.sub(r'\.[0-9]+', '', date_str)
|
||||
|
||||
if timezone is None:
|
||||
m = re.search(
|
||||
r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
date_str)
|
||||
if not m:
|
||||
timezone = datetime.timedelta()
|
||||
@@ -829,9 +841,12 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||
timezone = datetime.timedelta(
|
||||
hours=sign * int(m.group('hours')),
|
||||
minutes=sign * int(m.group('minutes')))
|
||||
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
|
||||
dt = datetime.datetime.strptime(date_str, date_format) - timezone
|
||||
return calendar.timegm(dt.timetuple())
|
||||
try:
|
||||
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
|
||||
dt = datetime.datetime.strptime(date_str, date_format) - timezone
|
||||
return calendar.timegm(dt.timetuple())
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
def unified_strdate(date_str, day_first=True):
|
||||
@@ -896,7 +911,8 @@ def unified_strdate(date_str, day_first=True):
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
return upload_date
|
||||
if upload_date is not None:
|
||||
return compat_str(upload_date)
|
||||
|
||||
|
||||
def determine_ext(url, default_ext='unknown_video'):
|
||||
@@ -1651,29 +1667,6 @@ def encode_dict(d, encoding='utf-8'):
|
||||
return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())
|
||||
|
||||
|
||||
try:
|
||||
etree_iter = xml.etree.ElementTree.Element.iter
|
||||
except AttributeError: # Python <=2.6
|
||||
etree_iter = lambda n: n.findall('.//*')
|
||||
|
||||
|
||||
def parse_xml(s):
|
||||
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass # Ignore doctypes
|
||||
|
||||
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
|
||||
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
|
||||
tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
||||
# Fix up XML parser in Python 2.x
|
||||
if sys.version_info < (3, 0):
|
||||
for n in etree_iter(tree):
|
||||
if n.text is not None:
|
||||
if not isinstance(n.text, compat_str):
|
||||
n.text = n.text.decode('utf-8')
|
||||
return tree
|
||||
|
||||
|
||||
US_RATINGS = {
|
||||
'G': 0,
|
||||
'PG': 10,
|
||||
@@ -1974,7 +1967,7 @@ def dfxp2srt(dfxp_data):
|
||||
|
||||
return out
|
||||
|
||||
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
|
||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
|
||||
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.10.23'
|
||||
__version__ = '2015.11.13'
|
||||
|
Reference in New Issue
Block a user