Compare commits

...

57 Commits

Author SHA1 Message Date
Sergey M․
f34b841b51 release 2017.11.06 2017-11-06 22:39:24 +07:00
Sergey M․
e0998333fa [ChangeLog] Actualize 2017-11-06 22:36:46 +07:00
Sergey M․
909191de91 [hotstar:playlist] Fix issues and improve (closes #12465) 2017-11-05 19:15:40 +07:00
Alpesh Valia
477c97f86b [hotstar:playlist] Add extractor 2017-11-05 19:15:34 +07:00
Sergey M․
6e71bbf4ab [hotstar] Bypass geo restriction (closes #14672) 2017-11-05 16:12:56 +07:00
Sergey M․
181e381fda [test_InfoExtractor] Add test for #14660 2017-11-04 22:15:58 +07:00
Sergey M․
187ee66c94 [extractor/common] Add protocol for f4m formats 2017-11-04 22:11:39 +07:00
Sergey M․
48107c198b [f4m] Prefer baseURL for relative URLs (closes #14660) 2017-11-04 22:10:55 +07:00
Jimbolino
cd670befc4 [22tracks] Remove extractor (closes #11024) 2017-11-02 23:48:43 +07:00
Remita Amine
44cca168cc [skysport] add support ooyala embed_token protected videos(fixes #14641) 2017-11-02 14:16:15 +01:00
Remita Amine
b0f4331002 [gamespot] extract formats referenced with new data fields(#14652) 2017-11-02 13:30:50 +01:00
Sergey M․
044eeb1455 [extractor/common] Respect URL query in _extract_wowza_formats (closes #14645) 2017-11-01 23:39:26 +07:00
Sergey M․
8fe767e072 [spankbang] Detect unavailable videos (closes #14644) 2017-10-31 23:05:25 +07:00
Sergey M․
6d0630d880 release 2017.10.29 2017-10-29 07:22:53 +07:00
Sergey M․
518d357b46 [ChangeLog] Actualize 2017-10-29 07:21:33 +07:00
Sergey M․
514e8aefd4 [egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00
Sergey M․
9211e3319e [extractor/common] Prefix format id for audio only HLS formats 2017-10-29 07:05:55 +07:00
Sergey M․
056653bbb1 [utils] Add support for zero years and months in parse_duration 2017-10-29 07:04:48 +07:00
enigmaquip
c3206d02e9 [fxnetworks] Extract series metadata 2017-10-29 05:20:18 +07:00
Sergey M․
eb4b5818e2 [younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436) 2017-10-29 04:18:43 +07:00
Andrew Udvare
47a8587915 [younow] Add extractor 2017-10-29 04:17:03 +07:00
Sergey M․
8e01f3ca81 [dctptv] Fix extraction (closes #14599) 2017-10-28 22:58:01 +07:00
Sergey M․
f2332f18e6 [youtube] Restrict embed regex (#14600) 2017-10-27 22:26:43 +07:00
Sergey M․
7c1f419341 [vimeo] Restrict iframe embed regex (closes #14600) 2017-10-27 22:21:47 +07:00
Sergey M․
30e6161799 [soundgasm] Improve extraction (closes #14588) 2017-10-26 23:16:16 +07:00
Alex Seiler
dc24a7d4a2 [myvideo] Remove extractor (closes #8557)
Redirects to store.maxdome.de
2017-10-25 23:27:55 +07:00
Logan B
d673ab6562 [nbc] Add support for classic-tv videos 2017-10-25 23:23:27 +07:00
Sergey M․
b8c6ffc518 [vrtnu] Add support for cookies authentication and simplify (#11873) 2017-10-25 23:21:51 +07:00
mrBliss
7913e0fca7 [canvas] Add support for vrt.be/vrtnu (closes #11873) 2017-10-25 23:17:28 +07:00
J.D. Purcell
cdd1ce92c4 [twitch:clips] Fix title extraction 2017-10-23 23:12:50 +07:00
rawcoder
55c727a547 [ndtv] Add support for sub-sites 2017-10-22 08:32:20 +07:00
Sergey M․
36e2d3ca43 [dramafever] Fix login error message extraction 2017-10-22 08:16:30 +07:00
Sergey M․
f7a5038305 [travis] Disable IRC notifications 2017-10-22 02:46:28 +07:00
Sergey M․
9ff6273cae [nickru] Add support for more sites 2017-10-22 01:51:01 +07:00
Sergey M․
f03ee0b372 [nickde] Add support for nickelodeon.be 2017-10-22 01:42:44 +07:00
Sergey M․
cf6bda312b [nickde] Add support for nick.ch 2017-10-22 01:30:35 +07:00
Alex Seiler
3ebbd9991e [nick] Add support for more nickelodeon sites (closes #14553) 2017-10-22 01:26:58 +07:00
Sergey M․
21ce434051 [travis] Enable IRC notifications
Let's see how is it verbose now
2017-10-21 02:14:25 +07:00
Sergey M․
5c0e5bc4df [README.md] Add build status bagde 2017-10-21 02:11:11 +07:00
Sergey M․
9a9de2d7b2 [travis] Allow download tests to fail and fast finish 2017-10-21 01:58:45 +07:00
Alex Seiler
424505df76 [azmedien] Fix test 2017-10-21 01:10:56 +07:00
Sergey M․
fa3f0fd856 release 2017.10.20 2017-10-20 23:40:25 +07:00
Sergey M․
c9dcd4b0c5 [ChangeLog] Actualize 2017-10-20 23:37:55 +07:00
Alex Seiler
fc5c47d13c [parliamentliveuk] Fix extraction (closes #14524) 2017-10-20 23:31:13 +07:00
Sergey M․
a26a3c6d34 [soundcloud] Update client id (closes #14546) 2017-10-20 21:43:34 +07:00
Felix Yan
382fa456ea [ChangeLog] Fix typo 2017-10-19 23:36:32 +07:00
Alex Seiler
e1d168e592 [servus] Add extractor (closes #14362) 2017-10-19 22:17:20 +07:00
Parmjit Virk
ca1c9f26fa [unity] Add extractor (fixes #14528) 2017-10-19 04:46:06 +07:00
Sergey M․
6f3b4a98c9 [downloader/fragment] Report warning instead of error on inconsistent download state 2017-10-17 22:53:34 +07:00
Remita Amine
fa4bc6e712 [youtube] replace youtube redirect urls in description(fixes #14517) 2017-10-17 10:07:37 +00:00
Remita Amine
6b9cbd023f [pbs] restrict direct video url regex(fixes #14519) 2017-10-17 09:23:11 +00:00
Yen Chi Hsuan
c233003afe [megaphone] Fix deprecated escape sequence 2017-10-17 15:39:06 +08:00
Sergey M․
83fcf19e2d [drtv] Respect preference for direct http formats (#14509) 2017-10-16 05:48:45 +07:00
Sergey M․
acc4ea6237 [eporner] Add support for embed URLs (closes #14507) 2017-10-16 05:11:25 +07:00
Sergey M․
8cc1840ccb [arte] Capture and output error message 2017-10-15 22:12:34 +07:00
Sergey M․
a9ee4f6e49 [downloader/hls] Fix total fragments count when ad fragments exist 2017-10-15 11:03:54 +07:00
Pawit Pornkitprasan
aaab8c5e71 [niconico] Improve uploader metadata extraction robustness (closes #14135) 2017-10-15 10:40:57 +07:00
47 changed files with 1072 additions and 445 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.15.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.15.1**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.11.06*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.11.06**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.10.15.1
[debug] youtube-dl version 2017.11.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -11,12 +11,12 @@ sudo: false
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
matrix:
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
script: ./devscripts/run_tests.sh
notifications:
email:
- filippo.valsorda@gmail.com
- yasoob.khld@gmail.com
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

View File

@@ -1,3 +1,64 @@
version 2017.11.06
Core
+ [extractor/common] Add protocol for f4m formats
* [f4m] Prefer baseURL for relative URLs (#14660)
* [extractor/common] Respect URL query in _extract_wowza_formats (14645)
Extractors
+ [hotstar:playlist] Add support for playlists (#12465)
* [hotstar] Bypass geo restriction (#14672)
- [22tracks] Remove extractor (#11024, #14628)
+ [skysport] Sdd support ooyala videos protected with embed_token (#14641)
* [gamespot] Extract formats referenced with new data fields (#14652)
* [spankbang] Detect unavailable videos (#14644)
version 2017.10.29
Core
* [extractor/common] Prefix format id for audio only HLS formats
+ [utils] Add support for zero years and months in parse_duration
Extractors
* [egghead] Fix extraction (#14388)
+ [fxnetworks] Extract series metadata (#14603)
+ [younow] Add support for younow.com (#9255, #9432, #12436)
* [dctptv] Fix extraction (#14599)
* [youtube] Restrict embed regex (#14600)
* [vimeo] Restrict iframe embed regex (#14600)
* [soundgasm] Improve extraction (#14588)
- [myvideo] Remove extractor (#8557)
+ [nbc] Add support for classic-tv videos (#14575)
+ [vrtnu] Add support for cookies authentication and simplify (#11873)
+ [canvas] Add support for vrt.be/vrtnu (#11873)
* [twitch:clips] Fix title extraction (#14566)
+ [ndtv] Add support for sub-sites (#14534)
* [dramafever] Fix login error message extraction
+ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
ro, hu) (#14553)
version 2017.10.20
Core
* [downloader/fragment] Report warning instead of error on inconsistent
download state
* [downloader/hls] Fix total fragments count when ad fragments exist
Extractors
* [parliamentliveuk] Fix extraction (#14524)
* [soundcloud] Update client id (#14546)
+ [servus] Add support for servus.com (#14362)
+ [unity] Add support for unity3d.com (#14528)
* [youtube] Replace youtube redirect URLs in description (#14517)
* [pbs] Restrict direct video URL regular expression (#14519)
* [drtv] Respect preference for direct HTTP formats (#14509)
+ [eporner] Add support for embed URLs (#14507)
* [arte] Capture and output error message
* [niconico] Improve uploader metadata extraction robustness (#14135)
version 2017.10.15.1
Core
@@ -834,7 +895,7 @@ version 2017.04.14
Core
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
+ [adobepass] Improve Comcast and Verison login code (#10803)
+ [adobepass] Improve Comcast and Verizon login code (#10803)
+ [adobepass] Add support for Verizon (#10803)
Extractors

View File

@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
- [INSTALLATION](#installation)

View File

@@ -3,8 +3,6 @@
- **1up.com**
- **20min**
- **220.ro**
- **22tracks:genre**
- **22tracks:track**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@@ -342,6 +340,7 @@
- **HornBunny**
- **HotNewHipHop**
- **HotStar**
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
- **HRTi**
@@ -498,7 +497,6 @@
- **MySpace:album**
- **MySpass**
- **Myvi**
- **myvideo** (Currently broken)
- **MyVidster**
- **n-tv.de**
- **natgeo**
@@ -728,6 +726,7 @@
- **SenateISVP**
- **SendtoNews**
- **ServingSys**
- **Servus**
- **Sexu**
- **Shahid**
- **Shared**: shared.sx
@@ -887,6 +886,7 @@
- **UDNEmbed**: 聯合影音
- **UKTVPlay**
- **Unistra**
- **Unity**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
@@ -975,6 +975,7 @@
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
@@ -1033,6 +1034,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
- **YouNowChannel**
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com

View File

@@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
'ext': 'flv',
'format_id': '2148',
'protocol': 'f4m',
'tbr': 2148,
'width': 1280,
'height': 720,
}]
),
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
if __name__ == '__main__':
unittest.main()

View File

@@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(

10
test/testdata/f4m/custom_base_url.f4m vendored Normal file
View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://ns.adobe.com/f4m/1.0">
<streamType>recorded</streamType>
<baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
<duration>269.293</duration>
<bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
<media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
<metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
</media>
</manifest>

View File

@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
def _add_ns(prop, ver=1):
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
def get_base_url(manifest):
base_url = xpath_text(
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
return base_url
class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:

View File

@@ -158,7 +158,7 @@ class FragmentFD(FileDownloader):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
if ctx['fragment_index'] > 0 and resume_len == 0:
self.report_error(
self.report_warning(
'Inconsistent state of incomplete fragment download. '
'Restarting from the beginning...')
ctx['fragment_index'] = resume_len = 0

View File

@@ -88,6 +88,7 @@ class HlsFD(FragmentFD):
if line.startswith('#'):
if anvato_ad(line):
ad_frags += 1
ad_frag_next = True
continue
if ad_frag_next:
ad_frag_next = False

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
@@ -15,6 +16,7 @@ from ..utils import (
int_or_none,
NO_DEFAULT,
qualities,
try_get,
unified_strdate,
)
@@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor):
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
vsr = player_info['VSR']
vsr = try_get(player_info, lambda x: x['VSR'], dict)
if not vsr:
raise ExtractorError(
'Video %s is not available' % player_info.get('VID') or video_id,
expected=True)
error = None
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
error = try_get(
player_info, lambda x: x['custom_msg']['msg'], compat_str)
if not error:
error = 'Video %s is not available' % player_info.get('VID') or video_id
raise ExtractorError(error, expected=True)
upload_date_str = player_info.get('shootingDate')
if not upload_date_str:

View File

@@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE):
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': {
'id': '1_2444peh4',
'ext': 'mov',
'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',

View File

@@ -1,16 +1,22 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
ExtractorError,
strip_or_none,
float_or_none,
int_or_none,
parse_iso8601,
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
@@ -166,3 +172,139 @@ class CanvasEenIE(InfoExtractor):
'title': title,
'description': self._og_search_description(webpage),
}
class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'flv',
'title': 'De zwarte weduwe',
'description': 'md5:d90c21dced7db869a85db89a623998d4',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
'season': '1',
'season_number': 1,
'episode_number': 1,
},
'skip': 'This video is only available for registered users'
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
auth_data = {
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
}
auth_info = self._gigya_login(auth_data)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
# When requesting a token, no actual token is returned, but the
# necessary cookies are set.
self._request_webpage(
'https://token.vrt.be',
None, note='Requesting a token', errnote='Could not get a token',
headers={
'Content-Type': 'application/json',
'Referer': 'https://www.vrt.be/vrtnu/',
},
data=json.dumps({
'uid': auth_info['UID'],
'uidsig': auth_info['UIDSignature'],
'ts': auth_info['signatureTimestamp'],
'email': auth_info['profile']['email'],
}).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip()
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
webpage, 'description', default=None)
season = self._html_search_regex(
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
<span>seizoen\ (.+?)</span>\s*
</div>''',
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
webpage, 'season', default=None)
season_number = int_or_none(season)
episode_number = int_or_none(self._html_search_regex(
r'''(?xms)<div\ class="content__episode">\s*
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
</div>''',
webpage, 'episode_number', default=None))
release_date = parse_iso8601(self._html_search_regex(
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
webpage, 'release_date', default=None))
# If there's a ? or a # in the URL, remove them and everything after
clean_url = url.split('?')[0].split('#')[0].strip('/')
securevideo_url = clean_url + '.mssecurevideo.json'
try:
video = self._download_json(securevideo_url, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
self.raise_login_required()
raise
# We are dealing with a '../<show>.relevant' URL
redirect_url = video.get('url')
if redirect_url:
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
# There is only one entry, but with an unknown key, so just get
# the first one
video_id = list(video.values())[0].get('videoid')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'season': season,
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}

View File

@@ -29,7 +29,10 @@ from ..compat import (
compat_urlparse,
compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
)
from ..utils import (
NO_DEFAULT,
age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@ -1310,6 +1310,7 @@ class InfoExtractor(object):
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
'protocol': 'f4m',
'tbr': tbr,
'width': width,
'height': height,
@@ -1401,7 +1402,7 @@ class InfoExtractor(object):
media_url = media.get('URI')
if media_url:
format_id = []
for v in (group_id, name):
for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
@@ -2233,27 +2234,35 @@ class InfoExtractor(object):
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
http_base_url = '%s:%s' % ('http', url_base)
formats = []
def manifest_url(manifest):
m_url = '%s/%s' % (http_base_url, manifest)
if query:
m_url += '?%s' % query
return m_url
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
manifest_url('playlist.m3u8'), video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
manifest_url('manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
http_base_url + '/manifest.mpd',
manifest_url('manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
http_base_url + '/jwplayer.smil',
manifest_url('jwplayer.smil'),
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()

View File

@@ -2,53 +2,85 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_strdate
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'mp4',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
object_id = self._html_search_meta('DC.identifier', webpage)
webpage = self._download_webpage(url, display_id)
servers_json = self._download_json(
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
server = servers_json[0]['server']
m3u8_path = self._search_regex(
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
formats = self._extract_m3u8_formats(
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
entry_protocol='m3u8_native')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
note='Downloading server list', fatal=False)
if servers:
endpoint = next(
server['endpoint']
for server in servers
if isinstance(server.get('endpoint'), compat_str) and
'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
app = self._search_regex(
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
duration = float_or_none(self._search_regex(
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
default=None), scale=1000)
return {
'id': object_id,
'id': video_id,
'title': title,
'formats': formats,
'display_id': video_id,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@@ -59,7 +59,7 @@ class DramaFeverBaseIE(AMPIE):
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)

View File

@@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):

View File

@@ -2,7 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp,
@@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
'id': 'professor-frisby-introduces-composable-functional-javascript',
'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
@@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
lessons = self._download_json(
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
playlist_id, 'Downloading course lessons JSON')
entries = [
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
continue
lesson_id = lesson.get('id')
if lesson_id:
lesson_id = compat_str(lesson_id)
entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id,
playlist_id, 'Downloading course JSON', fatal=False) or {}
playlist_id = course.get('id')
if playlist_id:
playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
@@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
_TEST = {
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
'id': 'fv5yotjxcg',
'id': '1196',
'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
},
'params': {
'skip_download': True,
'format': 'bestvideo',
},
}
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
}]
def _real_extract(self, url):
lesson_id = self._match_id(url)
display_id = self._match_id(url)
lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
lesson_id = compat_str(lesson['id'])
title = lesson['title']
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, lesson_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, lesson_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'id': lesson_id,
'display_id': display_id,
'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
'series': try_get(
lesson, lambda x: x['series']['title'], compat_str),
'formats': formats,
}

View File

@@ -15,7 +15,7 @@ from ..utils import (
class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8',
@@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor):
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -150,6 +150,7 @@ from .canalc2 import Canalc2IE
from .canvas import (
CanvasIE,
CanvasEenIE,
VrtNUIE,
)
from .carambatv import (
CarambaTVIE,
@@ -431,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import HotStarIE
from .hotstar import (
HotStarIE,
HotStarPlaylistIE,
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrti import (
@@ -623,7 +627,6 @@ from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import MyviIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
@@ -925,6 +928,7 @@ from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .servus import ServusIE
from .sexu import SexuIE
from .shahid import ShahidIE
from .shared import (
@@ -1109,10 +1113,6 @@ from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentytwotracks import (
TwentyTwoTracksIE,
TwentyTwoTracksGenreIE
)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -1138,6 +1138,7 @@ from .udn import UDNEmbedIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .unity import UnityIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
@@ -1333,6 +1334,11 @@ from .youku import (
YoukuIE,
YoukuShowIE,
)
from .younow import (
YouNowLiveIE,
YouNowChannelIE,
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourupload import YourUploadIE
from .youtube import (

View File

@@ -3,27 +3,31 @@ from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
extract_attributes,
int_or_none,
parse_age_limit,
smuggle_url,
update_url_query,
)
class FXNetworksIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fxnetworks.com/video/719841347694',
'md5': '1447d4722e42ebca19e5232ab93abb22',
'url': 'http://www.fxnetworks.com/video/1032565827847',
'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
'info_dict': {
'id': '719841347694',
'id': 'dRzwHC_MMqIv',
'ext': 'mp4',
'title': 'Vanpage',
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
'title': 'First Look: Better Things - Season 2',
'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
'age_limit': 14,
'uploader': 'NEWA-FNG-FX',
'upload_date': '20160706',
'timestamp': 1467844741,
'upload_date': '20170825',
'timestamp': 1503686274,
'episode_number': 0,
'season_number': 2,
'series': 'Better Things',
},
'add_ie': ['ThePlatform'],
}, {
@@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
'id': video_id,
'title': title,
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
'series': video_data.get('data-show-title'),
'episode_number': int_or_none(video_data.get('data-episode')),
'season_number': int_or_none(video_data.get('data-season')),
'thumbnail': video_data.get('data-large-thumb'),
'age_limit': parse_age_limit(rating),
'ie_key': 'ThePlatform',

View File

@@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(OnceIE):
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -35,6 +35,9 @@ class GameSpotIE(OnceIE):
'params': {
'skip_download': True, # m3u8 downloads
},
}, {
'url': 'https://www.gamespot.com/videos/embed/6439218/',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -52,7 +55,7 @@ class GameSpotIE(OnceIE):
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
m3u8_url = streams.get('m3u8_stream')
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats(
@@ -60,7 +63,7 @@ class GameSpotIE(OnceIE):
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
progressive_url = dict_get(
streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',

View File

@@ -0,0 +1,22 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,
)
class GigyaBaseIE(InfoExtractor):
def _gigya_login(self, auth_data):
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
return auth_info

View File

@@ -1,22 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
ExtractorError,
int_or_none,
)
class HotStarIE(InfoExtractor):
class HotStarBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['IN']
def _download_json(self, *args, **kwargs):
response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
if response['resultCode'] != 'OK':
if kwargs.get('fatal'):
raise ExtractorError(
response['errorDescription'], expected=True)
return None
return response['resultObj']
def _download_content_info(self, content_id):
return self._download_json(
'https://account.hotstar.com/AVS/besc', content_id, query={
'action': 'GetAggregatedContentDetails',
'appVersion': '5.0.40',
'channel': 'PCTV',
'contentId': content_id,
})['contentInfo'][0]
class HotStarIE(HotStarBaseIE):
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TESTS = [{
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
'info_dict': {
'id': '1000076273',
'ext': 'mp4',
'title': 'On Air With AIB - English',
'title': 'On Air With AIB',
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
'timestamp': 1447227000,
'upload_date': '20151111',
@@ -34,23 +59,11 @@ class HotStarIE(InfoExtractor):
'only_matching': True,
}]
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
json_data = super(HotStarIE, self)._download_json(
url_or_request, video_id, note, fatal=fatal, query=query)
if json_data['resultCode'] != 'OK':
if fatal:
raise ExtractorError(json_data['errorDescription'])
return None
return json_data['resultObj']
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://account.hotstar.com/AVS/besc', video_id, query={
'action': 'GetAggregatedContentDetails',
'channel': 'PCTV',
'contentId': video_id,
})['contentInfo'][0]
video_data = self._download_content_info(video_id)
title = video_data['episodeTitle']
if video_data.get('encrypted') == 'Y':
@@ -99,3 +112,51 @@ class HotStarIE(InfoExtractor):
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('contentTitle'),
}
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
'info_dict': {
'id': '14812',
},
'playlist_mincount': 75,
}, {
'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
'only_matching': True,
}]
_ITEM_TYPES = {
'episodes': 'EPISODE',
'popular-clips': 'CLIPS',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
base_url = mobj.group('url')
content_id = mobj.group('content_id')
playlist_type = mobj.group('type')
content_info = self._download_content_info(content_id)
playlist_id = compat_str(content_info['categoryId'])
collection = self._download_json(
'https://search.hotstar.com/AVS/besc', playlist_id, query={
'action': 'SearchContents',
'appVersion': '5.0.40',
'channel': 'PCTV',
'moreFilters': 'series:%s;' % playlist_id,
'query': '*',
'searchOrder': 'last_broadcast_date desc,year desc,title asc',
'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
})
entries = [
self.url_result(
'%s/_/%s' % (base_url, video['contentId']),
ie=HotStarIE.ie_key(), video_id=video['contentId'])
for video in collection['response']['docs']
if video.get('contentId')]
return self.playlist_result(entries, playlist_id)

View File

@@ -2,19 +2,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
try_get,
unified_timestamp,
urlencode_postdata,
)
class MedialaanIE(InfoExtractor):
class MedialaanIE(GigyaBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:www\.|nieuws\.)?
@@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):
'password': password,
}
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']

View File

@@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor):
'id': 'GLT9749789991',
'ext': 'mp3',
'title': '#97 What Kind Of Idiot Gets Phished?',
'thumbnail': 're:^https://.*\.png.*$',
'thumbnail': r're:^https://.*\.png.*$',
'duration': 1776.26375,
'author': 'Reply All',
},

View File

@@ -1,177 +0,0 @@
from __future__ import unicode_literals
import binascii
import base64
import hashlib
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_ord,
compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
class MyVideoIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
IE_NAME = 'myvideo'
_TEST = {
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
'md5': '2d2753e8130479ba2cb7e0a37002053e',
'info_dict': {
'id': '8229274',
'ext': 'flv',
'title': 'bowling-fail-or-win',
}
}
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
def __rc4crypt(self, data, key):
x = 0
box = list(range(256))
for i in list(range(256)):
x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
box[i], box[x] = box[x], box[i]
x = 0
y = 0
out = ''
for char in data:
x = (x + 1) % 256
y = (y + box[x]) % 256
box[x], box[y] = box[y], box[x]
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
return out
def __md5(self, s):
return hashlib.md5(s).hexdigest().encode()
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
webpage = self._download_webpage(webpage_url, video_id)
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
if mobj is not None:
self.report_extraction(video_id)
video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, 'title')
return {
'id': video_id,
'url': video_url,
'title': video_title,
}
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {
'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError('Unable to extract video')
params = {}
encxml = ''
sec = mobj.group(1)
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
if not a == '_encxml':
params[a] = b
else:
encxml = compat_urllib_parse_unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning('avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
# get enc data
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
enc_data_b = binascii.unhexlify(enc_data)
sk = self.__md5(
base64.b64decode(base64.b64decode(GK)) +
self.__md5(
str(video_id).encode('utf-8')
)
)
dec_data = self.__rc4crypt(enc_data_b, sk)
# extracting infos
self.report_extraction(video_id)
video_url = None
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj:
video_url = compat_urllib_parse_unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')
if not video_url:
# extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError('unable to extract url')
video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
video_file = compat_urllib_parse_unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
else:
video_playpath = ''
video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, 'title')
return {
'id': video_id,
'url': video_url,
'tc_url': video_url,
'title': video_title,
'ext': 'flv',
'play_path': video_playpath,
'player_url': video_swfobj,
}

View File

@@ -15,7 +15,7 @@ from ..utils import (
class NBCIE(AdobePassIE):
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
@@ -67,7 +67,11 @@ class NBCIE(AdobePassIE):
'skip_download': True,
},
'skip': 'Only works from US',
}
},
{
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True,
},
]
def _real_extract(self, url):

View File

@@ -1,45 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote_plus
)
from ..utils import (
int_or_none,
parse_duration,
remove_end,
unified_strdate,
urljoin
)
class NDTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
_VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
_TEST = {
'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
'info_dict': {
'id': '300710',
'ext': 'mp4',
'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
'upload_date': '20131208',
'duration': 1327,
'thumbnail': r're:https?://.*\.jpg',
_TESTS = [
{
'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
'info_dict': {
'id': '468818',
'ext': 'mp4',
'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
'description': 'md5:f410512f1b49672e5695dea16ef2731d',
'upload_date': '20170928',
'duration': 2218,
'thumbnail': r're:https?://.*\.jpg',
}
},
}
{
# __filename is url
'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
'md5': 'f1d709352305b44443515ac56b45aa46',
'info_dict': {
'id': '470304',
'ext': 'mp4',
'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
'upload_date': '20171019',
'duration': 137,
'thumbnail': r're:https?://.*\.jpg',
}
},
{
'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
'only_matching': True
},
{
'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
'only_matching': True
},
{
'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
'only_matching': True
},
{
'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
'only_matching': True
},
{
'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
'only_matching': True
},
{
'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
'only_matching': True
},
{
'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
'only_matching': True
},
{
'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
'only_matching': True
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - NDTV')
# '__title' does not contain extra words such as sub-site name, "Video" etc.
title = compat_urllib_parse_unquote_plus(
self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
self._og_search_title(webpage))
filename = self._search_regex(
r"__filename='([^']+)'", webpage, 'video filename')
video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
# in "movies" sub-site pages, filename is URL
video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
duration = int_or_none(self._search_regex(
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
# "doctor" sub-site has MM:SS format
duration = parse_duration(self._search_regex(
r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
# "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
upload_date = unified_strdate(self._html_search_meta(
'publish-date', webpage, 'upload date', fatal=False))
'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
description = remove_end(self._og_search_description(webpage), ' (Read more)')

View File

@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
@@ -91,6 +91,21 @@ class NickDeIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
'only_matching': True,
}, {
'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
'only_matching': True,
}]
def _extract_mrss_url(self, webpage, host):
@@ -132,13 +147,28 @@ class NickNightIE(NickDeIE):
class NickRuIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeonru'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
'timestamp': 1385182762,
'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
@@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
'skip': 'Requires an account',
}, {
# "New" HTML5 video
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'md5': '351647b4917660986dc0fa8864085135',
'info_dict': {
'id': 'sm31464864',
'ext': 'mp4',
@@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498514060,
'upload_date': '20170626',
'uploader': 'ゲス',
'uploader': 'ゲス',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
@@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
},
'skip': 'Requires an account',
}, {
# Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
'info_dict': {
'id': 'sm18238488',
'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
'timestamp': 1341160408,
'upload_date': '20120701',
'uploader': None,
'uploader_id': None,
'thumbnail': r're:https?://.*',
'duration': 5271,
'view_count': int,
'comment_count': int,
},
'skip': 'Requires an account',
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
@@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor):
webpage_url = get_video_info('watch_url') or url
owner = api_data.get('owner', {})
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')

View File

@@ -11,7 +11,7 @@ class ParliamentLiveUKIE(InfoExtractor):
_TESTS = [{
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'info_dict': {
'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'id': '1_af9nv9ym',
'ext': 'mp4',
'title': 'Home Affairs Committee',
'uploader_id': 'FFMPEG-01',
@@ -28,14 +28,14 @@ class ParliamentLiveUKIE(InfoExtractor):
webpage = self._download_webpage(
'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
widget_config = self._parse_json(self._search_regex(
r'kWidgetConfig\s*=\s*({.+});',
r'(?s)kWidgetConfig\s*=\s*({.+});',
webpage, 'kaltura widget config'), video_id)
kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
kaltura_url = 'kaltura:%s:%s' % (
widget_config['wid'][1:], widget_config['entry_id'])
event_title = self._download_json(
'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
return {
'_type': 'url_transparent',
'id': video_id,
'title': event_title,
'description': '',
'url': kaltura_url,

View File

@@ -187,7 +187,7 @@ class PBSIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
(?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
@@ -367,6 +367,10 @@ class PBSIE(InfoExtractor):
{
'url': 'http://watch.knpb.org/video/2365616055/',
'only_matching': True,
},
{
'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
'only_matching': True,
}
]
_ERRORS = {

View File

@@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class ServusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
_TESTS = [{
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
'md5': '046dee641cda1c4cabe13baef3be2c1c',
'info_dict': {
'id': 'AA-1T6VBU5PW1W12',
'ext': 'mp4',
'title': 'Die Grünen aus Volkssicht',
'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
formats = self._extract_m3u8_formats(
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -2,7 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import strip_or_none
from ..utils import (
extract_attributes,
smuggle_url,
strip_or_none,
urljoin,
)
class SkySportsIE(InfoExtractor):
@@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = extract_attributes(self._search_regex(
r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
video_url = 'ooyala:%s' % video_data['data-video-id']
if video_data.get('data-token-required') == 'true':
token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
token_fetch_url = token_fetch_options.get('url')
if token_fetch_url:
embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
if embed_token:
video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'url': video_url,
'title': self._og_search_title(webpage),
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',

View File

@@ -138,7 +138,7 @@ class SoundcloudIE(InfoExtractor):
},
]
_CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
_CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod

View File

@@ -8,36 +8,49 @@ from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'md5': '010082a2c802c5275bb00030743e75ad',
'info_dict': {
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
'ext': 'm4a',
'title': 'ytdl_Piano-sample',
'description': 'Royalty Free Sample Music'
'title': 'Piano sample',
'description': 'Royalty Free Sample Music',
'uploader': 'ytdl',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('title')
audio_title = mobj.group('user') + '_' + mobj.group('title')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
audio_url = self._html_search_regex(
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
audio_id = re.split(r'\/|\.', audio_url)[-2]
r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'audio URL', group='url')
title = self._search_regex(
r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
webpage, 'title', default=display_id)
description = self._html_search_regex(
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
fatal=False)
(r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
r'(?s)<li>Description:\s(.*?)<\/li>'),
webpage, 'description', fatal=False)
audio_id = self._search_regex(
r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
return {
'id': audio_id,
'display_id': display_id,
'url': audio_url,
'title': audio_title,
'description': description
'vcodec': 'none',
'title': title,
'description': description,
'uploader': mobj.group('user'),
}

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class SpankBangIE(InfoExtractor):
@@ -33,6 +34,10 @@ class SpankBangIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError(
'Video %s is not available' % video_id, expected=True)
stream_key = self._html_search_regex(
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
webpage, 'stream key')

View File

@@ -1,86 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
# 22Tracks regularly replace the audio tracks that can be streamed on their
# site. The tracks usually expire after 1 months, so we can't add tests.
class TwentyTwoTracksIE(InfoExtractor):
_VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
IE_NAME = '22tracks:track'
_API_BASE = 'http://22tracks.com/api'
def _extract_info(self, city, genre_name, track_id=None):
item_id = track_id if track_id else genre_name
cities = self._download_json(
'%s/cities' % self._API_BASE, item_id,
'Downloading cities info',
'Unable to download cities info')
city_id = [x['id'] for x in cities if x['slug'] == city][0]
genres = self._download_json(
'%s/genres/%s' % (self._API_BASE, city_id), item_id,
'Downloading %s genres info' % city,
'Unable to download %s genres info' % city)
genre = [x for x in genres if x['slug'] == genre_name][0]
genre_id = genre['id']
tracks = self._download_json(
'%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
'Downloading %s genre tracks info' % genre_name,
'Unable to download track info')
return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
def _get_track_url(self, filename, track_id):
token = self._download_json(
'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
track_id, 'Downloading token', 'Unable to download token')
return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
def _extract_track_info(self, track_info, track_id):
download_url = self._get_track_url(track_info['filename'], track_id)
title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
return {
'id': track_id,
'url': download_url,
'ext': 'mp3',
'title': title,
'duration': int_or_none(track_info.get('duration')),
'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
city = mobj.group('city')
genre = mobj.group('genre')
track_id = mobj.group('id')
track_info = self._extract_info(city, genre, track_id)
return self._extract_track_info(track_info, track_id)
class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
_VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
IE_NAME = '22tracks:genre'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
city = mobj.group('city')
genre = mobj.group('genre')
genre_title, tracks = self._extract_info(city, genre)
entries = [
self._extract_track_info(track_info, track_info['id'])
for track_info in tracks]
return self.playlist_result(entries, genre, genre_title)

View File

@@ -609,7 +609,7 @@ class TwitchClipsIE(InfoExtractor):
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
video_id, transform_source=js_to_json)
title = clip.get('channel_title') or self._og_search_title(webpage)
title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
formats = [{
'url': option['source'],

View File

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
class UnityIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
'info_dict': {
'id': 'jWuNtik0C8E',
'ext': 'mp4',
'title': 'Live Training 22nd September 2014 - Animate Anything',
'description': 'md5:e54913114bd45a554c56cdde7669636e',
'duration': 2893,
'uploader': 'Unity',
'uploader_id': 'Unity3D',
'upload_date': '20140926',
}
}, {
'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
youtube_id = self._search_regex(
r'data-video-id="([_0-9a-zA-Z-]+)"',
webpage, 'youtube ID')
return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)

View File

@@ -412,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
PLAIN_EMBED_RE = (

View File

@@ -0,0 +1,202 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
try_get,
)
CDN_API_BASE = 'https://cdn.younow.com/php/api'
MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
class YouNowLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.younow.com/AmandaPadeezy',
'info_dict': {
'id': 'AmandaPadeezy',
'ext': 'mp4',
'is_live': True,
'title': 'March 26, 2017',
'thumbnail': r're:^https?://.*\.jpg$',
'tags': ['girls'],
'categories': ['girls'],
'uploader': 'AmandaPadeezy',
'uploader_id': '6716501',
'uploader_url': 'https://www.younow.com/AmandaPadeezy',
'creator': 'AmandaPadeezy',
},
'skip': True,
}
@classmethod
def suitable(cls, url):
return (False
if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
else super(YouNowLiveIE, cls).suitable(url))
def _real_extract(self, url):
username = self._match_id(url)
data = self._download_json(
'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
% username, username)
if data.get('errorCode') != 0:
raise ExtractorError(data['errorMsg'], expected=True)
uploader = try_get(
data, lambda x: x['user']['profileUrlString'],
compat_str) or username
return {
'id': uploader,
'is_live': True,
'title': self._live_title(uploader),
'thumbnail': data.get('awsUrl'),
'tags': data.get('tags'),
'categories': data.get('tags'),
'uploader': uploader,
'uploader_id': data.get('userId'),
'uploader_url': 'https://www.younow.com/%s' % username,
'creator': uploader,
'view_count': int_or_none(data.get('viewers')),
'like_count': int_or_none(data.get('likes')),
'formats': [{
'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
% (CDN_API_BASE, data['broadcastId'], data['userId']),
'ext': 'mp4',
'protocol': 'm3u8',
}],
}
def _extract_moment(item, fatal=True):
moment_id = item.get('momentId')
if not moment_id:
if not fatal:
return
raise ExtractorError('Unable to extract moment id')
moment_id = compat_str(moment_id)
title = item.get('text')
if not title:
title = 'YouNow %s' % (
item.get('momentType') or item.get('titleType') or 'moment')
uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
entry = {
'extractor_key': 'YouNowMoment',
'id': moment_id,
'title': title,
'view_count': int_or_none(item.get('views')),
'like_count': int_or_none(item.get('likes')),
'timestamp': int_or_none(item.get('created')),
'creator': uploader,
'uploader': uploader,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
'formats': [{
'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
% (moment_id, moment_id),
'ext': 'mp4',
'protocol': 'm3u8_native',
}],
}
return entry
class YouNowChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
_TEST = {
'url': 'https://www.younow.com/its_Kateee_/channel',
'info_dict': {
'id': '14629760',
'title': 'its_Kateee_ moments'
},
'playlist_mincount': 8,
}
def _entries(self, username, channel_id):
created_before = 0
for page_num in itertools.count(1):
if created_before is None:
break
info = self._download_json(
'%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
% (CDN_API_BASE, channel_id, created_before), username,
note='Downloading moments page %d' % page_num)
items = info.get('items')
if not items or not isinstance(items, list):
break
for item in items:
if not isinstance(item, dict):
continue
item_type = item.get('type')
if item_type == 'moment':
entry = _extract_moment(item, fatal=False)
if entry:
yield entry
elif item_type == 'collection':
moments = item.get('momentsIds')
if isinstance(moments, list):
for moment_id in moments:
m = self._download_json(
MOMENT_URL_FORMAT % moment_id, username,
note='Downloading %s moment JSON' % moment_id,
fatal=False)
if m and isinstance(m, dict) and m.get('item'):
entry = _extract_moment(m['item'])
if entry:
yield entry
created_before = int_or_none(item.get('created'))
def _real_extract(self, url):
username = self._match_id(url)
channel_id = compat_str(self._download_json(
'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
% username, username, note='Downloading user information')['userId'])
return self.playlist_result(
self._entries(username, channel_id), channel_id,
'%s moments' % username)
class YouNowMomentIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
'info_dict': {
'id': '20712117',
'ext': 'mp4',
'title': 'YouNow capture',
'view_count': int,
'like_count': int,
'timestamp': 1490432040,
'upload_date': '20170325',
'uploader': 'GABO...',
'uploader_id': 35917228,
},
}
@classmethod
def suitable(cls, url):
return (False
if YouNowChannelIE.suitable(url)
else super(YouNowMomentIE, cls).suitable(url))
def _real_extract(self, url):
video_id = self._match_id(url)
item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
return _extract_moment(item['item'])

View File

@@ -1391,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
(?:embed|v|p)/.+?)
(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
\1''', webpage)]
# lazyYT YouTube embed
@@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# description
description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
def replace_url(m):
redir_url = compat_urlparse.urljoin(url, m.group(1))
parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
qs = compat_parse_qs(parsed_redir_url.query)
q = qs.get('q')
if q and q[0]:
return q[0]
return redir_url
description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
@@ -1630,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
''', replace_url, video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)

View File

@@ -1835,10 +1835,20 @@ def parse_duration(s):
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(
r'''(?ix)(?:P?T)?
r'''(?ix)(?:P?
(?:
[0-9]+\s*y(?:ears?)?\s*
)?
(?:
[0-9]+\s*m(?:onths?)?\s*
)?
(?:
[0-9]+\s*w(?:eeks?)?\s*
)?
(?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
)?
T)?
(?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)?

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.10.15.1'
__version__ = '2017.11.06'