mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 14:18:37 +09:00
Compare commits
139 Commits
2018.12.03
...
2019.01.24
Author | SHA1 | Date | |
---|---|---|---|
![]() |
a1e171233d | ||
![]() |
7d311586ed | ||
![]() |
e118a8794f | ||
![]() |
435e382423 | ||
![]() |
0670bdd8f2 | ||
![]() |
71a1f61700 | ||
![]() |
6510a3aa97 | ||
![]() |
278d061a0c | ||
![]() |
503b604a31 | ||
![]() |
4b85f0f9db | ||
![]() |
19d6991312 | ||
![]() |
07f9febc4b | ||
![]() |
fad4ceb534 | ||
![]() |
6945b9e78f | ||
![]() |
29cfcb43da | ||
![]() |
a1a4607598 | ||
![]() |
73c19aaa9f | ||
![]() |
289ef490f7 | ||
![]() |
6ca3fa898c | ||
![]() |
31fbedc06a | ||
![]() |
15870747f0 | ||
![]() |
fc746c3fdd | ||
![]() |
4e58d9fabb | ||
![]() |
2cc779f497 | ||
![]() |
379306ef55 | ||
![]() |
f28363ad1f | ||
![]() |
2bfc1d9d68 | ||
![]() |
e2dd132f05 | ||
![]() |
79fec976b0 | ||
![]() |
29639b363d | ||
![]() |
f53cecd796 | ||
![]() |
fa4ac365f6 | ||
![]() |
bfc8eeea57 | ||
![]() |
b0d73a7456 | ||
![]() |
4fe54c128a | ||
![]() |
a16c7c033a | ||
![]() |
2f483bc1c3 | ||
![]() |
561b456e2d | ||
![]() |
929ba3997b | ||
![]() |
10026329c2 | ||
![]() |
3b983ee471 | ||
![]() |
f1ab3b7de7 | ||
![]() |
d65f6e734b | ||
![]() |
ed8db0a25c | ||
![]() |
60a899bb7e | ||
![]() |
cbdc688c41 | ||
![]() |
5caa531a1a | ||
![]() |
a64646e417 | ||
![]() |
c469e8808c | ||
![]() |
b64f6e690f | ||
![]() |
a4491dd55c | ||
![]() |
c3e543893b | ||
![]() |
432aba1c5e | ||
![]() |
7c072f00d6 | ||
![]() |
96c186e1fd | ||
![]() |
4ad159c7b0 | ||
![]() |
65615be368 | ||
![]() |
3c1089dba4 | ||
![]() |
6089ff40e7 | ||
![]() |
2543938bbe | ||
![]() |
440863ade1 | ||
![]() |
391256dc0e | ||
![]() |
06b4b90c70 | ||
![]() |
8cb5c2181a | ||
![]() |
0266854f63 | ||
![]() |
bcc334a3c6 | ||
![]() |
e9a50fba86 | ||
![]() |
04fb6928da | ||
![]() |
b7acc83550 | ||
![]() |
de0359c0af | ||
![]() |
c87f65e43d | ||
![]() |
d7c3af7a72 | ||
![]() |
aeb72b3a41 | ||
![]() |
2122d7151d | ||
![]() |
751e051557 | ||
![]() |
d226c560a6 | ||
![]() |
8437f5089f | ||
![]() |
1d803085d7 | ||
![]() |
696f4e4114 | ||
![]() |
0e713dbb11 | ||
![]() |
9b5c8751ee | ||
![]() |
d9f1123c08 | ||
![]() |
3d8eb6beb9 | ||
![]() |
38d15ba7f9 | ||
![]() |
6b688b8942 | ||
![]() |
9d9daed464 | ||
![]() |
32ac3d49ae | ||
![]() |
373941c5f0 | ||
![]() |
4e1ddc8da9 | ||
![]() |
e4d51e751e | ||
![]() |
c2dd2dc086 | ||
![]() |
140a13f5de | ||
![]() |
825cd268a3 | ||
![]() |
63529e935c | ||
![]() |
4273caf5c7 | ||
![]() |
e1a0628797 | ||
![]() |
835e45abab | ||
![]() |
904bb599be | ||
![]() |
65e29cdac3 | ||
![]() |
4ee1845454 | ||
![]() |
cfd13c4c45 | ||
![]() |
386d1fea79 | ||
![]() |
7216e9bff7 | ||
![]() |
4cee62ade0 | ||
![]() |
cbb3e4b14f | ||
![]() |
752582183a | ||
![]() |
1c82122741 | ||
![]() |
50a498a68e | ||
![]() |
252e172dea | ||
![]() |
90046d7761 | ||
![]() |
c8b3751086 | ||
![]() |
21c340b83f | ||
![]() |
c984196cf1 | ||
![]() |
7f41a598b3 | ||
![]() |
8fe104947d | ||
![]() |
0a05cfabb6 | ||
![]() |
13e17cd28e | ||
![]() |
102a4e54c5 | ||
![]() |
6e29458f24 | ||
![]() |
59c3940165 | ||
![]() |
cefe42c412 | ||
![]() |
24cc64254c | ||
![]() |
9e02c2c704 | ||
![]() |
5ee7ae5c75 | ||
![]() |
3ad6dabd33 | ||
![]() |
5f47a60c5d | ||
![]() |
1bab343704 | ||
![]() |
1d88b3e6e6 | ||
![]() |
9235b5091c | ||
![]() |
c3c098dcf2 | ||
![]() |
8c5879715f | ||
![]() |
ebb0449049 | ||
![]() |
dfe0a3a9d2 | ||
![]() |
c976873c5b | ||
![]() |
15699ec8b0 | ||
![]() |
33cc1ea586 | ||
![]() |
ae9d77dab5 | ||
![]() |
8bb0c9cc16 | ||
![]() |
5547014ad9 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.03**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.24**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.12.03
|
||||
[debug] youtube-dl version 2019.01.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -152,16 +152,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -173,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
@@ -181,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -257,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
### Regular expressions
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
#### Don't capture groups you don't use
|
||||
|
||||
A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
|
||||
|
||||
##### Example
|
||||
|
||||
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
r'(?:id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
```python
|
||||
r'(id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
|
||||
#### Make regular expressions relaxed and flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||
|
||||
#### Example
|
||||
##### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
@@ -294,6 +320,25 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Long lines policy
|
||||
|
||||
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||
|
||||
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
163
ChangeLog
163
ChangeLog
@@ -1,3 +1,166 @@
|
||||
version 2019.01.24
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Fix negation for string operators in format selection (#18961)
|
||||
|
||||
|
||||
version 2019.01.23
|
||||
|
||||
Core
|
||||
* [utils] Fix urljoin for paths with non-http(s) schemes
|
||||
* [extractor/common] Improve jwplayer relative URL handling (#18892)
|
||||
+ [YoutubeDL] Add negation support for string comparisons in format selection
|
||||
expressions (#18600, #18805)
|
||||
* [extractor/common] Improve HLS video-only format detection (#18923)
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Extend URL regular expression (#18955)
|
||||
* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
|
||||
#17197, #18338, #18842, #18899)
|
||||
+ [vrv] Add support for authentication (#14307)
|
||||
* [videomore:season] Fix extraction
|
||||
* [videomore] Improve extraction (#18908)
|
||||
+ [tnaflix] Pass Referer in metadata request (#18925)
|
||||
* [radiocanada] Relax DRM check (#18608, #18609)
|
||||
* [vimeo] Fix video password verification for videos protected by
|
||||
Referer HTTP header
|
||||
+ [hketv] Add support for hkedcity.net (#18696)
|
||||
+ [streamango] Add support for fruithosts.net (#18710)
|
||||
+ [instagram] Add support for tags (#18757)
|
||||
+ [odnoklassniki] Detect paid videos (#18876)
|
||||
* [ted] Correct acodec for HTTP formats (#18923)
|
||||
* [cartoonnetwork] Fix extraction (#15664, #17224)
|
||||
* [vimeo] Fix extraction for password protected player URLs (#18889)
|
||||
|
||||
|
||||
version 2019.01.17
|
||||
|
||||
Extractors
|
||||
* [youtube] Extend JS player signature function name regular expressions
|
||||
(#18890, #18891, #18893)
|
||||
|
||||
|
||||
version 2019.01.16
|
||||
|
||||
Core
|
||||
+ [test/helper] Add support for maxcount and count collection len checkers
|
||||
* [downloader/hls] Fix uplynk ad skipping (#18824)
|
||||
* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
|
||||
|
||||
Extractors
|
||||
* [youtube] Skip unsupported adaptive stream type (#18804)
|
||||
+ [youtube] Extract DASH formats from player response (#18804)
|
||||
* [funimation] Fix extraction (#14089)
|
||||
* [skylinewebcams] Fix extraction (#18853)
|
||||
+ [curiositystream] Add support for non app URLs
|
||||
+ [bitchute] Check formats (#18833)
|
||||
* [wistia] Extend URL regular expression (#18823)
|
||||
+ [playplustv] Add support for playplus.com (#18789)
|
||||
|
||||
|
||||
version 2019.01.10
|
||||
|
||||
Core
|
||||
* [extractor/common] Use episode name as title in _json_ld
|
||||
+ [extractor/common] Add support for movies in _json_ld
|
||||
* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
|
||||
(#18765)
|
||||
+ [utils] Add language codes replaced in 1989 revision of ISO 639
|
||||
to ISO639Utils (#18765)
|
||||
|
||||
Extractors
|
||||
* [youtube] Extract live HLS URL from player response (#18799)
|
||||
+ [outsidetv] Add support for outsidetv.com (#18774)
|
||||
* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
|
||||
+ [fox] Add support for National Geographic (#17985, #15333, #14698)
|
||||
+ [playplustv] Add support for playplus.tv (#18789)
|
||||
* [globo] Set GLBID cookie manually (#17346)
|
||||
+ [gaia] Add support for gaia.com (#14605)
|
||||
* [youporn] Fix title and description extraction (#18748)
|
||||
+ [hungama] Add support for hungama.com (#17402, #18771)
|
||||
* [dtube] Fix extraction (#18741)
|
||||
* [tvnow] Fix and rework extractors and prepare for a switch to the new API
|
||||
(#17245, #18499)
|
||||
* [carambatv:page] Fix extraction (#18739)
|
||||
|
||||
|
||||
version 2019.01.02
|
||||
|
||||
Extractors
|
||||
* [discovery] Use geo verification headers (#17838)
|
||||
+ [packtpub] Add support for subscription.packtpub.com (#18718)
|
||||
* [yourporn] Fix extraction (#18583)
|
||||
+ [acast:channel] Add support for play.acast.com (#18587)
|
||||
+ [extractors] Add missing age limits (#18621)
|
||||
+ [rmcdecouverte] Add support for live stream
|
||||
* [rmcdecouverte] Bypass geo restriction
|
||||
* [rmcdecouverte] Update URL regular expression (#18595, #18697)
|
||||
* [manyvids] Fix extraction (#18604, #18614)
|
||||
* [bitchute] Fix extraction (#18567)
|
||||
|
||||
|
||||
version 2018.12.31
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for another embed pattern (#18643)
|
||||
+ [npo:live] Add support for npostart.nl (#18644)
|
||||
* [beeg] Fix extraction (#18610, #18626)
|
||||
* [youtube] Unescape HTML for series (#18641)
|
||||
+ [youtube] Extract more format metadata
|
||||
* [youtube] Detect DRM protected videos (#1774)
|
||||
* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
|
||||
* [youtube] Extend HTML5 player regular expression (#17516)
|
||||
+ [liveleak] Add support for another embed type and restore original
|
||||
format extraction
|
||||
+ [crackle] Extract ISM and HTTP formats
|
||||
+ [twitter] Pass Referer with card request (#18579)
|
||||
* [mediasite] Extend URL regular expression (#18558)
|
||||
+ [lecturio] Add support for lecturio.de (#18562)
|
||||
+ [discovery] Add support for Scripps Networks watch domains (#17947)
|
||||
|
||||
|
||||
version 2018.12.17
|
||||
|
||||
Extractors
|
||||
* [ard:beta] Improve geo restricted videos extraction
|
||||
* [ard:beta] Fix subtitles extraction
|
||||
* [ard:beta] Improve extraction robustness
|
||||
* [ard:beta] Relax URL regular expression (#18441)
|
||||
* [acast] Add support for embed.acast.com and play.acast.com (#18483)
|
||||
* [iprima] Relax URL regular expression (#18515, #18540)
|
||||
* [vrv] Fix initial state extraction (#18553)
|
||||
* [youtube] Fix mark watched (#18546)
|
||||
+ [safari] Add support for learning.oreilly.com (#18510)
|
||||
* [youtube] Fix multifeed extraction (#18531)
|
||||
* [lecturio] Improve subtitles extraction (#18488)
|
||||
* [uol] Fix format URL extraction (#18480)
|
||||
+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
|
||||
|
||||
|
||||
version 2018.12.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Keep session cookies in cookie file between runs
|
||||
* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
|
||||
|
||||
Extractors
|
||||
+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
|
||||
+ [aenetworks] Add support for historyvault.com (#18460)
|
||||
* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
|
||||
#17223, #18404)
|
||||
* [iprima] Relax URL regular expression (#18453)
|
||||
* [hotstar] Fix video data extraction (#18386)
|
||||
* [ard:mediathek] Fix title and description extraction (#18349, #18371)
|
||||
* [xvideos] Switch to HTTPS (#18422, #18427)
|
||||
+ [lecturio] Add support for lecturio.com (#18405)
|
||||
+ [nrktv:series] Add support for extra materials
|
||||
* [nrktv:season,series] Fix extraction (#17159, #17258)
|
||||
* [nrktv] Relax URL regular expression (#18304, #18387)
|
||||
* [yourporn] Fix extraction (#18424, #18425)
|
||||
* [tbs] Fix info extraction (#18403)
|
||||
+ [gamespot] Add support for review URLs
|
||||
|
||||
|
||||
version 2018.12.03
|
||||
|
||||
Core
|
||||
|
69
README.md
69
README.md
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
|
||||
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
@@ -667,7 +667,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `fps`: Frame rate
|
||||
|
||||
Filtering also works for the comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and the following string meta fields:
|
||||
Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields:
|
||||
- `ext`: File extension
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
@@ -675,6 +675,8 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||
@@ -1024,16 +1026,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -1045,7 +1051,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
@@ -1053,7 +1059,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -1129,11 +1135,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
### Regular expressions
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
#### Don't capture groups you don't use
|
||||
|
||||
A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
|
||||
|
||||
##### Example
|
||||
|
||||
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
r'(?:id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
```python
|
||||
r'(id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
|
||||
#### Make regular expressions relaxed and flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||
|
||||
#### Example
|
||||
##### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
@@ -1166,6 +1194,25 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Long lines policy
|
||||
|
||||
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||
|
||||
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
@@ -33,7 +33,7 @@
|
||||
- **AdobeTVShow**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
@@ -320,6 +320,7 @@
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
@@ -360,6 +361,7 @@
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HitRecord**
|
||||
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **hotstar**
|
||||
@@ -370,18 +372,22 @@
|
||||
- **HRTiPlaylist**
|
||||
- **Huajiao**: 花椒直播
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hungama**
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **ImgurAlbum**
|
||||
- **imgur:album**
|
||||
- **imgur:gallery**
|
||||
- **Ina**
|
||||
- **Inc**
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:tag**: Instagram hashtag search
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
@@ -435,6 +441,9 @@
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lecturio**
|
||||
- **LecturioCourse**
|
||||
- **LecturioDeCourse**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **Lenta**
|
||||
@@ -536,8 +545,6 @@
|
||||
- **MyviEmbed**
|
||||
- **MyVisionTV**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
- **natgeo:video**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
@@ -638,6 +645,7 @@
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
@@ -662,6 +670,7 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **PlayPlusTV**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -853,6 +862,8 @@
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
- **TeachableCourse**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
@@ -928,7 +939,9 @@
|
||||
- **TVNet**
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
- **TVNowAnnual**
|
||||
- **TVNowNew**
|
||||
- **TVNowSeason**
|
||||
- **TVNowShow**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
@@ -961,8 +974,6 @@
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Upskill**
|
||||
- **UpskillCourse**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
|
@@ -153,15 +153,27 @@ def expect_value(self, got, expected, field):
|
||||
isinstance(got, compat_str),
|
||||
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
|
||||
got = 'md5:' + md5(got)
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
|
||||
self.assertTrue(
|
||||
isinstance(got, (list, dict)),
|
||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||
field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
op, _, expected_num = expected.partition(':')
|
||||
expected_num = int(expected_num)
|
||||
if op == 'mincount':
|
||||
assert_func = assertGreaterEqual
|
||||
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||
elif op == 'maxcount':
|
||||
assert_func = assertLessEqual
|
||||
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||
elif op == 'count':
|
||||
assert_func = assertEqual
|
||||
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||
else:
|
||||
assert False
|
||||
assert_func(
|
||||
self, len(got), expected_num,
|
||||
'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
|
||||
msg_tmpl % (expected_num, field, len(got)))
|
||||
return
|
||||
self.assertEqual(
|
||||
expected, got,
|
||||
@@ -237,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
|
||||
self.assertTrue(got >= expected, msg)
|
||||
|
||||
|
||||
def assertLessEqual(self, got, expected, msg=None):
    """Fail the test case ``self`` unless ``got <= expected``.

    Args:
        self: object providing ``assertTrue(condition, msg)``; the failure
            is reported through that method.
        got: value under test.
        expected: upper bound that ``got`` must not exceed.
        msg: optional failure message; when omitted, a message showing both
            values is generated.
    """
    if got <= expected:
        return
    if msg is None:
        msg = '%r not less than or equal to %r' % (got, expected)
    # The comparison is already known to have failed, so report the failure
    # directly instead of evaluating it a second time.
    self.assertTrue(False, msg)
|
||||
|
||||
|
||||
def assertEqual(self, got, expected, msg=None):
    """Fail the test case ``self`` unless ``got == expected``.

    Args:
        self: object providing ``assertTrue(condition, msg)``; the failure
            is reported through that method.
        got: value under test.
        expected: value that ``got`` must compare equal to.
        msg: optional failure message; when omitted, a message showing both
            values is generated.
    """
    if got == expected:
        return
    if msg is None:
        msg = '%r not equal to %r' % (got, expected)
    # The comparison is already known to have failed, so report the failure
    # directly instead of evaluating it a second time.
    self.assertTrue(False, msg)
|
||||
|
||||
|
||||
def expect_warnings(ydl, warnings_re):
|
||||
real_warning = ydl.report_warning
|
||||
|
||||
|
@@ -497,7 +497,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
)
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/18923
|
||||
# https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
|
||||
'ted_18923',
|
||||
'http://hls.ted.com/talks/31241.m3u8',
|
||||
[{
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '600k-Audio',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '68',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '163',
|
||||
'acodec': 'none',
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '481',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '769',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '984',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '1255',
|
||||
'acodec': 'none',
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '1693',
|
||||
'acodec': 'none',
|
||||
'width': 853,
|
||||
'height': 480,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '2462',
|
||||
'acodec': 'none',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
),
|
||||
]
|
||||
|
||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||
|
@@ -239,6 +239,76 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||
|
||||
def test_format_selection_string_ops(self):
    """Exercise each string comparison operator accepted in format filters.

    Two formats with the ids 'abc-cba' and 'zxc-cxz' are offered. Every
    operator and its negated form must pick the expected one, and filter
    combinations that exclude both formats must raise ExtractorError.
    """
    formats = [
        {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
        {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
    ]
    info_dict = _make_result(formats)

    def selected_format_id(format_spec):
        # Run a fresh selection and report the format id of the first
        # recorded download.
        ydl = YDL({'format': format_spec})
        ydl.process_ie_result(info_dict.copy())
        return ydl.downloaded_info_dicts[0]['format_id']

    def assert_nothing_selected(format_spec):
        # A filter that rejects every offered format must abort processing.
        ydl = YDL({'format': format_spec})
        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())

    # equals (=)
    self.assertEqual(selected_format_id('[format_id=abc-cba]'), 'abc-cba')

    # does not equal (!=)
    self.assertEqual(selected_format_id('[format_id!=abc-cba]'), 'zxc-cxz')
    assert_nothing_selected('[format_id!=abc-cba][format_id!=zxc-cxz]')

    # starts with (^=)
    self.assertEqual(selected_format_id('[format_id^=abc]'), 'abc-cba')

    # does not start with (!^=)
    self.assertEqual(selected_format_id('[format_id!^=abc]'), 'zxc-cxz')
    assert_nothing_selected('[format_id!^=abc][format_id!^=zxc]')

    # ends with ($=)
    self.assertEqual(selected_format_id('[format_id$=cba]'), 'abc-cba')

    # does not end with (!$=)
    self.assertEqual(selected_format_id('[format_id!$=cba]'), 'zxc-cxz')
    assert_nothing_selected('[format_id!$=cba][format_id!$=cxz]')

    # contains (*=)
    self.assertEqual(selected_format_id('[format_id*=bc-cb]'), 'abc-cba')

    # does not contain (!*=)
    self.assertEqual(selected_format_id('[format_id!*=bc-cb]'), 'zxc-cxz')
    assert_nothing_selected('[format_id!*=abc][format_id!*=zxc]')
    assert_nothing_selected('[format_id!*=-]')
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||
|
34
test/test_YoutubeDLCookieJar.py
Normal file
34
test/test_YoutubeDLCookieJar.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import YoutubeDLCookieJar
|
||||
|
||||
|
||||
class TestYoutubeDLCookieJar(unittest.TestCase):
    """Round-trip tests for YoutubeDLCookieJar load/save."""

    def test_keep_session_cookies(self):
        """Cookies with an empty or zero expiry must survive load + save."""
        # Resolve the fixture relative to this test file so the test does
        # not depend on the current working directory.
        cookie_file = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'testdata', 'cookies', 'session_cookies.txt')
        cookiejar = YoutubeDLCookieJar(cookie_file)
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        # delete=False: the jar writes to the file by name, and the file is
        # removed explicitly in the finally block below.
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
            temp = tf.read().decode('utf-8')
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
        finally:
            tf.close()
            os.remove(tf.name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ''), None)
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||
self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||
|
||||
def test_url_or_none(self):
|
||||
self.assertEqual(url_or_none(None), None)
|
||||
|
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
#EXTM3U
|
||||
#EXT-X-VERSION:4
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
|
||||
/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
|
||||
/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
|
||||
/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
|
||||
/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
|
||||
/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
|
@@ -88,6 +88,7 @@ from .utils import (
|
||||
version_tuple,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
@@ -558,7 +559,7 @@ class YoutubeDL(object):
|
||||
self.restore_console_title()
|
||||
|
||||
if self.params.get('cookiefile') is not None:
|
||||
self.cookiejar.save()
|
||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@@ -1062,21 +1063,24 @@ class YoutubeDL(object):
|
||||
if not m:
|
||||
STR_OPERATORS = {
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(filter_spec)
|
||||
if m:
|
||||
comparison_value = m.group('value')
|
||||
op = STR_OPERATORS[m.group('op')]
|
||||
str_op = STR_OPERATORS[m.group('op')]
|
||||
if m.group('negation'):
|
||||
op = lambda attr, value: not str_op(attr, value)
|
||||
else:
|
||||
op = str_op
|
||||
|
||||
if not m:
|
||||
raise ValueError('Invalid filter specification %r' % filter_spec)
|
||||
@@ -2297,10 +2301,9 @@ class YoutubeDL(object):
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
opts_cookiefile = expand_path(opts_cookiefile)
|
||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||
opts_cookiefile)
|
||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
self.cookiejar.load()
|
||||
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
|
@@ -75,10 +75,14 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
def is_ad_fragment(s):
|
||||
def is_ad_fragment_start(s):
    """Return True if playlist line ``s`` is a tag that opens an ad section.

    Two tag forms are recognised: a line starting with
    '#ANVATO-SEGMENT-INFO' that contains 'type=ad', and a line starting
    with '#UPLYNK-SEGMENT' that ends with ',ad'.
    """
    # Parentheses make the and/or grouping explicit; the result is the same
    # as the operators' natural precedence.
    return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s) or
            (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
|
||||
|
||||
def is_ad_fragment_end(s):
    """Return True if playlist line ``s`` is a tag that closes an ad section.

    Two tag forms are recognised: a line starting with
    '#ANVATO-SEGMENT-INFO' that contains 'type=master', and a line starting
    with '#UPLYNK-SEGMENT' that ends with ',segment'.
    """
    # Parentheses make the and/or grouping explicit; the result is the same
    # as the operators' natural precedence.
    return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s) or
            (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
ad_frag_next = False
|
||||
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if is_ad_fragment(line):
|
||||
ad_frags += 1
|
||||
if is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
continue
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
ad_frags += 1
|
||||
continue
|
||||
media_frags += 1
|
||||
|
||||
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
continue
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif is_ad_fragment(line):
|
||||
elif is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -17,25 +17,15 @@ from ..utils import (
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# test with one bling
|
||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
||||
'info_dict': {
|
||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'timestamp': 1196172000,
|
||||
'upload_date': '20071127',
|
||||
'duration': 211,
|
||||
'creator': 'Concierge',
|
||||
'series': 'Condé Nast Traveler Podcast',
|
||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
||||
}
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'info_dict': {
|
||||
@@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -83,17 +79,27 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.acast.com/condenasttraveler',
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/todayinfocus',
|
||||
'info_dict': {
|
||||
'id': '50544219-29bb-499e-a083-6087f4cb7797',
|
||||
'title': 'Condé Nast Traveler Podcast',
|
||||
'description': 'md5:98646dee22a5b386626ae31866638fbd',
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
'playlist_mincount': 35,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
@@ -106,7 +112,7 @@ class ACastChannelIE(InfoExtractor):
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/full-special
|
||||
specials/(?P<special_display_id>[^/]+)/full-special|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
@@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
|
@@ -8,20 +8,23 @@ from .generic import GenericIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
@@ -51,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    URLs accepted by ARDBetaMediathekIE are always rejected here, so that
    extractor takes them; every other URL goes through the inherited check.
    """
    if ARDBetaMediathekIE.suitable(url):
        return False
    return super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
@@ -173,13 +183,18 @@ class ARDMediathekIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description')
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
@@ -288,7 +303,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
||||
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
@@ -302,12 +317,18 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'upload_date': '20180826',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
@@ -318,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': display_id,
|
||||
}
|
||||
formats = []
|
||||
subtitles = {}
|
||||
geoblocked = False
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
||||
|
||||
if widget.get('_geoblocked') is True:
|
||||
geoblocked = True
|
||||
if '_duration' in widget:
|
||||
res['duration'] = widget['_duration']
|
||||
res['duration'] = int_or_none(widget['_duration'])
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
||||
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
if '_subtitleUrl' in widget:
|
||||
res['subtitles'] = {'de': [{
|
||||
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('de', []).append({
|
||||
'ext': 'ttml',
|
||||
'url': widget['_subtitleUrl'],
|
||||
}]}
|
||||
'url': subtitle_url,
|
||||
})
|
||||
if '_quality' in widget:
|
||||
format_url = widget['_stream']['json'][0]
|
||||
|
||||
if format_url.endswith('.f4m'):
|
||||
format_url = url_or_none(try_get(
|
||||
widget, lambda x: x['_stream']['json'][0]))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif format_url.endswith('m3u8'):
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
# HTTP formats are not available when geoblocked is True,
|
||||
# other formats are fine though
|
||||
if geoblocked:
|
||||
continue
|
||||
quality = str_or_none(widget.get('_quality'))
|
||||
formats.append({
|
||||
'format_id': 'http-' + widget['_quality'],
|
||||
'format_id': ('http-' + quality) if quality else 'http',
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
if not formats and geoblocked:
|
||||
self.raise_geo_restricted(
|
||||
msg='This video is not available due to geoblocking',
|
||||
countries=['DE'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
res['formats'] = formats
|
||||
res.update({
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return res
|
||||
|
@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(api_response['url']):
|
||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
|
@@ -795,6 +795,15 @@ class BBCIE(BBCCoUkIE):
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
'id': 'p06w9tws',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -945,6 +954,15 @@ class BBCIE(BBCCoUkIE):
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
webpage, 'group id', default=None)
|
||||
if playlist_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||
|
@@ -1,15 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,29 +31,9 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cpl_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
||||
webpage, 'cpl', default=None, group='url')
|
||||
|
||||
cpl_url = urljoin(url, cpl_url)
|
||||
|
||||
beeg_version, beeg_salt = [None] * 2
|
||||
|
||||
if cpl_url:
|
||||
cpl = self._download_webpage(
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = int_or_none(self._search_regex(
|
||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
||||
'beeg version', default=None)) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '2185'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
@@ -68,37 +43,6 @@ class BeegIE(InfoExtractor):
|
||||
if video:
|
||||
break
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
n.append(s[:x])
|
||||
return s[x:]
|
||||
n = []
|
||||
r = len(o) % e
|
||||
if r > 0:
|
||||
o = cut(o, r)
|
||||
while len(o) > e:
|
||||
o = cut(o, e)
|
||||
n.append(o)
|
||||
return n
|
||||
|
||||
def decrypt_key(key):
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
||||
a = beeg_salt
|
||||
e = compat_urllib_parse_unquote(key)
|
||||
o = ''.join([
|
||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
||||
for n in range(len(e))])
|
||||
return ''.join(split(o, 3)[::-1])
|
||||
|
||||
def decrypt_url(encrypted_url):
|
||||
encrypted_url = self._proto_relative_url(
|
||||
encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
|
||||
key = self._search_regex(
|
||||
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
|
||||
if not key:
|
||||
return encrypted_url
|
||||
return encrypted_url.replace(key, decrypt_key(key))
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
@@ -108,18 +52,20 @@ class BeegIE(InfoExtractor):
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': decrypt_url(video_url),
|
||||
'url': self._proto_relative_url(
|
||||
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
video_id = video.get('id') or video_id
|
||||
video_id = compat_str(video.get('id') or video_id)
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
series = video.get('ps_name')
|
||||
|
||||
timestamp = parse_iso8601(video.get('date'), ' ')
|
||||
timestamp = unified_timestamp(video.get('date'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
@@ -129,6 +75,7 @@ class BeegIE(InfoExtractor):
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
|
@@ -5,7 +5,10 @@ import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
@@ -43,10 +46,16 @@ class BitChuteIE(InfoExtractor):
|
||||
'description', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
format_urls = []
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
format_urls.append(mobj.group('url'))
|
||||
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
|
||||
|
||||
formats = [
|
||||
{'url': mobj.group('url')}
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
|
@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
|
||||
'only_matching': True,
|
||||
'age_limit': 18
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
|
||||
'title': self._live_title(user_id),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
|
||||
'duration': 1274,
|
||||
'timestamp': 1528018608,
|
||||
'upload_date': '20180603',
|
||||
'age_limit': 18
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'uploader': 'MileenaK',
|
||||
'upload_date': '20160322',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
videomore_url = VideomoreIE._extract_url(webpage)
|
||||
if not videomore_url:
|
||||
videomore_id = self._search_regex(
|
||||
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
|
||||
default=None)
|
||||
if videomore_id:
|
||||
videomore_url = 'videomore:%s' % videomore_id
|
||||
if videomore_url:
|
||||
title = self._og_search_title(webpage)
|
||||
return {
|
||||
|
@@ -1,20 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
|
||||
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
|
||||
'info_dict': {
|
||||
'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
|
||||
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starfire the Cat Lady',
|
||||
'description': 'Robin decides to become a cat so that Starfire will finally love him.',
|
||||
'title': 'How to Draw Upgrade',
|
||||
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
|
||||
query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
|
||||
return self._extract_cvp_info(
|
||||
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
||||
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
|
||||
},
|
||||
}, {
|
||||
|
||||
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
|
||||
metadata_re = ''
|
||||
if content_re:
|
||||
metadata_re = r'|video_metadata\.content_' + content_re
|
||||
return self._search_regex(
|
||||
r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
|
||||
webpage, name, fatal=fatal)
|
||||
|
||||
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
|
||||
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, {'networkId': 'cartoonnetwork'}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': self._search_regex(
|
||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true',
|
||||
'auth_required': find_field('authType', 'auth type') != 'unauth',
|
||||
})
|
||||
|
||||
series = find_field(
|
||||
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
|
||||
info.update({
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'series': series,
|
||||
'episode': title,
|
||||
})
|
||||
|
||||
for field in ('season', 'episode'):
|
||||
field_name = field + 'Number'
|
||||
info[field + '_number'] = int_or_none(find_field(
|
||||
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
|
||||
|
||||
return info
|
||||
|
@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'http://cnn.com/video/?/video/' + cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
|
@@ -1239,17 +1239,27 @@ class InfoExtractor(object):
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type in ('TVEpisode', 'Episode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': unescapeHTML(e.get('name')),
|
||||
'episode': episode_name,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
})
|
||||
if not info.get('title') and episode_name:
|
||||
info['title'] = episode_name
|
||||
part_of_season = e.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Movie':
|
||||
info.update({
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('dateCreated')),
|
||||
})
|
||||
elif item_type in ('Article', 'NewsArticle'):
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
@@ -1586,6 +1596,7 @@ class InfoExtractor(object):
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
||||
# 3. https://github.com/rg3/youtube-dl/issues/18923
|
||||
|
||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||
# i.e. playlists that describe available qualities. On the other hand
|
||||
@@ -1657,11 +1668,16 @@ class InfoExtractor(object):
|
||||
rendition = stream_group[0]
|
||||
return rendition.get('NAME') or stream_group_id
|
||||
|
||||
# parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
|
||||
# chance to detect video only formats when EXT-X-STREAM-INF tags
|
||||
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_stream_inf = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@@ -2614,7 +2630,7 @@ class InfoExtractor(object):
|
||||
'id': this_video_id,
|
||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
@@ -2641,12 +2657,9 @@ class InfoExtractor(object):
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
if source_url in urls:
|
||||
source_url = urljoin(
|
||||
base_url, self._proto_relative_url(source.get('file')))
|
||||
if not source_url or source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
|
@@ -48,6 +48,21 @@ class CrackleIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'360p.mp4': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'480p.mp4': {
|
||||
'width': 768,
|
||||
'height': 432,
|
||||
},
|
||||
'480p_1mbps.mp4': {
|
||||
'width': 852,
|
||||
'height': 480,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -95,6 +110,20 @@ class CrackleIE(InfoExtractor):
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif format_url.endswith('.ism/Manifest'):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
mfs_path = e.get('Type')
|
||||
mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
|
||||
if not mfs_info:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||
'width': mfs_info['width'],
|
||||
'height': mfs_info['height'],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media.get('Description')
|
||||
|
@@ -144,7 +144,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -269,6 +269,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
|
@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
self._handle_errors(result)
|
||||
self._auth_token = result['message']['auth_token']
|
||||
|
||||
def _extract_media_info(self, media):
|
||||
video_id = compat_str(media['id'])
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
return self._extract_media_info(media)
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}
|
||||
'playlist_mincount': 17,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_id = self._match_id(url)
|
||||
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'collections/' + collection_id, collection_id)
|
||||
entries = []
|
||||
for media in collection.get('media', []):
|
||||
entries.append(self._extract_media_info(media))
|
||||
media_id = compat_str(media.get('id'))
|
||||
entries.append(self.url_result(
|
||||
'https://curiositystream.com/video/' + media_id,
|
||||
CuriosityStreamIE.ie_key(), media_id))
|
||||
return self.playlist_result(
|
||||
entries, collection_id,
|
||||
collection.get('title'), collection.get('description'))
|
||||
|
@@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
)|
|
||||
watch\.
|
||||
(?:
|
||||
hgtv|
|
||||
foodnetwork|
|
||||
travelchannel|
|
||||
diynetwork|
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||
@@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'https://%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
@@ -81,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
stream = self._download_json(
|
||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||
display_id, headers={
|
||||
'Authorization': 'Bearer ' + access_token,
|
||||
})
|
||||
display_id, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
|
@@ -15,16 +15,16 @@ from ..utils import (
|
||||
class DTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
||||
_TEST = {
|
||||
'url': 'https://d.tube/#!/v/benswann/zqd630em',
|
||||
'md5': 'a03eaa186618ffa7a3145945543a251e',
|
||||
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
|
||||
'md5': '9f29088fa08d699a7565ee983f56a06e',
|
||||
'info_dict': {
|
||||
'id': 'zqd630em',
|
||||
'id': 'x380jtr1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
|
||||
'description': 'md5:700d164e066b87f9eac057949e4227c2',
|
||||
'uploader_id': 'benswann',
|
||||
'upload_date': '20180222',
|
||||
'timestamp': 1519328958,
|
||||
'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
|
||||
'description': 'md5:60be222088183be3a42f196f34235776',
|
||||
'uploader_id': 'broncnutz',
|
||||
'upload_date': '20190107',
|
||||
'timestamp': 1546854054,
|
||||
},
|
||||
'params': {
|
||||
'format': '480p',
|
||||
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
|
||||
def canonical_url(h):
|
||||
if not h:
|
||||
return None
|
||||
return 'https://ipfs.io/ipfs/' + h
|
||||
return 'https://video.dtube.top/ipfs/' + h
|
||||
|
||||
formats = []
|
||||
for q in ('240', '480', '720', '1080', ''):
|
||||
|
@@ -411,6 +411,7 @@ from .funk import (
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
@@ -451,6 +452,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hketv import HKETVIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
@@ -469,6 +471,10 @@ from .hrti import (
|
||||
)
|
||||
from .huajiao import HuajiaoIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import (
|
||||
@@ -483,12 +489,17 @@ from .imdb import (
|
||||
from .imgur import (
|
||||
ImgurIE,
|
||||
ImgurAlbumIE,
|
||||
ImgurGalleryIE,
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .inc import IncIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .instagram import (
|
||||
InstagramIE,
|
||||
InstagramUserIE,
|
||||
InstagramTagIE,
|
||||
)
|
||||
from .internazionale import InternazionaleIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
@@ -553,6 +564,11 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioIE,
|
||||
LecturioCourseIE,
|
||||
LecturioDeCourseIE,
|
||||
)
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
@@ -676,11 +692,7 @@ from .myvi import (
|
||||
MyviEmbedIE,
|
||||
)
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
NationalGeographicIE,
|
||||
NationalGeographicEpisodeGuideIE,
|
||||
)
|
||||
from .nationalgeographic import NationalGeographicVideoIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@@ -822,6 +834,7 @@ from .orf import (
|
||||
ORFOE1IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .outsidetv import OutsideTVIE
|
||||
from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
@@ -850,6 +863,7 @@ from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
@@ -1086,6 +1100,10 @@ from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
TeachableCourseIE,
|
||||
)
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -1183,7 +1201,9 @@ from .tvnet import TVNetIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
TVNowListIE,
|
||||
TVNowNewIE,
|
||||
TVNowSeasonIE,
|
||||
TVNowAnnualIE,
|
||||
TVNowShowIE,
|
||||
)
|
||||
from .tvp import (
|
||||
@@ -1235,10 +1255,6 @@ from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .upskill import (
|
||||
UpskillIE,
|
||||
UpskillCourseIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
|
@@ -1,11 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# import json
|
||||
# import uuid
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FOXIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)'
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
|
||||
@@ -43,41 +43,47 @@ class FOXIE(AdobePassIE):
|
||||
# episode, geo-restricted, tv provided required
|
||||
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# _access_token = None
|
||||
|
||||
# def _call_api(self, path, video_id, data=None):
|
||||
# headers = {
|
||||
# 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd',
|
||||
# }
|
||||
# if self._access_token:
|
||||
# headers['Authorization'] = 'Bearer ' + self._access_token
|
||||
# return self._download_json(
|
||||
# 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers)
|
||||
|
||||
# def _real_initialize(self):
|
||||
# self._access_token = self._call_api(
|
||||
# 'login', None, json.dumps({
|
||||
# 'deviceId': compat_str(uuid.uuid4()),
|
||||
# }).encode())['accessToken']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
||||
'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id,
|
||||
video_id, headers={
|
||||
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': url,
|
||||
})
|
||||
# video = self._call_api('vodplayer/' + video_id, video_id)
|
||||
|
||||
title = video['name']
|
||||
release_url = video['videoRelease']['url']
|
||||
|
||||
description = video.get('description')
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
rating = video.get('contentRating')
|
||||
age_limit = parse_age_limit(rating)
|
||||
# release_url = video['url']
|
||||
|
||||
data = try_get(
|
||||
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||
|
||||
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||
|
||||
series = video.get('seriesName') or data.get(
|
||||
'seriesName') or data.get('show')
|
||||
season_number = int_or_none(video.get('seasonNumber'))
|
||||
episode = video.get('name')
|
||||
episode_number = int_or_none(video.get('episodeNumber'))
|
||||
release_year = int_or_none(video.get('releaseYear'))
|
||||
|
||||
rating = video.get('contentRating')
|
||||
if data.get('authRequired'):
|
||||
resource = self._get_mvpd_resource(
|
||||
'fbc-fox', title, video.get('guid'), rating)
|
||||
@@ -86,6 +92,18 @@ class FOXIE(AdobePassIE):
|
||||
'auth': self._extract_mvpd_auth(
|
||||
url, video_id, 'fbc-fox', resource)
|
||||
})
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||
series = video.get('seriesName') or data.get(
|
||||
'seriesName') or data.get('show')
|
||||
|
||||
subtitles = {}
|
||||
for doc_rel in video.get('documentReleases', []):
|
||||
@@ -98,36 +116,19 @@ class FOXIE(AdobePassIE):
|
||||
}]
|
||||
break
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'description': video.get('description'),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'creator': creator,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
'season_number': int_or_none(video.get('seasonNumber')),
|
||||
'episode': video.get('name'),
|
||||
'episode_number': int_or_none(video.get('episodeNumber')),
|
||||
'release_year': int_or_none(video.get('releaseYear')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
urlh = self._request_webpage(HEADRequest(release_url), video_id)
|
||||
video_url = compat_str(urlh.geturl())
|
||||
|
||||
if UplynkPreplayIE.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'ie_key': UplynkPreplayIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class FreespeechIE(InfoExtractor):
|
||||
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
|
||||
r'data-video-url="([^"]+)"',
|
||||
webpage, 'youtube url')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
return self.url_result(youtube_url, YoutubeIE.ie_key())
|
||||
|
@@ -1,6 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
|
||||
|
||||
video_id = title_data.get('id') or self._search_regex([
|
||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||
r'<iframe[^>]+src="/player/(\d+)"',
|
||||
r'<iframe[^>]+src="/player/(\d+)',
|
||||
], webpage, 'video_id', default=None)
|
||||
if not video_id:
|
||||
player_url = self._html_search_meta([
|
||||
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||
sources = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
||||
video_id, headers=headers)['items']
|
||||
'https://www.funimation.com/api/showexperience/%s/' % video_id,
|
||||
video_id, headers=headers, query={
|
||||
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
|
||||
})['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
|
98
youtube_dl/extractor/gaia.py
Normal file
98
youtube_dl/extractor/gaia.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class GaiaIE(InfoExtractor):
    # Extractor for gaia.com video pages. The ``fullplayer`` query parameter
    # in the URL selects which variant of the video is extracted: the full
    # ``feature`` or only its ``preview``.
    _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
    _TESTS = [{
        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
        'info_dict': {
            'id': '89356',
            'ext': 'mp4',
            'title': 'Connecting with Universal Consciousness',
            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
            'upload_date': '20151116',
            'timestamp': 1447707266,
            'duration': 936,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
        'info_dict': {
            'id': '89351',
            'ext': 'mp4',
            'title': 'Connecting with Universal Consciousness',
            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
            'upload_date': '20151116',
            'timestamp': 1447707266,
            'duration': 53,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        # Returns the info dict for the variant named in the URL. Mandatory
        # data (node id, variant nid, title, HLS URL) is read with plain
        # indexing so a missing key fails loudly; everything else is optional.
        display_id, vtype = re.search(self._VALID_URL, url).groups()

        # The URL slug is resolved to a numeric node id first.
        node_id = self._download_json(
            'https://brooklyn.gaia.com/pathinfo', display_id, query={
                'path': 'video/' + display_id,
            })['id']
        node = self._download_json(
            'https://brooklyn.gaia.com/node/%d' % node_id, node_id)

        # node['feature'] / node['preview'] describes the requested variant.
        vdata = node[vtype]
        media_id = compat_str(vdata['nid'])
        title = node['title']

        media = self._download_json(
            'https://brooklyn.gaia.com/media/' + media_id, media_id)
        formats = self._extract_m3u8_formats(
            media['mediaUrls']['bcHLS'], media_id, 'mp4')
        self._sort_formats(formats)

        # Subtitles are optional: a missing, null or empty-list value for
        # ``textTracks`` (or for one of its per-kind maps) must not abort the
        # extraction, hence the ``or {}`` fallbacks instead of .get defaults.
        subtitles = {}
        text_tracks = media.get('textTracks') or {}
        for key in ('captions', 'subtitles'):
            for lang, sub_url in (text_tracks.get(key) or {}).items():
                if not sub_url:
                    continue
                subtitles.setdefault(lang, []).append({
                    'url': sub_url,
                })

        fivestar = node.get('fivestar') or {}
        fields = node.get('fields') or {}

        def get_field_value(key, value_key='value'):
            # Each field is read as fields[key][0][value_key]; try_get turns
            # any missing level into None.
            return try_get(fields, lambda x: x[key][0][value_key])

        return {
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
            'timestamp': int_or_none(node.get('created')),
            'subtitles': subtitles,
            'duration': int_or_none(vdata.get('duration')),
            'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
            'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
            'comment_count': int_or_none(node.get('comment_count')),
            'series': try_get(node, lambda x: x['series']['title'], compat_str),
            'season_number': int_or_none(get_field_value('season')),
            'season_id': str_or_none(get_field_value('series_nid', 'nid')),
            'episode_number': int_or_none(get_field_value('episode')),
        }
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameSpotIE(OnceIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||
@@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -109,6 +109,7 @@ from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .teachable import TeachableIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
@@ -2196,10 +2197,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self.http_scheme() + url,
|
||||
}
|
||||
return self.url_result(self.http_scheme() + url)
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
@@ -3112,6 +3110,10 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
|
||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||
if teachable_url:
|
||||
return self.url_result(teachable_url)
|
||||
|
||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||
if indavideo_urls:
|
||||
return self.playlist_from_matches(
|
||||
|
@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
|
||||
return
|
||||
|
||||
try:
|
||||
self._download_json(
|
||||
glb_id = (self._download_json(
|
||||
'https://login.globo.com/api/authentication', None, data=json.dumps({
|
||||
'payload': {
|
||||
'email': email,
|
||||
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
|
||||
},
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
})
|
||||
}) or {}).get('glbId')
|
||||
if glb_id:
|
||||
self._set_cookie('.globo.com', 'GLBID', glb_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(e.cause.read(), None)
|
||||
|
191
youtube_dl/extractor/hketv.py
Normal file
191
youtube_dl/extractor/hketv.py
Normal file
@@ -0,0 +1,191 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class HKETVIE(InfoExtractor):
    # Extractor for Hong Kong Education Bureau ETV resources hosted on
    # hkedcity.net. Geo bypass is disabled and HK is declared as the only
    # allowed country.
    IE_NAME = 'hketv'
    IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['HK']
    _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.hkedcity.net/etv/resource/2932360618',
        'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
        'info_dict': {
            'id': '2932360618',
            'ext': 'mp4',
            'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
            'description': 'md5:d5286d05219ef50e0613311cbe96e560',
            'upload_date': '20181024',
            'duration': 900,
            'subtitles': 'count:2',
        },
        'skip': 'Geo restricted to HK',
    }, {
        'url': 'https://www.hkedcity.net/etv/resource/972641418',
        'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
        'info_dict': {
            'id': '972641418',
            'ext': 'mp4',
            'title': '衣冠楚楚 (天使系列之一)',
            'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
            'upload_date': '20070109',
            'duration': 907,
            'subtitles': {},
        },
        'params': {
            'geo_verification_proxy': '<HK proxy here>',
        },
        'skip': 'Geo restricted to HK',
    }]

    # Maps the track labels used by the player to language codes; labels not
    # listed here are used as the subtitle key unchanged.
    _CC_LANGS = {
        '中文(繁體中文)': 'zh-Hant',
        '中文(简体中文)': 'zh-Hans',
        'English': 'en',
        'Bahasa Indonesia': 'id',
        '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
        '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
        'Tagalog': 'tl',
        '\u0e44\u0e17\u0e22': 'th',
        '\u0627\u0631\u062f\u0648': 'ur',
    }
    # Maps a source label to the frame height reported for that format.
    _FORMAT_HEIGHTS = {
        'SD': 360,
        'HD': 720,
    }
    _APPS_BASE_URL = 'https://apps.hkedcity.net'

    def _real_extract(self, url):
        # Downloads the resource page, asks the media handler for playback
        # info and returns the info dict. Raises a geo-restriction error or
        # an expected ExtractorError when the handler denies access.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Title fallbacks, tried in order. The named group requested via
        # ``group=`` must exist in the pattern, otherwise a successful match
        # raises IndexError instead of yielding the title.
        title = (
            self._html_search_meta(
                ('ed_title', 'search.ed_title'), webpage, default=None) or
            self._search_regex(
                r'data-favorite_title_(?:eng|chi)=(["\'])(?P<title>(?:(?!\1).)+)\1',
                webpage, 'title', default=None, group='title') or
            self._html_search_regex(
                r'<h1>([^<]+)</h1>', webpage, 'title', default=None) or
            self._og_search_title(webpage)
        )

        file_id = self._search_regex(
            r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
            webpage, 'file ID')
        curr_url = self._search_regex(
            r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
            webpage, 'curr URL')
        data = {
            'action': 'get_info',
            'curr_url': curr_url,
            'file_id': file_id,
            'video_url': file_id,
        }

        response = self._download_json(
            self._APPS_BASE_URL + '/media/play/handler.php', video_id,
            data=urlencode_postdata(data),
            headers=merge_dicts({
                'Content-Type': 'application/x-www-form-urlencoded'},
                self.geo_verification_headers()))

        # Check for denial before touching the payload so that the server's
        # own message is reported rather than a KeyError on 'result'.
        if not response.get('success') or not response.get('access'):
            # clean_html() yields None when the message is absent; guard the
            # substring test and fall back to a generic message.
            error = clean_html(response.get('access_err_msg'))
            if error and 'Video streaming is not available in your country' in error:
                self.raise_geo_restricted(
                    msg=error, countries=self._GEO_COUNTRIES)
            raise ExtractorError(
                error or 'Unable to access video', expected=True)

        result = response['result']

        formats = []

        width = int_or_none(result.get('width'))
        height = int_or_none(result.get('height'))

        playlist0 = result['playlist'][0]
        for fmt in playlist0['sources']:
            file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
            if not file_url:
                continue
            # If we ever wanted to provide the final resolved URL that
            # does not require cookies, albeit with a shorter lifespan:
            #     urlh = self._downloader.urlopen(file_url)
            #     resolved_url = urlh.geturl()
            label = fmt.get('label')
            h = self._FORMAT_HEIGHTS.get(label)
            # Scale the reported width to the format's height, keeping the
            # aspect ratio given by the handler's width/height.
            w = h * width // height if h and width and height else None
            formats.append({
                'format_id': label,
                'ext': fmt.get('type'),
                'url': file_url,
                'width': w,
                'height': h,
            })
        self._sort_formats(formats)

        subtitles = {}
        tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
        for track in tracks:
            if not isinstance(track, dict):
                continue
            track_kind = str_or_none(track.get('kind'))
            if not track_kind or not isinstance(track_kind, compat_str):
                continue
            if track_kind.lower() not in ('captions', 'subtitles'):
                continue
            track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
            if not track_url:
                continue
            track_label = track.get('label')
            subtitles.setdefault(self._CC_LANGS.get(
                track_label, track_label), []).append({
                    'url': self._proto_relative_url(track_url),
                    'ext': 'srt',
                })

        # Likes (non-fatal: a failed request simply leaves like_count unset)
        emotion = self._download_json(
            'https://emocounter.hkedcity.net/handler.php', video_id,
            data=urlencode_postdata({
                'action': 'get_emotion',
                'data[bucket_id]': 'etv',
                'data[identifier]': video_id,
            }),
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            fatal=False) or {}
        like_count = int_or_none(try_get(
            emotion, lambda x: x['data']['emotion_data'][0]['count']))

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta(
                'description', webpage, fatal=False),
            'upload_date': unified_strdate(self._html_search_meta(
                'ed_date', webpage, fatal=False), day_first=False),
            'duration': int_or_none(result.get('length')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
            'view_count': parse_count(result.get('view_count')),
            'like_count': like_count,
        }
|
@@ -79,7 +79,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||
webpage, 'app state'), video_id)
|
||||
video_data = {}
|
||||
getters = (
|
||||
getters = list(
|
||||
lambda x, k=k: x['initialState']['content%s' % k]['content']
|
||||
for k in ('Data', 'Detail')
|
||||
)
|
||||
@@ -87,6 +87,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
content = try_get(v, getters, dict)
|
||||
if content and content.get('contentId') == video_id:
|
||||
video_data = content
|
||||
break
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
|
117
youtube_dl/extractor/hungama.py
Normal file
117
youtube_dl/extractor/hungama.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HungamaIE(InfoExtractor):
    # Extractor for hungama.com video, movie and TV-show episode pages.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?hungama\.com/
                        (?:
                            (?:video|movie)/[^/]+/|
                            tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
        'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
        'info_dict': {
            'id': '2931166',
            'ext': 'mp4',
            'title': 'Lucky Ali - Kitni Haseen Zindagi',
            'track': 'Kitni Haseen Zindagi',
            'artist': 'Lucky Ali',
            'album': 'Aks',
            'release_year': 2000,
        }
    }, {
        'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
        'only_matching': True,
    }, {
        'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Metadata comes from the page's JSON-LD; the HLS manifest URL is
        # obtained from a separate POST to index.php.
        content_id = self._match_id(url)
        page = self._download_webpage(url, content_id)
        result = self._search_json_ld(page, content_id)

        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest',
        }
        request_query = {
            'c': 'common',
            'm': 'get_video_mdn_url',
        }
        stream_info = self._download_json(
            'https://www.hungama.com/index.php', content_id,
            data=urlencode_postdata({'content_id': content_id}),
            headers=request_headers, query=request_query)
        manifest_url = stream_info['stream_url']

        hls_formats = self._extract_m3u8_formats(
            manifest_url, content_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(hls_formats)

        # The id and formats are layered on top of the JSON-LD metadata.
        result['id'] = content_id
        result['formats'] = hls_formats
        return result
|
||||
|
||||
|
||||
class HungamaSongIE(InfoExtractor):
    # Extractor for hungama.com song pages.
    _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
        'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
        'info_dict': {
            'id': '2931166',
            'ext': 'mp4',
            'title': 'Lucky Ali - Kitni Haseen Zindagi',
            'track': 'Kitni Haseen Zindagi',
            'artist': 'Lucky Ali',
            'album': 'Aks',
            'release_year': 2000,
        }
    }

    def _real_extract(self, url):
        # The player-data endpoint returns a list; its first entry describes
        # the requested track and points at a second JSON document holding
        # the HLS manifest URL.
        audio_id = self._match_id(url)

        song = self._download_json(
            'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
            audio_id, query={'_country': 'IN'})[0]

        track = song['song_name']
        artist = song.get('singer_name')

        # Use 'file' when present, otherwise the (mandatory) preview link.
        media_info_url = song.get('file') or song['preview_link']
        media_info = self._download_json(media_info_url, audio_id)
        manifest_url = media_info['response']['media_url']

        hls_formats = self._extract_m3u8_formats(
            manifest_url, audio_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(hls_formats)

        # The artist is prepended to the title only when it is known.
        if artist:
            title = '%s - %s' % (artist, track)
        else:
            title = track

        return {
            'id': audio_id,
            'title': title,
            'thumbnail': song.get('img_src') or song.get('album_image'),
            'track': track,
            'artist': artist,
            'album': song.get('album_name'),
            'release_year': int_or_none(song.get('date')),
            'formats': hls_formats,
        }
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@@ -20,28 +20,9 @@ class ImgurIE(InfoExtractor):
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||
'description': 'Imgur: The magic of the Internet',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||
'description': 'Imgur: The magic of the Internet',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||
@@ -50,8 +31,8 @@ class ImgurIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
|
||||
webpage = self._download_webpage(gifv_url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
|
||||
|
||||
width = int_or_none(self._og_search_property(
|
||||
'video:width', webpage, default=None))
|
||||
@@ -72,7 +53,6 @@ class ImgurIE(InfoExtractor):
|
||||
'format_id': m.group('type').partition('/')[2],
|
||||
'url': self._proto_relative_url(m.group('src')),
|
||||
'ext': mimetype2ext(m.group('type')),
|
||||
'acodec': 'none',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'http_headers': {
|
||||
@@ -107,44 +87,64 @@ class ImgurIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'title': self._og_search_title(webpage),
|
||||
}
|
||||
|
||||
|
||||
class ImgurAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
|
||||
class ImgurGalleryIE(InfoExtractor):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
}, {
|
||||
'url': 'http://imgur.com/a/j6Orj',
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
gallery_id = self._match_id(url)
|
||||
|
||||
album_images = self._download_json(
|
||||
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
|
||||
album_id, fatal=False)
|
||||
data = self._download_json(
|
||||
'https://imgur.com/gallery/%s.json' % gallery_id,
|
||||
gallery_id)['data']['image']
|
||||
|
||||
if album_images:
|
||||
data = album_images.get('data')
|
||||
if data and isinstance(data, dict):
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, list):
|
||||
entries = [
|
||||
self.url_result('http://imgur.com/%s' % image['hash'])
|
||||
for image in images if image.get('hash')]
|
||||
return self.playlist_result(entries, album_id)
|
||||
if data.get('is_album'):
|
||||
entries = [
|
||||
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
|
||||
for image in data['album_images']['images'] if image.get('hash')]
|
||||
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
|
||||
|
||||
# Fallback to single video
|
||||
return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
|
||||
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
|
||||
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryIE):
    # Matches imgur.com/a/<id> album URLs. No methods are defined here, so
    # extraction is inherited as-is from ImgurGalleryIE; only the extractor
    # name, URL pattern and tests are overridden.
    IE_NAME = 'imgur:album'
    _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'http://imgur.com/a/j6Orj',
        'info_dict': {
            'id': 'j6Orj',
            'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
        },
        'playlist_count': 12,
    }]
|
||||
|
@@ -227,44 +227,37 @@ class InstagramIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class InstagramUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
|
||||
IE_DESC = 'Instagram user profile'
|
||||
IE_NAME = 'instagram:user'
|
||||
_TEST = {
|
||||
'url': 'https://instagram.com/porsche',
|
||||
'info_dict': {
|
||||
'id': 'porsche',
|
||||
'title': 'porsche',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
'params': {
|
||||
'extract_flat': True,
|
||||
'skip_download': True,
|
||||
'playlistend': 5,
|
||||
}
|
||||
}
|
||||
class InstagramPlaylistIE(InfoExtractor):
|
||||
# A superclass for handling any kind of query based on GraphQL which
|
||||
# results in a playlist.
|
||||
|
||||
_gis_tmpl = None
|
||||
_gis_tmpl = None # used to cache GIS request type
|
||||
|
||||
def _entries(self, data):
|
||||
def _parse_graphql(self, webpage, item_id):
|
||||
# Reads a webpage and returns its GraphQL data.
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
||||
item_id)
|
||||
|
||||
def _extract_graphql(self, data, url):
|
||||
# Parses GraphQL queries containing videos and generates a playlist.
|
||||
def get_count(suffix):
|
||||
return int_or_none(try_get(
|
||||
node, lambda x: x['edge_media_' + suffix]['count']))
|
||||
|
||||
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
uploader_id = self._match_id(url)
|
||||
csrf_token = data['config']['csrf_token']
|
||||
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
|
||||
|
||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||
|
||||
cursor = ''
|
||||
for page_num in itertools.count(1):
|
||||
variables = json.dumps({
|
||||
'id': uploader_id,
|
||||
variables = {
|
||||
'first': 12,
|
||||
'after': cursor,
|
||||
})
|
||||
}
|
||||
variables.update(self._query_vars_for(data))
|
||||
variables = json.dumps(variables)
|
||||
|
||||
if self._gis_tmpl:
|
||||
gis_tmpls = [self._gis_tmpl]
|
||||
@@ -276,21 +269,26 @@ class InstagramUserIE(InfoExtractor):
|
||||
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
|
||||
]
|
||||
|
||||
# try all of the ways to generate a GIS query, and not only use the
|
||||
# first one that works, but cache it for future requests
|
||||
for gis_tmpl in gis_tmpls:
|
||||
try:
|
||||
media = self._download_json(
|
||||
json_data = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', uploader_id,
|
||||
'Downloading JSON page %d' % page_num, headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-Instagram-GIS': hashlib.md5(
|
||||
('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
|
||||
}, query={
|
||||
'query_hash': '42323d64886122307be10013ad2dcc44',
|
||||
'query_hash': self._QUERY_HASH,
|
||||
'variables': variables,
|
||||
})['data']['user']['edge_owner_to_timeline_media']
|
||||
})
|
||||
media = self._parse_timeline_from(json_data)
|
||||
self._gis_tmpl = gis_tmpl
|
||||
break
|
||||
except ExtractorError as e:
|
||||
# if it's an error caused by a bad query, and there are
|
||||
# more GIS templates to try, ignore it and keep trying
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if gis_tmpl != gis_tmpls[-1]:
|
||||
continue
|
||||
@@ -348,14 +346,80 @@ class InstagramUserIE(InfoExtractor):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
username = self._match_id(url)
|
||||
user_or_tag = self._match_id(url)
|
||||
webpage = self._download_webpage(url, user_or_tag)
|
||||
data = self._parse_graphql(webpage, user_or_tag)
|
||||
|
||||
webpage = self._download_webpage(url, username)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
||||
username)
|
||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), username, username)
|
||||
self._extract_graphql(data, url), user_or_tag, user_or_tag)
|
||||
|
||||
|
||||
class InstagramUserIE(InstagramPlaylistIE):
    # Playlist extractor for the media timeline of an Instagram user profile.
    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'
    _TEST = {
        'url': 'https://instagram.com/porsche',
        'info_dict': {
            'id': 'porsche',
            'title': 'porsche',
        },
        'playlist_count': 5,
        'params': {
            'extract_flat': True,
            'skip_download': True,
            'playlistend': 5,
        }
    }

    # GraphQL query hash for the user timeline query.  This must be a plain
    # string: a trailing comma after the literal would silently turn the
    # attribute into a 1-tuple.
    _QUERY_HASH = '42323d64886122307be10013ad2dcc44'

    @staticmethod
    def _parse_timeline_from(data):
        # extracts the media timeline data from a GraphQL result; raises
        # KeyError when the response does not have the expected user layout
        return data['data']['user']['edge_owner_to_timeline_media']

    @staticmethod
    def _query_vars_for(data):
        # returns a dictionary of variables to add to the timeline query based
        # on the GraphQL of the original page (here: the numeric user id of
        # the first ProfilePage entry)
        return {
            'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
        }
|
||||
|
||||
|
||||
class InstagramTagIE(InstagramPlaylistIE):
    # Playlist extractor for the media listed under an Instagram hashtag.
    _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
    IE_DESC = 'Instagram hashtag search'
    IE_NAME = 'instagram:tag'
    _TEST = {
        'url': 'https://instagram.com/explore/tags/lolcats',
        'info_dict': {
            'id': 'lolcats',
            'title': 'lolcats',
        },
        'playlist_count': 50,
        'params': {
            'extract_flat': True,
            'skip_download': True,
            'playlistend': 50,
        }
    }

    # GraphQL query hash for the hashtag media query.  This must be a plain
    # string: a trailing comma after the literal would silently turn the
    # attribute into a 1-tuple.
    _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314'

    @staticmethod
    def _parse_timeline_from(data):
        # extracts the media timeline data from a GraphQL result; raises
        # KeyError when the response does not have the expected hashtag layout
        return data['data']['hashtag']['edge_hashtag_to_media']

    @staticmethod
    def _query_vars_for(data):
        # returns a dictionary of variables to add to the timeline query based
        # on the GraphQL of the original page (here: the hashtag name of the
        # first TagPage entry)
        return {
            'tag_name':
                data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
        }
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
@@ -41,6 +41,24 @@ class IPrimaIE(InfoExtractor):
|
||||
# iframe prima.iprima.cz
|
||||
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.iprima.cz/filmy/desne-rande',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TESTS = [{
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor):
|
||||
'upload_date': '20081127',
|
||||
'timestamp': 1227796140,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
@@ -34,5 +37,5 @@ class JWPlatformIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
|
||||
json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
|
||||
return self._parse_jwplayer_data(json_data, video_id)
|
||||
|
229
youtube_dl/extractor/lecturio.py
Normal file
229
youtube_dl/extractor/lecturio.py
Normal file
@@ -0,0 +1,229 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LecturioBaseIE(InfoExtractor):
    # Shared base for the Lecturio extractors: performs the account login
    # once during extractor initialization.
    _LOGIN_URL = 'https://app.lecturio.com/en/login'
    _NETRC_MACHINE = 'lecturio'

    def _real_initialize(self):
        self._login()

    def _login(self):
        # Log in with the configured credentials.  Does nothing when no
        # username is available; raises ExtractorError when the login form
        # is rejected.
        username, password = self._get_login_info()
        if username is None:
            return

        def still_on_login_page(url_handle):
            # The final URL of a response staying on the login URL is taken
            # to mean that no session is active.
            return self._LOGIN_URL in compat_str(url_handle.geturl())

        # Sets some cookies
        _, popup_handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Downloading login popup')

        # Already logged in
        if not still_on_login_page(popup_handle):
            return

        response, login_handle = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata({
                'signin[email]': username,
                'signin[password]': password,
                'signin[remember]': 'on',
            }))

        # Logged in successfully
        if not still_on_login_page(login_handle):
            return

        # Prefer the site's own error list for the failure message.
        errors = self._html_search_regex(
            r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
            'errors', default=None)
        if not errors:
            raise ExtractorError('Unable to log in')
        raise ExtractorError('Unable to login: %s' % errors, expected=True)
|
||||
|
||||
|
||||
class LecturioIE(LecturioBaseIE):
    # Extractor for a single Lecturio lecture (app.lecturio.com `.lecture`
    # pages and lecturio.de `.vortrag` pages).
    _VALID_URL = r'''(?x)
                    https://
                        (?:
                            app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
                            (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
                        )
                    '''
    _TESTS = [{
        'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
        'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
        'info_dict': {
            'id': '39634',
            'ext': 'mp4',
            'title': 'Important Concepts and Terms – Introduction to Microbiology',
        },
        'skip': 'Requires lecturio account credentials',
    }, {
        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
        'only_matching': True,
    }]

    # Maps the language names used in subtitle labels to language codes.
    _CC_LANGS = {
        'German': 'de',
        'English': 'en',
        'Spanish': 'es',
        'French': 'fr',
        'Polish': 'pl',
        'Russian': 'ru',
    }

    def _real_extract(self, url):
        # Returns an info dict with the lecture's formats, subtitles and
        # automatic captions.  Missing mandatory data (lecture id, API URL,
        # title, media list) raises; subtitles are best-effort.
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two named groups matches, depending on the site.
        display_id = mobj.group('id') or mobj.group('id_de')

        webpage = self._download_webpage(
            'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
            display_id)

        lecture_id = self._search_regex(
            r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')

        api_url = self._search_regex(
            r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'api url', group='url')

        video = self._download_json(api_url, display_id)

        title = video['title'].strip()

        formats = []
        for format_ in video['content']['media']:
            if not isinstance(format_, dict):
                continue
            file_ = format_.get('file')
            if not file_:
                continue
            ext = determine_ext(file_)
            if ext == 'smil':
                # smil contains only broken RTMP formats anyway
                continue
            file_url = url_or_none(file_)
            if not file_url:
                continue
            label = str_or_none(format_.get('label'))
            filesize = int_or_none(format_.get('fileSize'))
            formats.append({
                'url': file_url,
                'format_id': label,
                'filesize': float_or_none(filesize, invscale=1000)
            })
        self._sort_formats(formats)

        subtitles = {}
        automatic_captions = {}
        cc = self._parse_json(
            self._search_regex(
                r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
                default='{}'), display_id, fatal=False)
        # The subtitle map is parsed non-fatally, so the result may not be a
        # dict (presumably None when the JSON is malformed).  Subtitles are
        # optional: fall back to an empty map instead of crashing on .items().
        if not isinstance(cc, dict):
            cc = {}
        for cc_label, cc_url in cc.items():
            cc_url = url_or_none(cc_url)
            if not cc_url:
                continue
            # Prefer the two-letter code embedded in the URL; otherwise use
            # the first word of the label (mapped through _CC_LANGS below).
            lang = self._search_regex(
                r'/([a-z]{2})_', cc_url, 'lang',
                default=cc_label.split()[0] if cc_label else 'en')
            # A second language code in the URL marks a translated track.
            original_lang = self._search_regex(
                r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
                default=None)
            sub_dict = (automatic_captions
                        if 'auto-translated' in cc_label or original_lang
                        else subtitles)
            sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
                'url': cc_url,
            })

        return {
            'id': lecture_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'automatic_captions': automatic_captions,
        }
|
||||
|
||||
|
||||
class LecturioCourseIE(LecturioBaseIE):
    # Playlist extractor for an app.lecturio.com course page; each lecture
    # found on the page is delegated to LecturioIE.
    _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
    _TEST = {
        'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
        'info_dict': {
            'id': 'microbiology-introduction',
            'title': 'Microbiology: Introduction',
        },
        'playlist_count': 45,
        'skip': 'Requires lecturio account credentials',
    }

    def _real_extract(self, url):
        course_slug = self._match_id(url)
        page = self._download_webpage(url, course_slug)

        def lecture_entry(tag_html):
            # Build a url-type result from one tag carrying a data-url
            # attribute that points at a .lecture page.
            attrs = extract_attributes(tag_html)
            return self.url_result(
                urljoin(url, attrs.get('data-url')),
                ie=LecturioIE.ie_key(), video_id=attrs.get('data-id'))

        entries = [
            lecture_entry(tag.group(0))
            for tag in re.finditer(
                r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
                page)]

        # The course title is optional; the playlist is still returned
        # without it.
        course_title = self._search_regex(
            r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', page,
            'title', default=None)

        return self.playlist_result(entries, course_slug, course_title)
|
||||
|
||||
|
||||
class LecturioDeCourseIE(LecturioBaseIE):
    # Playlist extractor for a lecturio.de course (`.kurs`) page; each
    # lecture found on the page is delegated to LecturioIE.
    _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
    _TEST = {
        'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
        'only_matching': True,
    }

    def _real_extract(self, url):
        course_slug = self._match_id(url)
        page = self._download_webpage(url, course_slug)

        # Each match pairs a table cell's data-lecture-id with the first
        # following href that ends in .vortrag.
        entries = [
            self.url_result(
                urljoin(url, lecture.group('url')),
                ie=LecturioIE.ie_key(), video_id=lecture.group('id'))
            for lecture in re.finditer(
                r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
                page)]

        # The course title is optional; the playlist is still returned
        # without it.
        course_title = self._search_regex(
            r'<h1[^>]*>([^<]+)', page, 'title', default=None)

        return self.playlist_result(entries, course_slug, course_title)
|
@@ -87,7 +87,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
|
||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -120,13 +120,27 @@ class LiveLeakIE(InfoExtractor):
|
||||
}
|
||||
|
||||
for idx, info_dict in enumerate(entries):
|
||||
formats = []
|
||||
for a_format in info_dict['formats']:
|
||||
if not a_format.get('height'):
|
||||
a_format['height'] = int_or_none(self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||
default=None))
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(info_dict['formats'])
|
||||
# Removing '.*.mp4' gives the raw video, which is essentially
|
||||
# the same video without the LiveLeak logo at the top (see
|
||||
# https://github.com/rg3/youtube-dl/pull/4768)
|
||||
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||
if a_format['url'] != orig_url:
|
||||
format_id = a_format.get('format_id')
|
||||
formats.append({
|
||||
'format_id': 'original' + ('-' + format_id if format_id else ''),
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
|
||||
# Don't append entry ID for one-video pages to keep backward compatibility
|
||||
if len(entries) > 1:
|
||||
@@ -146,7 +160,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
|
||||
|
||||
class LiveLeakEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
|
||||
|
||||
# See generic.py for actual test cases
|
||||
_TESTS = [{
|
||||
@@ -158,15 +172,14 @@ class LiveLeakEmbedIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
kind, video_id = mobj.group('kind', 'id')
|
||||
kind, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if kind == 'f':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
liveleak_url = self._search_regex(
|
||||
r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||
webpage, 'LiveLeak URL', group='url')
|
||||
elif kind == 'i':
|
||||
liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
|
||||
else:
|
||||
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
|
||||
|
||||
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
||||
|
@@ -363,7 +363,4 @@ class LivestreamShortenerIE(InfoExtractor):
|
||||
id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, id)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self._og_search_url(webpage),
|
||||
}
|
||||
return self.url_result(self._og_search_url(webpage))
|
||||
|
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ManyVidsIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# preview video
|
||||
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||
'info_dict': {
|
||||
@@ -17,7 +23,18 @@ class ManyVidsIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# full video
|
||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
|
||||
'info_dict': {
|
||||
'id': '935718',
|
||||
'ext': 'mp4',
|
||||
'title': 'MY FACE REVEAL',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -28,12 +45,41 @@ class ManyVidsIE(InfoExtractor):
|
||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video URL', group='url')
|
||||
|
||||
title = '%s (Preview)' % self._html_search_regex(
|
||||
r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True)
|
||||
|
||||
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||
title += ' (Preview)'
|
||||
|
||||
mv_token = self._search_regex(
|
||||
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'mv token', default=None, group='value')
|
||||
|
||||
if mv_token:
|
||||
# Sets some cookies
|
||||
self._download_webpage(
|
||||
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||
video_id, fatal=False, data=urlencode_postdata({
|
||||
'mvtoken': mv_token,
|
||||
'vid': video_id,
|
||||
}), headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
formats = [{'url': video_url}]
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||
'view count', default=None))
|
||||
|
||||
@@ -42,7 +88,5 @@ class ManyVidsIE(InfoExtractor):
|
||||
'title': title,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
}],
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -21,7 +21,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MediasiteIE(InfoExtractor):
|
||||
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
||||
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
|
||||
@@ -84,7 +84,15 @@ class MediasiteIE(InfoExtractor):
|
||||
'timestamp': 1333983600,
|
||||
'duration': 7794,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
|
||||
|
@@ -1,15 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
update_url_query,
|
||||
get_element_by_class,
|
||||
)
|
||||
|
||||
|
||||
@@ -64,132 +58,3 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
{'force_smil_url': True}),
|
||||
'id': guid,
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
    # Extractor for channel.nationalgeographic.com video and episode pages;
    # formats, subtitles and metadata are obtained through thePlatform.
    IE_NAME = 'natgeo'
    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'

    _TESTS = [
        {
            'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
            'md5': '518c9aa655686cf81493af5cc21e2a04',
            'info_dict': {
                'id': 'vKInpacll2pC',
                'ext': 'mp4',
                'title': 'Uncovering a Universal Knowledge',
                'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
                'timestamp': 1458680907,
                'upload_date': '20160322',
                'uploader': 'NEWA-FNG-NGTV',
            },
            'add_ie': ['ThePlatform'],
        },
        {
            'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
            'md5': 'c4912f656b4cbe58f3e000c489360989',
            'info_dict': {
                'id': 'Pok5lWCkiEFA',
                'ext': 'mp4',
                'title': 'The Stunning Red Bird of Paradise',
                'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
                'timestamp': 1459362152,
                'upload_date': '20160330',
                'uploader': 'NEWA-FNG-NGTV',
            },
            'add_ie': ['ThePlatform'],
        },
        {
            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
            'only_matching': True,
        },
        {
            'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
            'only_matching': True,
        },
        {
            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
            'only_matching': True,
        },
        {
            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page embeds a thePlatform release URL; the video id is the
        # last segment of its path.
        release_url = self._search_regex(
            r'video_auth_playlist_url\s*=\s*"([^"]+)"',
            webpage, 'release url')
        theplatform_path = self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
        video_id = theplatform_path.split('/')[-1]

        base_query = {
            'mbr': 'true',
        }
        auth_flag = self._search_regex(
            r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
        if auth_flag == 'auth':
            # The page marks this video as requiring provider authentication.
            resource_id = self._search_regex(
                r"video_auth_resourceId\s*=\s*'([^']+)'",
                webpage, 'auth resource id')
            base_query['auth'] = self._extract_mvpd_auth(
                url, video_id, 'natgeo', resource_id)

        # Request the SMIL twice with different query flags and merge the
        # formats and subtitles of both responses.
        formats = []
        subtitles = {}
        for param, param_value in (('switch', 'http'), ('manifest', 'm3u')):
            smil_query = base_query.copy()
            smil_query[param] = param_value
            smil_formats, smil_subtitles = self._extract_theplatform_smil(
                update_url_query(release_url, smil_query), video_id,
                'Downloading %s SMIL data' % param_value)
            formats.extend(smil_formats)
            subtitles = self._merge_subtitles(subtitles, smil_subtitles)
        self._sort_formats(formats)

        info = self._extract_theplatform_metadata(theplatform_path, display_id)
        info.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'display_id': display_id,
        })
        return info
|
||||
|
||||
|
||||
class NationalGeographicEpisodeGuideIE(InfoExtractor):
    # Playlist extractor for the episode guide of a show on
    # channel.nationalgeographic.com; entries are delegated to the
    # 'NationalGeographic' extractor.
    IE_NAME = 'natgeo:episodeguide'
    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
    _TESTS = [
        {
            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
            'info_dict': {
                'id': 'the-story-of-god-with-morgan-freeman-season-1',
                'title': 'The Story of God with Morgan Freeman - Season 1',
            },
            'playlist_mincount': 6,
        },
        {
            'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
            'info_dict': {
                'id': 'underworld-inc-season-2',
                'title': 'Underworld, Inc. - Season 2',
            },
            'playlist_mincount': 7,
        },
    ]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        show = get_element_by_class('show', webpage)
        # Text of the first link inside the season selector.
        selected_season = self._search_regex(
            r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
            webpage, 'selected season')

        entries = []
        for entry_url in re.findall(
                '(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"',
                webpage):
            entries.append(self.url_result(
                self._proto_relative_url(entry_url), 'NationalGeographic'))

        # The playlist id is the show slug plus the slugified season name.
        season_slug = selected_season.lower().replace(' ', '-')
        playlist_id = '%s-%s' % (display_id, season_slug)
        playlist_title = '%s - %s' % (show, selected_season)
        return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
@@ -363,7 +363,7 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
class NPOLiveIE(NPOBaseIE):
|
||||
IE_NAME = 'npo.nl:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.npo.nl/live/npo-1',
|
||||
@@ -380,6 +380,9 @@ class NPOLiveIE(NPOBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.npostart.nl/live/npo-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': '2f7f6eeb2aacdd99885f355428715cfa',
|
||||
'md5': '706f34cdf1322577589e369e522b50ef',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 263,
|
||||
'duration': 262,
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
@@ -248,7 +248,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||
(?:serie/[^/]+|program)/
|
||||
(?:serie(?:/[^/]+){1,2}|program)/
|
||||
(?![Ee]pisodes)%s
|
||||
(?:/\d{2}-\d{2}-\d{4})?
|
||||
(?:\#del=(?P<part_id>\d+))?
|
||||
@@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål - TV',
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
@@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
@@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
@@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
@@ -362,6 +362,9 @@ class NRKTVIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@@ -403,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
|
||||
def _extract_series(self, webpage, display_id, fatal=True):
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
|
||||
default='{}' if not fatal else NO_DEFAULT),
|
||||
(r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
|
||||
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
|
||||
display_id, fatal=False)
|
||||
if not config:
|
||||
return
|
||||
return try_get(config, lambda x: x['series'], dict)
|
||||
return try_get(
|
||||
config,
|
||||
(lambda x: x['initialState']['series'], lambda x: x['series']),
|
||||
dict)
|
||||
|
||||
def _extract_seasons(self, seasons):
|
||||
if not isinstance(seasons, list):
|
||||
return []
|
||||
entries = []
|
||||
for season in seasons:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
return entries
|
||||
|
||||
def _extract_episodes(self, season):
|
||||
entries = []
|
||||
if not isinstance(season, dict):
|
||||
return entries
|
||||
episodes = season.get('episodes')
|
||||
if not isinstance(episodes, list):
|
||||
return entries
|
||||
for episode in episodes:
|
||||
return []
|
||||
return self._extract_entries(season.get('episodes'))
|
||||
|
||||
def _extract_entries(self, entry_list):
|
||||
if not isinstance(entry_list, list):
|
||||
return []
|
||||
entries = []
|
||||
for episode in entry_list:
|
||||
nrk_id = episode.get('prfId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
@@ -462,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# new layout
|
||||
# new layout, seasons
|
||||
'url': 'https://tv.nrk.no/serie/backstage',
|
||||
'info_dict': {
|
||||
'id': 'backstage',
|
||||
@@ -471,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
# old layout
|
||||
# new layout, instalments
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
# old layout
|
||||
'url': 'https://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
@@ -517,11 +535,12 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
description = try_get(
|
||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
||||
entries = []
|
||||
for season in series['seasons']:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
entries.extend(self._extract_seasons(series.get('seasons')))
|
||||
entries.extend(self._extract_entries(series.get('instalments')))
|
||||
entries.extend(self._extract_episodes(series.get('extraMaterial')))
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
|
||||
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
@@ -533,6 +552,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
if title:
|
||||
title = self._search_regex(
|
||||
r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
@@ -593,7 +615,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
|
||||
'title': 'Rivertonprisen til Karin Fossum',
|
||||
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _extract_title(self, webpage):
|
||||
|
@@ -115,6 +115,10 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Paid video
|
||||
'url': 'https://ok.ru/video/954886983203',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -244,6 +248,11 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
if not formats:
|
||||
payment_info = metadata.get('paymentInfo')
|
||||
if payment_info:
|
||||
raise ExtractorError('This video is paid, subscribe to download it', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
|
28
youtube_dl/extractor/outsidetv.py
Normal file
28
youtube_dl/extractor/outsidetv.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class OutsideTVIE(InfoExtractor):
    # Matches outsidetv.com "play" pages. The last 8-character alphanumeric
    # path component captured as ``id`` is handed to the JWPlatform extractor
    # as the media id (see _real_extract).
    _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})'
    _TESTS = [
        {
            'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4',
            'md5': '192d968fedc10b2f70ec31865ffba0da',
            'info_dict': {
                'id': 'Hdg0jukV',
                'ext': 'mp4',
                'title': 'Home - Jackson Ep 1 | Arbor Snowboards',
                'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd',
                'upload_date': '20181225',
                'timestamp': 1545742800,
            }
        },
        {
            'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate extraction to the JWPlatform extractor.

        No page is downloaded here: the media id is taken straight from the
        URL and wrapped in a ``jwplatform:`` URL result.
        """
        media_id = self._match_id(url)
        jwplatform_url = 'jwplatform:' + media_id
        return self.url_result(
            jwplatform_url, ie='JWPlatform', video_id=media_id)
|
@@ -24,9 +24,9 @@ class PacktPubBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PacktPubIE(PacktPubBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
|
||||
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
|
||||
'info_dict': {
|
||||
@@ -37,7 +37,10 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
'timestamp': 1490918400,
|
||||
'upload_date': '20170331',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_NETRC_MACHINE = 'packtpub'
|
||||
_TOKEN = None
|
||||
|
||||
@@ -110,15 +113,18 @@ class PacktPubIE(PacktPubBaseIE):
|
||||
|
||||
|
||||
class PacktPubCourseIE(PacktPubBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
|
||||
'info_dict': {
|
||||
'id': '9781787122215',
|
||||
'title': 'Learn Nodejs by building 12 projects [Video]',
|
||||
},
|
||||
'playlist_count': 90,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
|
109
youtube_dl/extractor/playplustv.py
Normal file
109
youtube_dl/extractor/playplustv.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
PUTRequest,
|
||||
)
|
||||
|
||||
|
||||
class PlayPlusTVIE(InfoExtractor):
    # Extractor for PlayPlus VOD pages. Every API call is authenticated, so
    # account credentials are mandatory (see _real_initialize).
    _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
    _TEST = {
        'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
        'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
        'info_dict': {
            'id': 'db8d274a5163424e967f35a30ddafb8e',
            'ext': 'mp4',
            'title': 'Capítulo 179 - Final',
            'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
            'timestamp': 1529992740,
            'upload_date': '20180626',
        },
        'skip': 'Requires account credential',
    }
    _NETRC_MACHINE = 'playplustv'
    _GEO_COUNTRIES = ['BR']
    # Bearer token returned by the login endpoint; set in _real_initialize.
    _token = None
    # Id of the first profile of the logged in account; set in
    # _real_initialize and sent with every media request.
    _profile_id = None

    def _call_api(self, resource, video_id=None, query=None):
        """Call the ``get<resource>`` media API endpoint with the bearer token.

        ``resource`` is appended verbatim to the ``.../v2/get`` prefix
        (e.g. 'Profiles', 'Media'). Returns the decoded JSON response.
        """
        return self._download_json(
            'https://api.playplus.tv/api/media/v2/get' + resource, video_id,
            headers={
                'Authorization': 'Bearer ' + self._token,
            }, query=query)

    def _real_initialize(self):
        """Log in and remember the API token and the account's first profile.

        Raises a login-required error when no credentials are configured and
        an expected ExtractorError carrying the server's message when the
        login is rejected with HTTP 401.
        """
        email, password = self._get_login_info()
        if email is None:
            self.raise_login_required()

        req = PUTRequest(
            'https://api.playplus.tv/api/web/login', json.dumps({
                'email': email,
                'password': password,
            }).encode(), {
                'Content-Type': 'application/json; charset=utf-8',
            })

        try:
            self._token = self._download_json(req, None)['token']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # The error body is read as raw bytes; decode it explicitly
                # because json.loads only accepts bytes on Python >= 3.6.
                raise ExtractorError(self._parse_json(
                    e.cause.read().decode('utf-8'), None)['errorMessage'],
                    expected=True)
            raise

        # Stored under the attribute that is declared on the class instead of
        # an ad-hoc, undeclared one.
        self._profile_id = self._call_api('Profiles')['list'][0]['_id']

    def _real_extract(self, url):
        """Fetch the media object for the URL and build the info dict."""
        project_id, media_id = re.match(self._VALID_URL, url).groups()
        media = self._call_api(
            'Media', media_id, {
                'profileId': self._profile_id,
                'projectId': project_id,
                'mediaId': media_id,
            })['obj']
        title = media['title']

        formats = []
        # ``or []`` also covers a key that is present but null, which
        # ``.get(key, [])`` would pass through and fail to iterate.
        for f in media.get('files') or []:
            f_url = f.get('url')
            if not f_url:
                continue
            file_info = f.get('fileInfo') or {}
            formats.append({
                'url': f_url,
                'width': int_or_none(file_info.get('width')),
                'height': int_or_none(file_info.get('height')),
            })
        self._sort_formats(formats)

        thumbnails = []
        for thumb in media.get('thumbs') or []:
            thumb_url = thumb.get('url')
            if not thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

        return {
            'id': media_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clean_html(media.get('description')) or media.get('shortDescription'),
            # NOTE(review): publishDate is presumably in milliseconds, hence
            # the 1000 passed as second argument - confirm against the API.
            'timestamp': int_or_none(media.get('publishDate'), 1000),
            'view_count': int_or_none(media.get('numberOfViews')),
            'comment_count': int_or_none(media.get('numberOfComments')),
            'tags': media.get('tags'),
        }
|
@@ -10,7 +10,9 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -22,7 +24,29 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
class PornHubBaseIE(InfoExtractor):
    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page, passing the JavaScript challenge when one is served.

        Behaves like the parent implementation, except that when the returned
        page matches one of the challenge markers below it is run through
        PhantomJS and the download is then repeated with the same arguments.
        """
        def dl(*args, **kwargs):
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)

        ret = dl(*args, **kwargs)

        # NOTE(review): the parent presumably returns a falsy value instead
        # of a (webpage, urlh) pair when a non-fatal download fails; hand it
        # back untouched rather than crashing on tuple unpacking.
        if not ret:
            return ret

        webpage, urlh = ret

        challenge_markers = (
            r'<body\b[^>]+\bonload=["\']go\(\)',
            r'document\.cookie\s*=\s*["\']RNKEY=',
            r'document\.location\.reload\(true\)',
        )
        if not any(re.search(p, webpage) for p in challenge_markers):
            return webpage, urlh

        # The first positional argument is either a plain URL or a Request.
        url_or_request = args[0]
        url = (url_or_request.get_full_url()
               if isinstance(url_or_request, compat_urllib_request.Request)
               else url_or_request)
        phantom = PhantomJSwrapper(self, required_version='2.0')
        phantom.get(url, html=webpage)

        # Retry with the original arguments; the result (pair or falsy) is
        # returned as is so a failed retry is not unpacked either.
        return dl(*args, **kwargs)
|
||||
|
||||
|
||||
class PornHubIE(PornHubBaseIE):
|
||||
IE_DESC = 'PornHub and Thumbzilla'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -307,7 +331,7 @@ class PornHubIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class PornHubPlaylistBaseIE(InfoExtractor):
|
||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
def _extract_entries(self, webpage, host):
|
||||
# Only process container div with main playlist content skipping
|
||||
# drop-down menu that uses similar pattern for videos (see
|
||||
|
@@ -49,6 +49,16 @@ class RadioCanadaIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# with protectionType but not actually DRM protected
|
||||
'url': 'radiocanada:toutv:140872',
|
||||
'info_dict': {
|
||||
'id': '140872',
|
||||
'title': 'Épisode 1',
|
||||
'series': 'District 31',
|
||||
},
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -67,8 +77,10 @@ class RadioCanadaIE(InfoExtractor):
|
||||
el = find_xpath_attr(metadata, './/Meta', 'name', name)
|
||||
return el.text if el is not None else None
|
||||
|
||||
# protectionType does not necessarily mean the video is DRM protected (see
|
||||
# https://github.com/rg3/youtube-dl/pull/18609).
|
||||
if get_meta('protectionType'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self.report_warning('This video is probably DRM protected.')
|
||||
|
||||
device_types = ['ipad']
|
||||
if not smuggled_data:
|
||||
|
@@ -1,38 +1,46 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class RMCDecouverteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
|
||||
_TESTS = [{
|
||||
'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
|
||||
'info_dict': {
|
||||
'id': '5419055995001',
|
||||
'id': '5983675500001',
|
||||
'ext': 'mp4',
|
||||
'title': 'UN DELICIEUX PROJET',
|
||||
'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
|
||||
'title': 'CORVETTE',
|
||||
'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
|
||||
'uploader_id': '1969646226001',
|
||||
'upload_date': '20170502',
|
||||
'timestamp': 1493745308,
|
||||
'upload_date': '20181226',
|
||||
'timestamp': 1545861635,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'only available for a week',
|
||||
}
|
||||
}, {
|
||||
# live, geo restricted, bypassable
|
||||
'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id') or mobj.group('live_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
if brightcove_legacy_url:
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
||||
@@ -41,5 +49,7 @@ class RMCDecouverteIE(InfoExtractor):
|
||||
brightcove_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
||||
brightcove_id)
|
||||
smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['FR']}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
|
@@ -15,10 +15,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class SafariBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
||||
_LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
|
||||
_NETRC_MACHINE = 'safari'
|
||||
|
||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
||||
_API_BASE = 'https://learning.oreilly.com/api/v1'
|
||||
_API_FORMAT = 'json'
|
||||
|
||||
LOGGED_IN = False
|
||||
@@ -76,7 +76,7 @@ class SafariIE(SafariBaseIE):
|
||||
IE_DESC = 'safaribooksonline.com online video'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?safaribooksonline\.com/
|
||||
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||
(?:
|
||||
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
||||
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
||||
@@ -104,6 +104,9 @@ class SafariIE(SafariBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PARTNER_ID = '1926081'
|
||||
@@ -160,7 +163,7 @@ class SafariIE(SafariBaseIE):
|
||||
|
||||
class SafariApiIE(SafariBaseIE):
|
||||
IE_NAME = 'safari:api'
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||
@@ -185,7 +188,7 @@ class SafariCourseIE(SafariBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?safaribooksonline\.com/
|
||||
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||
(?:
|
||||
library/view/[^/]+|
|
||||
api/v1/book|
|
||||
@@ -213,6 +216,9 @@ class SafariCourseIE(SafariBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@@ -30,8 +30,5 @@ class SaveFromIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||
return {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'url': mobj.group('url'),
|
||||
}
|
||||
|
||||
return self.url_result(mobj.group('url'), video_id=video_id)
|
||||
|
@@ -19,7 +19,7 @@ class ScrippsNetworksWatchIE(AWSIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
watch\.
|
||||
(?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
|
||||
(?P<site>geniuskitchen)\.com/
|
||||
(?:
|
||||
player\.[A-Z0-9]+\.html\#|
|
||||
show/(?:[^/]+/){2}|
|
||||
@@ -28,38 +28,23 @@ class ScrippsNetworksWatchIE(AWSIE):
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
|
||||
'md5': '26545fd676d939954c6808274bdb905a',
|
||||
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
||||
'info_dict': {
|
||||
'id': '4173834',
|
||||
'id': '4194875',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best Ever Treehouses',
|
||||
'description': "We're searching for the most over the top treehouses.",
|
||||
'title': 'Ample Hills Ice Cream Bike',
|
||||
'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
|
||||
'uploader': 'ANV',
|
||||
'upload_date': '20170922',
|
||||
'timestamp': 1506056400,
|
||||
'upload_date': '20171011',
|
||||
'timestamp': 1507698000,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [AnvatoIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_SNI_TABLE = {
|
||||
'hgtv': 'hgtv',
|
||||
'diynetwork': 'diy',
|
||||
'foodnetwork': 'food',
|
||||
'cookingchanneltv': 'cook',
|
||||
'travelchannel': 'trav',
|
||||
'geniuskitchen': 'genius',
|
||||
}
|
||||
|
||||
|
@@ -26,7 +26,7 @@ class SkylineWebcamsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
stream_url = self._search_regex(
|
||||
r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
|
||||
r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
|
||||
'stream url', group='url')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class StreamangoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
|
||||
'md5': 'e992787515a182f55e38fc97588d802a',
|
||||
@@ -38,6 +38,9 @@ class StreamangoIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TBSIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
|
||||
'info_dict': {
|
||||
@@ -40,12 +40,12 @@ class TBSIE(TurnerBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
|
||||
webpage, 'drupal setting'), display_id)
|
||||
video_data = drupal_settings['turner_playlist'][0]
|
||||
video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
|
||||
|
||||
media_id = video_data['mediaID']
|
||||
title = video_data['title']
|
||||
|
@@ -14,20 +14,38 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class UpskillBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'http://upskillcourses.com/sign_in'
|
||||
_NETRC_MACHINE = 'upskill'
|
||||
class TeachableBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'teachable'
|
||||
_URL_PREFIX = 'teachable:'
|
||||
|
||||
_SITES = {
|
||||
# Only notable ones here
|
||||
'upskillcourses.com': 'upskill',
|
||||
'academy.gns3.com': 'gns3',
|
||||
'academyhacker.com': 'academyhacker',
|
||||
'stackskills.com': 'stackskills',
|
||||
'market.saleshacker.com': 'saleshacker',
|
||||
'learnability.org': 'learnability',
|
||||
'edurila.com': 'edurila',
|
||||
}
|
||||
|
||||
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
self._logged_in = False
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
def _login(self, site):
|
||||
if self._logged_in:
|
||||
return
|
||||
|
||||
username, password = self._get_login_info(
|
||||
netrc_machine=self._SITES.get(site, site))
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page, urlh = self._download_webpage_handle(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
'https://%s/sign_in' % site, None,
|
||||
'Downloading %s login page' % site)
|
||||
|
||||
login_url = compat_str(urlh.geturl())
|
||||
|
||||
@@ -46,18 +64,24 @@ class UpskillBaseIE(InfoExtractor):
|
||||
post_url = urljoin(login_url, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
post_url, None, 'Logging in to %s' % site,
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': login_url,
|
||||
})
|
||||
|
||||
if '>I accept the new Privacy Policy<' in response:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s asks you to accept new Privacy Policy. '
|
||||
'Go to https://%s/ and accept.' % (site, site), expected=True)
|
||||
|
||||
# Successful login
|
||||
if any(re.search(p, response) for p in (
|
||||
r'class=["\']user-signout',
|
||||
r'<a[^>]+\bhref=["\']/sign_out',
|
||||
r'>\s*Log out\s*<')):
|
||||
self._logged_in = True
|
||||
return
|
||||
|
||||
message = get_element_by_class('alert', response)
|
||||
@@ -68,8 +92,14 @@ class UpskillBaseIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class UpskillIE(UpskillBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'
|
||||
class TeachableIE(TeachableBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
%shttps?://(?P<site_t>[^/]+)|
|
||||
https?://(?:www\.)?(?P<site>%s)
|
||||
)
|
||||
/courses/[^/]+/lectures/(?P<id>\d+)
|
||||
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
|
||||
@@ -77,7 +107,7 @@ class UpskillIE(UpskillBaseIE):
|
||||
'id': 'uzw6zw58or',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the Course!',
|
||||
'description': 'md5:8d66c13403783370af62ca97a7357bdd',
|
||||
'description': 'md5:65edb0affa582974de4625b9cdea1107',
|
||||
'duration': 138.763,
|
||||
'timestamp': 1479846621,
|
||||
'upload_date': '20161122',
|
||||
@@ -88,10 +118,37 @@ class UpskillIE(UpskillBaseIE):
|
||||
}, {
|
||||
'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _is_teachable(webpage):
|
||||
return 'teachableTracker.linker:autoLink' in webpage and re.search(
|
||||
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
|
||||
webpage)
|
||||
|
||||
@staticmethod
def _extract_url(webpage, source_url):
    """Return the prefixed URL for a Teachable course/lecture page.

    Yields None unless the page is recognised as Teachable and
    ``source_url`` has a first path component starting with ``courses``
    or ``p``.
    """
    if not TeachableIE._is_teachable(webpage):
        return None
    if re.match(r'https?://[^/]+/(?:courses|p)', source_url) is None:
        return None
    return TeachableBaseIE._URL_PREFIX + source_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site = mobj.group('site') or mobj.group('site_t')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
self._login(site)
|
||||
|
||||
prefixed = url.startswith(self._URL_PREFIX)
|
||||
if prefixed:
|
||||
url = url[len(self._URL_PREFIX):]
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -113,12 +170,18 @@ class UpskillIE(UpskillBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class UpskillCourseIE(UpskillBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
|
||||
class TeachableCourseIE(TeachableBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
%shttps?://(?P<site_t>[^/]+)|
|
||||
https?://(?:www\.)?(?P<site>%s)
|
||||
)
|
||||
/(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
|
||||
''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
|
||||
_TESTS = [{
|
||||
'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
|
||||
'info_dict': {
|
||||
'id': '119763',
|
||||
'id': 'essential-web-developer-course',
|
||||
'title': 'The Essential Web Developer Course (Free)',
|
||||
},
|
||||
'playlist_count': 192,
|
||||
@@ -128,21 +191,37 @@ class UpskillCourseIE(UpskillBaseIE):
|
||||
}, {
|
||||
'url': 'http://upskillcourses.com/courses/enrolled/119763',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://academy.gns3.com/courses/enrolled/423415',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if UpskillIE.suitable(url) else super(
|
||||
UpskillCourseIE, cls).suitable(url)
|
||||
return False if TeachableIE.suitable(url) else super(
|
||||
TeachableCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site = mobj.group('site') or mobj.group('site_t')
|
||||
course_id = mobj.group('id')
|
||||
|
||||
self._login(site)
|
||||
|
||||
prefixed = url.startswith(self._URL_PREFIX)
|
||||
if prefixed:
|
||||
prefix = self._URL_PREFIX
|
||||
url = url[len(prefix):]
|
||||
|
||||
webpage = self._download_webpage(url, course_id)
|
||||
|
||||
course_id = self._search_regex(
|
||||
r'data-course-id=["\'](\d+)', webpage, 'course id',
|
||||
default=course_id)
|
||||
url_base = 'https://%s/' % site
|
||||
|
||||
entries = []
|
||||
|
||||
@@ -162,10 +241,13 @@ class UpskillCourseIE(UpskillBaseIE):
|
||||
title = self._html_search_regex(
|
||||
r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
|
||||
'title', default=None)
|
||||
entry_url = urljoin(url_base, lecture_url)
|
||||
if prefixed:
|
||||
entry_url = self._URL_PREFIX + entry_url
|
||||
entries.append(
|
||||
self.url_result(
|
||||
urljoin('http://upskillcourses.com/', lecture_url),
|
||||
ie=UpskillIE.ie_key(), video_id=lecture_id,
|
||||
entry_url,
|
||||
ie=TeachableIE.ie_key(), video_id=lecture_id,
|
||||
video_title=clean_html(title)))
|
||||
|
||||
course_title = self._html_search_regex(
|
@@ -203,10 +203,8 @@ class TEDIE(InfoExtractor):
|
||||
ext_url = None
|
||||
if service.lower() == 'youtube':
|
||||
ext_url = external.get('code')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': ext_url or external['uri'],
|
||||
}
|
||||
|
||||
return self.url_result(ext_url or external['uri'])
|
||||
|
||||
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||
|
||||
@@ -267,6 +265,8 @@ class TEDIE(InfoExtractor):
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
if f.get('acodec') == 'none':
|
||||
del f['acodec']
|
||||
formats.append(f)
|
||||
|
||||
audio_download = talk_info.get('audioDownload')
|
||||
|
@@ -61,8 +61,4 @@ class TestURLIE(InfoExtractor):
|
||||
|
||||
self.to_screen('Test URL: %s' % tc['url'])
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': tc['url'],
|
||||
'id': video_id,
|
||||
}
|
||||
return self.url_result(tc['url'], video_id=video_id)
|
||||
|
@@ -96,7 +96,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, display_id, 'Downloading metadata',
|
||||
transform_source=fix_xml_ampersands)
|
||||
transform_source=fix_xml_ampersands, headers={'Referer': url})
|
||||
|
||||
formats = []
|
||||
|
||||
|
@@ -10,8 +10,9 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
try_get,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,8 +25,7 @@ class TVNowBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, video_id, query):
|
||||
return self._download_json(
|
||||
'https://api.tvnow.de/v3/' + path,
|
||||
video_id, query=query)
|
||||
'https://api.tvnow.de/v3/' + path, video_id, query=query)
|
||||
|
||||
def _extract_video(self, info, display_id):
|
||||
video_id = compat_str(info['id'])
|
||||
@@ -108,6 +108,11 @@ class TVNowIE(TVNowBaseIE):
|
||||
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Reject URLs claimed by a more specific TVNow extractor.

    The sibling extractors are consulted in order; only when none of them
    accepts the URL is the regular pattern-based check applied.
    """
    more_specific = (TVNowNewIE, TVNowSeasonIE, TVNowAnnualIE, TVNowShowIE)
    if any(ie.suitable(url) for ie in more_specific):
        return False
    return super(TVNowIE, cls).suitable(url)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
|
||||
'info_dict': {
|
||||
@@ -116,7 +121,6 @@ class TVNowIE(TVNowBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Der neue Porsche 911 GT 3',
|
||||
'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1495994400,
|
||||
'upload_date': '20170528',
|
||||
'duration': 5283,
|
||||
@@ -161,136 +165,314 @@ class TVNowIE(TVNowBaseIE):
|
||||
info = self._call_api(
|
||||
'movies/' + display_id, display_id, query={
|
||||
'fields': ','.join(self._VIDEO_FIELDS),
|
||||
'station': mobj.group(1),
|
||||
})
|
||||
|
||||
return self._extract_video(info, display_id)
|
||||
|
||||
|
||||
class TVNowListBaseIE(TVNowBaseIE):
|
||||
_SHOW_VALID_URL = r'''(?x)
|
||||
(?P<base_url>
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
|
||||
(?P<show_id>[^/]+)
|
||||
)
|
||||
class TVNowNewIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<base_url>https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/
|
||||
(?:shows|serien))/
|
||||
(?P<show>[^/]+)-\d+/
|
||||
[^/]+/
|
||||
episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
|
||||
'''
|
||||
|
||||
def _extract_list_info(self, display_id, show_id):
|
||||
fields = list(self._SHOW_FIELDS)
|
||||
fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
|
||||
fields.extend(
|
||||
'formatTabs.formatTabPages.container.movies.%s' % field
|
||||
for field in self._VIDEO_FIELDS)
|
||||
return self._call_api(
|
||||
'formats/seo', display_id, query={
|
||||
'fields': ','.join(fields),
|
||||
'name': show_id + '.php'
|
||||
})
|
||||
|
||||
|
||||
class TVNowListIE(TVNowListBaseIE):
|
||||
_VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
|
||||
|
||||
_SHOW_FIELDS = ('title', )
|
||||
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
|
||||
_VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
|
||||
'info_dict': {
|
||||
'id': '28296',
|
||||
'title': '30 Minuten Deutschland - Aktuell',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
|
||||
'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if TVNowIE.suitable(url)
|
||||
else super(TVNowListIE, cls).suitable(url))
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
|
||||
show, episode = mobj.group('show', 'episode')
|
||||
return self.url_result(
|
||||
# Rewrite new URLs to the old format and use extraction via old API
|
||||
# at api.tvnow.de as a loophole for bypassing premium content checks
|
||||
'%s/%s/%s' % (base_url, show, episode),
|
||||
ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
|
||||
|
||||
|
||||
class TVNowNewBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, video_id, query={}):
|
||||
result = self._download_json(
|
||||
'https://apigw.tvnow.de/module/' + path, video_id, query=query)
|
||||
error = result.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
return result
|
||||
|
||||
|
||||
"""
|
||||
TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
|
||||
when api.tvnow.de is shut down. This version can't bypass premium checks though.
|
||||
class TVNowIE(TVNowNewBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/
|
||||
(?:shows|serien)/[^/]+/
|
||||
(?:[^/]+/)+
|
||||
(?P<display_id>[^/?$&]+)-(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# episode with annual navigation
|
||||
'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
|
||||
'info_dict': {
|
||||
'id': '331082',
|
||||
'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der neue Porsche 911 GT 3',
|
||||
'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1495994400,
|
||||
'upload_date': '20170528',
|
||||
'duration': 5283,
|
||||
'series': 'GRIP - Das Motormagazin',
|
||||
'season_number': 14,
|
||||
'episode_number': 405,
|
||||
'episode': 'Der neue Porsche 911 GT 3',
|
||||
},
|
||||
}, {
|
||||
# rtl2, episode with season navigation
|
||||
'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# rtlnitro
|
||||
'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# superrtl
|
||||
'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ntv
|
||||
'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vox
|
||||
'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video(self, info, url, display_id):
|
||||
config = info['config']
|
||||
source = config['source']
|
||||
|
||||
video_id = compat_str(info.get('id') or source['videoId'])
|
||||
title = source['title'].strip()
|
||||
|
||||
paths = []
|
||||
for manifest_url in (info.get('manifest') or {}).values():
|
||||
if not manifest_url:
|
||||
continue
|
||||
manifest_url = update_url_query(manifest_url, {'filter': ''})
|
||||
path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
|
||||
if path in paths:
|
||||
continue
|
||||
paths.append(path)
|
||||
|
||||
def url_repl(proto, suffix):
|
||||
return re.sub(
|
||||
r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
|
||||
r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
|
||||
'.ism/' + suffix, manifest_url))
|
||||
|
||||
formats = self._extract_mpd_formats(
|
||||
url_repl('dash', '.mpd'), video_id,
|
||||
mpd_id='dash', fatal=False)
|
||||
formats.extend(self._extract_ism_formats(
|
||||
url_repl('hss', 'Manifest'),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url_repl('hls', '.m3u8'), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
if formats:
|
||||
break
|
||||
else:
|
||||
if try_get(info, lambda x: x['rights']['isDrm']):
|
||||
raise ExtractorError(
|
||||
'Video %s is DRM protected' % video_id, expected=True)
|
||||
if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
|
||||
raise self.raise_geo_restricted()
|
||||
if not info.get('free', True):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available for free' % video_id, expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = source.get('description')
|
||||
thumbnail = url_or_none(source.get('poster'))
|
||||
timestamp = unified_timestamp(source.get('previewStart'))
|
||||
duration = parse_duration(source.get('length'))
|
||||
|
||||
series = source.get('format')
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'staffel-(\d+)', url, 'season number', default=None))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'episode-(\d+)', url, 'episode number', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'episode': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
info = self._call_api('player/' + video_id, video_id)
|
||||
return self._extract_video(info, video_id, display_id)
|
||||
"""
|
||||
|
||||
list_info = self._extract_list_info(season_id, show_id)
|
||||
|
||||
season = next(
|
||||
season for season in list_info['formatTabs']['items']
|
||||
if season.get('seoheadline') == season_id)
|
||||
class TVNowListBaseIE(TVNowNewBaseIE):
|
||||
_SHOW_VALID_URL = r'''(?x)
|
||||
(?P<base_url>
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
|
||||
[^/?#&]+-(?P<show_id>\d+)
|
||||
)
|
||||
'''
|
||||
|
||||
title = list_info.get('title')
|
||||
headline = season.get('headline')
|
||||
if title and headline:
|
||||
title = '%s - %s' % (title, headline)
|
||||
else:
|
||||
title = headline or title
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if TVNowNewIE.suitable(url)
|
||||
else super(TVNowListBaseIE, cls).suitable(url))
|
||||
|
||||
def _extract_items(self, url, show_id, list_id, query):
|
||||
items = self._call_api(
|
||||
'teaserrow/format/episode/' + show_id, list_id,
|
||||
query=query)['items']
|
||||
|
||||
entries = []
|
||||
for container in season['formatTabPages']['items']:
|
||||
items = try_get(
|
||||
container, lambda x: x['container']['movies']['items'],
|
||||
list) or []
|
||||
for info in items:
|
||||
seo_url = info.get('seoUrl')
|
||||
if not seo_url:
|
||||
continue
|
||||
video_id = info.get('id')
|
||||
entries.append(self.url_result(
|
||||
'%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
|
||||
compat_str(video_id) if video_id else None))
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
item_url = urljoin(url, item.get('url'))
|
||||
if not item_url:
|
||||
continue
|
||||
video_id = str_or_none(item.get('id') or item.get('videoId'))
|
||||
item_title = item.get('subheadline') or item.get('text')
|
||||
entries.append(self.url_result(
|
||||
item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
|
||||
video_title=item_title))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, compat_str(season.get('id') or season_id), title)
|
||||
return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
|
||||
|
||||
|
||||
class TVNowSeasonIE(TVNowListBaseIE):
|
||||
_VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
|
||||
'info_dict': {
|
||||
'id': '1815/13',
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
_, show_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_items(
|
||||
url, show_id, season_id, {'season': season_id})
|
||||
|
||||
|
||||
class TVNowAnnualIE(TVNowListBaseIE):
|
||||
_VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
|
||||
'info_dict': {
|
||||
'id': '1669/2017-05',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
_, show_id, year, month = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_items(
|
||||
url, show_id, '%s-%s' % (year, month), {
|
||||
'year': int(year),
|
||||
'month': int(month),
|
||||
})
|
||||
|
||||
|
||||
class TVNowShowIE(TVNowListBaseIE):
|
||||
_VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
|
||||
|
||||
_SHOW_FIELDS = ('id', 'title', )
|
||||
_SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
|
||||
_VIDEO_FIELDS = ()
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnow.at/vox/ab-ins-beet',
|
||||
# annual navigationType
|
||||
'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
|
||||
'info_dict': {
|
||||
'id': 'ab-ins-beet',
|
||||
'title': 'Ab ins Beet!',
|
||||
'id': '1669',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 73,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
|
||||
'only_matching': True,
|
||||
# season navigationType
|
||||
'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
|
||||
'info_dict': {
|
||||
'id': '11471',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
|
||||
return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
|
||||
else super(TVNowShowIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
list_info = self._extract_list_info(show_id, show_id)
|
||||
result = self._call_api(
|
||||
'teaserrow/format/navigation/' + show_id, show_id)
|
||||
|
||||
items = result['items']
|
||||
|
||||
entries = []
|
||||
for season_info in list_info['formatTabs']['items']:
|
||||
season_url = season_info.get('seoheadline')
|
||||
if not season_url:
|
||||
continue
|
||||
season_id = season_info.get('id')
|
||||
entries.append(self.url_result(
|
||||
'%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
|
||||
compat_str(season_id) if season_id else None,
|
||||
season_info.get('headline')))
|
||||
navigation = result.get('navigationType')
|
||||
if navigation == 'annual':
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
year = int_or_none(item.get('year'))
|
||||
if year is None:
|
||||
continue
|
||||
months = item.get('months')
|
||||
if not isinstance(months, list):
|
||||
continue
|
||||
for month_dict in months:
|
||||
if not isinstance(month_dict, dict) or not month_dict:
|
||||
continue
|
||||
month_number = int_or_none(list(month_dict.keys())[0])
|
||||
if month_number is None:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'%s/%04d-%02d' % (base_url, year, month_number),
|
||||
ie=TVNowAnnualIE.ie_key()))
|
||||
elif navigation == 'season':
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
season_number = int_or_none(item.get('season'))
|
||||
if season_number is None:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'%s/staffel-%d' % (base_url, season_number),
|
||||
ie=TVNowSeasonIE.ie_key()))
|
||||
else:
|
||||
raise ExtractorError('Unknown navigationType')
|
||||
|
||||
return self.playlist_result(entries, show_id, list_info.get('title'))
|
||||
return self.playlist_result(entries, show_id)
|
||||
|
@@ -171,7 +171,8 @@ class TwitterCardIE(TwitterBaseIE):
|
||||
urls.append('https://twitter.com/i/videos/' + video_id)
|
||||
|
||||
for u in urls:
|
||||
webpage = self._download_webpage(u, video_id)
|
||||
webpage = self._download_webpage(
|
||||
u, video_id, headers={'Referer': 'https://twitter.com/'})
|
||||
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
|
||||
|
@@ -61,7 +61,7 @@ class UOLIE(InfoExtractor):
|
||||
'height': 360,
|
||||
},
|
||||
'5': {
|
||||
'width': 1080,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
'6': {
|
||||
@@ -80,6 +80,10 @@ class UOLIE(InfoExtractor):
|
||||
'width': 568,
|
||||
'height': 320,
|
||||
},
|
||||
'11': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -111,19 +115,31 @@ class UOLIE(InfoExtractor):
|
||||
'ver': video_data.get('numRevision', 2),
|
||||
'r': 'http://mais.uol.com.br',
|
||||
}
|
||||
for k in ('token', 'sign'):
|
||||
v = video_data.get(k)
|
||||
if v:
|
||||
query[k] = v
|
||||
|
||||
formats = []
|
||||
for f in video_data.get('formats', []):
|
||||
f_url = f.get('url') or f.get('secureUrl')
|
||||
if not f_url:
|
||||
continue
|
||||
f_url = update_url_query(f_url, query)
|
||||
format_id = str_or_none(f.get('id'))
|
||||
if format_id == '10':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'url': update_url_query(f_url, query),
|
||||
'url': f_url,
|
||||
'source_preference': 1,
|
||||
}
|
||||
fmt.update(self._FORMATS.get(format_id, {}))
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
|
||||
|
||||
tags = []
|
||||
for tag in video_data.get('tags', []):
|
||||
|
@@ -4,8 +4,14 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -13,7 +19,19 @@ from ..utils import (
|
||||
|
||||
class VideomoreIE(InfoExtractor):
|
||||
IE_NAME = 'videomore'
|
||||
_VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)'
|
||||
_VALID_URL = r'''(?x)
|
||||
videomore:(?P<sid>\d+)$|
|
||||
https?://(?:player\.)?videomore\.ru/
|
||||
(?:
|
||||
(?:
|
||||
embed|
|
||||
[^/]+/[^/]+
|
||||
)/|
|
||||
[^/]*\?.*?\btrack_id=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
(?:[/?#&]|\.(?:xml|json)|$)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
|
||||
'md5': '44455a346edc0d509ac5b5a5b531dc35',
|
||||
@@ -79,6 +97,9 @@ class VideomoreIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'videomore:367617',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -136,7 +157,7 @@ class VideomoreIE(InfoExtractor):
|
||||
|
||||
class VideomoreVideoIE(InfoExtractor):
|
||||
IE_NAME = 'videomore:video'
|
||||
_VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)[/?#&]*$'
|
||||
_VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_TESTS = [{
|
||||
# single video with og:video:iframe
|
||||
'url': 'http://videomore.ru/elki_3',
|
||||
@@ -176,6 +197,9 @@ class VideomoreVideoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -196,13 +220,16 @@ class VideomoreVideoIE(InfoExtractor):
|
||||
r'track-id=["\'](\d+)',
|
||||
r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id')
|
||||
video_url = 'videomore:%s' % video_id
|
||||
else:
|
||||
video_id = None
|
||||
|
||||
return self.url_result(video_url, VideomoreIE.ie_key())
|
||||
return self.url_result(
|
||||
video_url, ie=VideomoreIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class VideomoreSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'videomore:season'
|
||||
_VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)[/?#&]*$'
|
||||
_VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_TESTS = [{
|
||||
'url': 'http://videomore.ru/molodezhka/sezon_promo',
|
||||
'info_dict': {
|
||||
@@ -210,8 +237,16 @@ class VideomoreSeasonIE(InfoExtractor):
|
||||
'title': 'Молодежка Промо',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
|
||||
else super(VideomoreSeasonIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
@@ -219,9 +254,54 @@ class VideomoreSeasonIE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
entries = [
|
||||
self.url_result(item) for item in re.findall(
|
||||
r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
|
||||
% display_id, webpage)]
|
||||
data = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1',
|
||||
webpage, 'data', default='{}', group='value'),
|
||||
display_id, fatal=False)
|
||||
|
||||
entries = []
|
||||
|
||||
if data:
|
||||
episodes = data.get('episodes')
|
||||
if isinstance(episodes, list):
|
||||
for ep in episodes:
|
||||
if not isinstance(ep, dict):
|
||||
continue
|
||||
ep_id = int_or_none(ep.get('id'))
|
||||
ep_url = url_or_none(ep.get('url'))
|
||||
if ep_id:
|
||||
e = {
|
||||
'url': 'videomore:%s' % ep_id,
|
||||
'id': compat_str(ep_id),
|
||||
}
|
||||
elif ep_url:
|
||||
e = {'url': ep_url}
|
||||
else:
|
||||
continue
|
||||
e.update({
|
||||
'_type': 'url',
|
||||
'ie_key': VideomoreIE.ie_key(),
|
||||
'title': str_or_none(ep.get('title')),
|
||||
'thumbnail': url_or_none(ep.get('image')),
|
||||
'duration': parse_duration(ep.get('duration')),
|
||||
'episode_number': int_or_none(ep.get('number')),
|
||||
'upload_date': unified_strdate(ep.get('date')),
|
||||
})
|
||||
entries.append(e)
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(
|
||||
'videomore:%s' % video_id, ie=VideomoreIE.ie_key(),
|
||||
video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r':(?:id|key)=["\'](\d+)["\']', webpage))]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(item) for item in re.findall(
|
||||
r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
|
||||
% display_id, webpage)]
|
||||
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
@@ -392,6 +393,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/68375962',
|
||||
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
|
||||
'info_dict': {
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
|
||||
'only_matching': True,
|
||||
@@ -418,6 +435,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
||||
'only_matching': True,
|
||||
}
|
||||
# https://gettingthingsdone.com/workflowmap/
|
||||
# vimeo embed with check-password page protected by Referer header
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -448,18 +467,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
urls = VimeoIE._extract_urls(url, webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
def _verify_player_video_password(self, url, video_id):
|
||||
def _verify_player_video_password(self, url, video_id, headers):
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
||||
data = urlencode_postdata({'password': password})
|
||||
pass_url = url + '/check-password'
|
||||
password_request = sanitized_Request(pass_url, data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Referer', url)
|
||||
return self._download_json(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
data = urlencode_postdata({
|
||||
'password': base64.b64encode(password.encode()),
|
||||
})
|
||||
headers = merge_dicts(headers, {
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
checked = self._download_json(
|
||||
url + '/check-password', video_id,
|
||||
'Verifying the password', data=data, headers=headers)
|
||||
if checked is False:
|
||||
raise ExtractorError('Wrong video password', expected=True)
|
||||
return checked
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -572,7 +595,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
cause=e)
|
||||
else:
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(redirect_url, video_id)
|
||||
config = self._verify_player_video_password(redirect_url, video_id, headers)
|
||||
|
||||
vod = config.get('video', {}).get('vod', {})
|
||||
|
||||
|
@@ -11,10 +11,12 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -24,29 +26,41 @@ class VRVBaseIE(InfoExtractor):
|
||||
_API_DOMAIN = None
|
||||
_API_PARAMS = {}
|
||||
_CMS_SIGNING = {}
|
||||
_TOKEN = None
|
||||
_TOKEN_SECRET = ''
|
||||
|
||||
def _call_api(self, path, video_id, note, data=None):
|
||||
# https://tools.ietf.org/html/rfc5849#section-3
|
||||
base_url = self._API_DOMAIN + '/core/' + path
|
||||
encoded_query = compat_urllib_parse_urlencode({
|
||||
'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
|
||||
'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'oauth_signature_method': 'HMAC-SHA1',
|
||||
'oauth_timestamp': int(time.time()),
|
||||
'oauth_version': '1.0',
|
||||
})
|
||||
query = [
|
||||
('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
|
||||
('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
|
||||
('oauth_signature_method', 'HMAC-SHA1'),
|
||||
('oauth_timestamp', int(time.time())),
|
||||
]
|
||||
if self._TOKEN:
|
||||
query.append(('oauth_token', self._TOKEN))
|
||||
encoded_query = compat_urllib_parse_urlencode(query)
|
||||
headers = self.geo_verification_headers()
|
||||
if data:
|
||||
data = json.dumps(data).encode()
|
||||
headers['Content-Type'] = 'application/json'
|
||||
method = 'POST' if data else 'GET'
|
||||
base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
|
||||
base_string = '&'.join([
|
||||
'POST' if data else 'GET',
|
||||
compat_urllib_parse.quote(base_url, ''),
|
||||
compat_urllib_parse.quote(encoded_query, '')])
|
||||
oauth_signature = base64.b64encode(hmac.new(
|
||||
(self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
|
||||
(self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
|
||||
base_string.encode(), hashlib.sha1).digest()).decode()
|
||||
encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
|
||||
return self._download_json(
|
||||
'?'.join([base_url, encoded_query]), video_id,
|
||||
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
|
||||
try:
|
||||
return self._download_json(
|
||||
'?'.join([base_url, encoded_query]), video_id,
|
||||
note='Downloading %s JSON metadata' % note, headers=headers, data=data)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
|
||||
raise
|
||||
|
||||
def _call_cms(self, path, video_id, note):
|
||||
if not self._CMS_SIGNING:
|
||||
@@ -55,19 +69,22 @@ class VRVBaseIE(InfoExtractor):
|
||||
self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
|
||||
note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
|
||||
|
||||
def _set_api_params(self, webpage, video_id):
|
||||
if not self._API_PARAMS:
|
||||
self._API_PARAMS = self._parse_json(self._search_regex(
|
||||
r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
|
||||
webpage, 'api config'), video_id)['cxApiParams']
|
||||
self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
|
||||
|
||||
def _get_cms_resource(self, resource_key, video_id):
|
||||
return self._call_api(
|
||||
'cms_resource', video_id, 'resource path', data={
|
||||
'resource_key': resource_key,
|
||||
})['__links__']['cms_resource']['href']
|
||||
|
||||
def _real_initialize(self):
|
||||
webpage = self._download_webpage(
|
||||
'https://vrv.co/', None, headers=self.geo_verification_headers())
|
||||
self._API_PARAMS = self._parse_json(self._search_regex(
|
||||
[
|
||||
r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
|
||||
r'window\.__APP_CONFIG__\s*=\s*({.+})'
|
||||
], webpage, 'app config'), None)['cxApiParams']
|
||||
self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
|
||||
|
||||
|
||||
class VRVIE(VRVBaseIE):
|
||||
IE_NAME = 'vrv'
|
||||
@@ -86,6 +103,22 @@ class VRVIE(VRVBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrv'
|
||||
|
||||
def _real_initialize(self):
|
||||
super(VRVIE, self)._real_initialize()
|
||||
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
|
||||
token_credentials = self._call_api(
|
||||
'authenticate/by:credentials', None, 'Token Credentials', data={
|
||||
'email': email,
|
||||
'password': password,
|
||||
})
|
||||
self._TOKEN = token_credentials['oauth_token']
|
||||
self._TOKEN_SECRET = token_credentials['oauth_token_secret']
|
||||
|
||||
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
|
||||
if not url or stream_format not in ('hls', 'dash'):
|
||||
@@ -116,26 +149,16 @@ class VRVIE(VRVBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
media_resource = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
|
||||
webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
|
||||
|
||||
video_data = media_resource.get('json')
|
||||
if not video_data:
|
||||
self._set_api_params(webpage, video_id)
|
||||
episode_path = self._get_cms_resource(
|
||||
'cms:/episodes/' + video_id, video_id)
|
||||
video_data = self._call_cms(episode_path, video_id, 'video')
|
||||
episode_path = self._get_cms_resource(
|
||||
'cms:/episodes/' + video_id, video_id)
|
||||
video_data = self._call_cms(episode_path, video_id, 'video')
|
||||
title = video_data['title']
|
||||
|
||||
streams_json = media_resource.get('streams', {}).get('json', {})
|
||||
if not streams_json:
|
||||
self._set_api_params(webpage, video_id)
|
||||
streams_path = video_data['__links__']['streams']['href']
|
||||
streams_json = self._call_cms(streams_path, video_id, 'streams')
|
||||
streams_path = video_data['__links__'].get('streams', {}).get('href')
|
||||
if not streams_path:
|
||||
self.raise_login_required()
|
||||
streams_json = self._call_cms(streams_path, video_id, 'streams')
|
||||
|
||||
audio_locale = streams_json.get('audio_locale')
|
||||
formats = []
|
||||
@@ -200,11 +223,7 @@ class VRVSeriesIE(VRVBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, series_id,
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
self._set_api_params(webpage, series_id)
|
||||
seasons_path = self._get_cms_resource(
|
||||
'cms:/seasons?series_id=' + series_id, series_id)
|
||||
seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
|
||||
|
@@ -40,11 +40,7 @@ class WimpIE(InfoExtractor):
|
||||
r'data-id=["\']([0-9A-Za-z_-]{11})'),
|
||||
webpage, 'video URL', default=None)
|
||||
if youtube_id:
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_id,
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
}
|
||||
return self.url_result(youtube_id, YoutubeIE.ie_key())
|
||||
|
||||
info_dict = self._extract_jwplayer_data(
|
||||
webpage, video_id, require_title=False)
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)'
|
||||
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
||||
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
||||
|
||||
@@ -38,6 +38,9 @@ class WistiaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -45,7 +45,7 @@ class XVideosIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.xvideos.com/video%s/' % video_id, video_id)
|
||||
'https://www.xvideos.com/video%s/' % video_id, video_id)
|
||||
|
||||
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
||||
if mobj:
|
||||
|
@@ -68,11 +68,9 @@ class YouPornIE(InfoExtractor):
|
||||
request.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(request, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
[r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
|
||||
webpage, 'title', group='title',
|
||||
default=None) or self._og_search_title(
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'title', webpage, fatal=True)
|
||||
|
||||
@@ -134,7 +132,11 @@ class YouPornIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description',
|
||||
default=None) or self._og_search_description(
|
||||
webpage, default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
|
@@ -14,6 +14,7 @@ class YourPornIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:c9f43630bd968267672651ba905a7d35',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,7 +27,7 @@ class YourPornIE(InfoExtractor):
|
||||
self._search_regex(
|
||||
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
|
||||
group='data'),
|
||||
video_id)[video_id])
|
||||
video_id)[video_id]).replace('/cdn/', '/cdn3/')
|
||||
|
||||
title = (self._search_regex(
|
||||
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
|
||||
@@ -38,4 +39,5 @@ class YourPornIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -48,6 +48,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
uppercase_escape,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -497,7 +498,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
|
||||
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
|
||||
'upload_date': '20121002',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': ['youtube-dl'],
|
||||
@@ -526,7 +526,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'Icona Pop',
|
||||
'uploader_id': 'IconaPop',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
|
||||
'license': 'Standard YouTube License',
|
||||
'creator': 'Icona Pop',
|
||||
'track': 'I Love It (feat. Charli XCX)',
|
||||
'artist': 'Icona Pop',
|
||||
@@ -539,14 +538,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': '07FYdnEawAQ',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130703',
|
||||
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
|
||||
'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
|
||||
'alt_title': 'Tunnel Vision',
|
||||
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||
'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
|
||||
'duration': 419,
|
||||
'uploader': 'justintimberlakeVEVO',
|
||||
'uploader_id': 'justintimberlakeVEVO',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
|
||||
'license': 'Standard YouTube License',
|
||||
'creator': 'Justin Timberlake',
|
||||
'track': 'Tunnel Vision',
|
||||
'artist': 'Justin Timberlake',
|
||||
@@ -565,7 +563,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'SET India',
|
||||
'uploader_id': 'setindia',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
|
||||
'license': 'Standard YouTube License',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
@@ -580,7 +577,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'phihag',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
|
||||
'upload_date': '20121002',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': ['youtube-dl'],
|
||||
@@ -604,7 +600,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
|
||||
'description': '',
|
||||
'uploader': '8KVIDEO',
|
||||
'license': 'Standard YouTube License',
|
||||
'title': 'UHDTV TEST 8K VIDEO.mp4'
|
||||
},
|
||||
'params': {
|
||||
@@ -619,13 +614,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'IB3lcPjvWLA',
|
||||
'ext': 'm4a',
|
||||
'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
|
||||
'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
|
||||
'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
|
||||
'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
|
||||
'duration': 244,
|
||||
'uploader': 'AfrojackVEVO',
|
||||
'uploader_id': 'AfrojackVEVO',
|
||||
'upload_date': '20131011',
|
||||
'license': 'Standard YouTube License',
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
@@ -639,13 +633,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': 'nfWlot6h_JM',
|
||||
'ext': 'm4a',
|
||||
'title': 'Taylor Swift - Shake It Off',
|
||||
'alt_title': 'Shake It Off',
|
||||
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
|
||||
'description': 'md5:bec2185232c05479482cb5a9b82719bf',
|
||||
'duration': 242,
|
||||
'uploader': 'TaylorSwiftVEVO',
|
||||
'uploader_id': 'TaylorSwiftVEVO',
|
||||
'upload_date': '20140818',
|
||||
'license': 'Standard YouTube License',
|
||||
'creator': 'Taylor Swift',
|
||||
},
|
||||
'params': {
|
||||
@@ -661,10 +653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'duration': 219,
|
||||
'upload_date': '20100909',
|
||||
'uploader': 'TJ Kirk',
|
||||
'uploader': 'Amazing Atheist',
|
||||
'uploader_id': 'TheAmazingAtheist',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
|
||||
'license': 'Standard YouTube License',
|
||||
'title': 'Burning Everyone\'s Koran',
|
||||
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
|
||||
}
|
||||
@@ -682,7 +673,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'WitcherGame',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
|
||||
'upload_date': '20140605',
|
||||
'license': 'Standard YouTube License',
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
@@ -691,7 +681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
|
||||
'info_dict': {
|
||||
'id': '6kLq3WMV1nU',
|
||||
'ext': 'webm',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
|
||||
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
|
||||
'duration': 246,
|
||||
@@ -699,7 +689,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'LloydVEVO',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
|
||||
'upload_date': '20110629',
|
||||
'license': 'Standard YouTube License',
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
@@ -717,7 +706,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'creator': 'deadmau5',
|
||||
'description': 'md5:12c56784b8032162bb936a5f76d55360',
|
||||
'uploader': 'deadmau5',
|
||||
'license': 'Standard YouTube License',
|
||||
'title': 'Deadmau5 - Some Chords (HD)',
|
||||
'alt_title': 'Some Chords',
|
||||
},
|
||||
@@ -735,7 +723,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'upload_date': '20150827',
|
||||
'uploader_id': 'olympic',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
|
||||
'license': 'Standard YouTube License',
|
||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||
'uploader': 'Olympic',
|
||||
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
||||
@@ -757,7 +744,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
|
||||
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
|
||||
'uploader': '孫ᄋᄅ',
|
||||
'license': 'Standard YouTube License',
|
||||
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
|
||||
},
|
||||
},
|
||||
@@ -791,7 +777,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'dorappi2000',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
|
||||
'uploader': 'dorappi2000',
|
||||
'license': 'Standard YouTube License',
|
||||
'formats': 'mincount:31',
|
||||
},
|
||||
'skip': 'not actual anymore',
|
||||
@@ -807,7 +792,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'Airtek',
|
||||
'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
|
||||
'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
|
||||
'license': 'Standard YouTube License',
|
||||
'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
|
||||
},
|
||||
'params': {
|
||||
@@ -880,6 +864,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is not available.',
|
||||
},
|
||||
{
|
||||
# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
|
||||
@@ -916,7 +901,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'IronSoulElf',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
||||
'uploader': 'IronSoulElf',
|
||||
'license': 'Standard YouTube License',
|
||||
'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
||||
'track': 'Dark Walk - Position Music',
|
||||
'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
||||
@@ -1020,13 +1004,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': 'iqKdEhx-dD4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Isolation - Mind Field (Ep 1)',
|
||||
'description': 'md5:25b78d2f64ae81719f5c96319889b736',
|
||||
'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
|
||||
'duration': 2085,
|
||||
'upload_date': '20170118',
|
||||
'uploader': 'Vsauce',
|
||||
'uploader_id': 'Vsauce',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
|
||||
'license': 'Standard YouTube License',
|
||||
'series': 'Mind Field',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
@@ -1052,7 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'New Century Foundation',
|
||||
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
|
||||
'license': 'Standard YouTube License',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -1076,6 +1058,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# DRM protected
|
||||
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# Video with unsupported adaptive stream type formats
|
||||
'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
|
||||
'info_dict': {
|
||||
'id': 'Z4Vy8R84T1U',
|
||||
'ext': 'mp4',
|
||||
'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'duration': 433,
|
||||
'upload_date': '20130923',
|
||||
'uploader': 'Amelia Putri Harwita',
|
||||
'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
|
||||
'formats': 'maxcount:10',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'youtube_include_dash_manifest': False,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -1104,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
if not id_m:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
@@ -1191,8 +1198,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
funcname = self._search_regex(
|
||||
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
jscode, 'Initial JS player signature function name', group='sig')
|
||||
|
||||
@@ -1386,8 +1393,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _mark_watched(self, video_id, video_info):
|
||||
playback_url = video_info.get('videostats_playback_base_url', [None])[0]
|
||||
def _mark_watched(self, video_id, video_info, player_response):
|
||||
playback_url = url_or_none(try_get(
|
||||
player_response,
|
||||
lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
|
||||
video_info, lambda x: x['videostats_playback_base_url'][0]))
|
||||
if not playback_url:
|
||||
return
|
||||
parsed_playback_url = compat_urlparse.urlparse(playback_url)
|
||||
@@ -1536,6 +1546,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if dash_mpd and dash_mpd[0] not in dash_mpds:
|
||||
dash_mpds.append(dash_mpd[0])
|
||||
|
||||
def add_dash_mpd_pr(pl_response):
    """Record the DASH manifest URL found in a player response, if any.

    Looks up ``streamingData.dashManifestUrl`` in *pl_response* and appends
    it to the enclosing ``dash_mpds`` list unless it is missing, rejected by
    ``url_or_none``, or already present in that list.
    """
    manifest_url = url_or_none(try_get(
        pl_response,
        lambda x: x['streamingData']['dashManifestUrl'],
        compat_str))
    # Nothing usable, or this manifest has been collected already
    if not manifest_url or manifest_url in dash_mpds:
        return
    dash_mpds.append(manifest_url)
|
||||
|
||||
is_live = None
|
||||
view_count = None
|
||||
|
||||
@@ -1593,6 +1610,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if isinstance(pl_response, dict):
|
||||
player_response = pl_response
|
||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
add_dash_mpd_pr(player_response)
|
||||
# We also try looking in get_video_info since it may contain different dashmpd
|
||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||
# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
|
||||
@@ -1624,6 +1642,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
pl_response = get_video_info.get('player_response', [None])[0]
|
||||
if isinstance(pl_response, dict):
|
||||
player_response = pl_response
|
||||
add_dash_mpd_pr(player_response)
|
||||
add_dash_mpd(get_video_info)
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(get_video_info)
|
||||
@@ -1669,6 +1688,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'"token" parameter not in video info for unknown reason',
|
||||
video_id=video_id)
|
||||
|
||||
if video_info.get('license_info'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
video_details = try_get(
|
||||
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||
|
||||
@@ -1712,30 +1734,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
else:
|
||||
video_description = ''
|
||||
|
||||
if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
|
||||
if not smuggled_data.get('force_singlefeed', False):
|
||||
if not self._downloader.params.get('noplaylist'):
|
||||
entries = []
|
||||
feed_ids = []
|
||||
multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
|
||||
for feed in multifeed_metadata_list.split(','):
|
||||
# Unquote should take place before split on comma (,) since textual
|
||||
# fields may contain comma as well (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8536)
|
||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Youtube',
|
||||
'url': smuggle_url(
|
||||
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
||||
{'force_singlefeed': True}),
|
||||
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
||||
})
|
||||
feed_ids.append(feed_data['id'][0])
|
||||
self.to_screen(
|
||||
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
||||
% (', '.join(feed_ids), video_id))
|
||||
return self.playlist_result(entries, video_id, video_title, video_description)
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
multifeed_metadata_list = try_get(
|
||||
player_response,
|
||||
lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
|
||||
compat_str) or try_get(
|
||||
video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
|
||||
if multifeed_metadata_list:
|
||||
entries = []
|
||||
feed_ids = []
|
||||
for feed in multifeed_metadata_list.split(','):
|
||||
# Unquote should take place before split on comma (,) since textual
|
||||
# fields may contain comma as well (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8536)
|
||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Youtube',
|
||||
'url': smuggle_url(
|
||||
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
||||
{'force_singlefeed': True}),
|
||||
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
||||
})
|
||||
feed_ids.append(feed_data['id'][0])
|
||||
self.to_screen(
|
||||
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
||||
% (', '.join(feed_ids), video_id))
|
||||
return self.playlist_result(entries, video_id, video_title, video_description)
|
||||
else:
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(video_info)
|
||||
@@ -1776,11 +1804,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'height': int_or_none(width_height[1]),
|
||||
}
|
||||
q = qualities(['small', 'medium', 'hd720'])
|
||||
streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
|
||||
if streaming_formats:
|
||||
for fmt in streaming_formats:
|
||||
itag = str_or_none(fmt.get('itag'))
|
||||
if not itag:
|
||||
continue
|
||||
quality = fmt.get('quality')
|
||||
quality_label = fmt.get('qualityLabel') or quality
|
||||
formats_spec[itag] = {
|
||||
'asr': int_or_none(fmt.get('audioSampleRate')),
|
||||
'filesize': int_or_none(fmt.get('contentLength')),
|
||||
'format_note': quality_label,
|
||||
'fps': int_or_none(fmt.get('fps')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'quality': q(quality),
|
||||
# bitrate for itag 43 is always 2147483647
|
||||
'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
}
|
||||
formats = []
|
||||
for url_data_str in encoded_url_map.split(','):
|
||||
url_data = compat_parse_qs(url_data_str)
|
||||
if 'itag' not in url_data or 'url' not in url_data:
|
||||
continue
|
||||
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
|
||||
# Unsupported FORMAT_STREAM_TYPE_OTF
|
||||
if stream_type == 3:
|
||||
continue
|
||||
format_id = url_data['itag'][0]
|
||||
url = url_data['url'][0]
|
||||
|
||||
@@ -1824,7 +1875,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||
r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
|
||||
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
@@ -1858,7 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
filesize = int_or_none(url_data.get(
|
||||
'clen', [None])[0]) or _extract_filesize(url)
|
||||
|
||||
quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
|
||||
quality = url_data.get('quality', [None])[0]
|
||||
|
||||
more_fields = {
|
||||
'filesize': filesize,
|
||||
@@ -1866,7 +1917,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'width': width,
|
||||
'height': height,
|
||||
'fps': int_or_none(url_data.get('fps', [None])[0]),
|
||||
'format_note': quality,
|
||||
'format_note': url_data.get('quality_label', [None])[0] or quality,
|
||||
'quality': q(quality),
|
||||
}
|
||||
for key, value in more_fields.items():
|
||||
@@ -1894,31 +1945,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'http_chunk_size': 10485760,
|
||||
}
|
||||
formats.append(dct)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', fatal=False)
|
||||
for a_format in m3u8_formats:
|
||||
itag = self._search_regex(
|
||||
r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
|
||||
if itag:
|
||||
a_format['format_id'] = itag
|
||||
if itag in self._formats:
|
||||
dct = self._formats[itag].copy()
|
||||
dct.update(a_format)
|
||||
a_format = dct
|
||||
a_format['player_url'] = player_url
|
||||
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
||||
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
|
||||
formats.append(a_format)
|
||||
else:
|
||||
error_message = clean_html(video_info.get('reason', [None])[0])
|
||||
if not error_message:
|
||||
error_message = extract_unavailable_message()
|
||||
if error_message:
|
||||
raise ExtractorError(error_message, expected=True)
|
||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
manifest_url = (
|
||||
url_or_none(try_get(
|
||||
player_response,
|
||||
lambda x: x['streamingData']['hlsManifestUrl'],
|
||||
compat_str)) or
|
||||
url_or_none(try_get(
|
||||
video_info, lambda x: x['hlsvp'][0], compat_str)))
|
||||
if manifest_url:
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', fatal=False)
|
||||
for a_format in m3u8_formats:
|
||||
itag = self._search_regex(
|
||||
r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
|
||||
if itag:
|
||||
a_format['format_id'] = itag
|
||||
if itag in self._formats:
|
||||
dct = self._formats[itag].copy()
|
||||
dct.update(a_format)
|
||||
a_format = dct
|
||||
a_format['player_url'] = player_url
|
||||
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
||||
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
|
||||
formats.append(a_format)
|
||||
else:
|
||||
error_message = clean_html(video_info.get('reason', [None])[0])
|
||||
if not error_message:
|
||||
error_message = extract_unavailable_message()
|
||||
if error_message:
|
||||
raise ExtractorError(error_message, expected=True)
|
||||
raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
# uploader
|
||||
video_uploader = try_get(
|
||||
@@ -2006,7 +2064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
||||
video_webpage)
|
||||
if m_episode:
|
||||
series = m_episode.group('series')
|
||||
series = unescapeHTML(m_episode.group('series'))
|
||||
season_number = int(m_episode.group('season'))
|
||||
episode_number = int(m_episode.group('episode'))
|
||||
else:
|
||||
@@ -2116,7 +2174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
self.mark_watched(video_id, video_info)
|
||||
self.mark_watched(video_id, video_info, player_response)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -79,6 +79,20 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
||||
prefer_ffmpeg = True
|
||||
|
||||
def get_ffmpeg_version(path):
    """Return the version reported by the executable at *path*.

    The executable is queried with ``-version`` through ``get_exe_version``.
    Distribution-specific packaging decorations (Ubuntu, Arch Linux) are
    stripped from the reported string so that only the dotted version
    number remains. A falsy result from ``get_exe_version`` is returned
    unchanged.
    """
    ver = get_exe_version(path, args=['-version'])
    if ver:
        regexs = [
            r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$',  # Ubuntu, see [1]
            r'n([0-9.]+)$',  # Arch Linux
            # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
        ]
        for regex in regexs:
            mobj = re.match(regex, ver)
            if mobj:
                ver = mobj.group(1)
                # The version is normalized now; do not run the remaining
                # distribution patterns against the already stripped value
                break
    return ver
|
||||
|
||||
self.basename = None
|
||||
self.probe_basename = None
|
||||
|
||||
@@ -110,11 +124,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
self._paths = dict(
|
||||
(p, os.path.join(location, p)) for p in programs)
|
||||
self._versions = dict(
|
||||
(p, get_exe_version(self._paths[p], args=['-version']))
|
||||
for p in programs)
|
||||
(p, get_ffmpeg_version(self._paths[p])) for p in programs)
|
||||
if self._versions is None:
|
||||
self._versions = dict(
|
||||
(p, get_exe_version(p, args=['-version'])) for p in programs)
|
||||
(p, get_ffmpeg_version(p)) for p in programs)
|
||||
self._paths = dict((p, p) for p in programs)
|
||||
|
||||
if prefer_ffmpeg is False:
|
||||
@@ -384,9 +397,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
opts += ['-c:s', 'mov_text']
|
||||
for (i, lang) in enumerate(sub_langs):
|
||||
opts.extend(['-map', '%d:0' % (i + 1)])
|
||||
lang_code = ISO639Utils.short2long(lang)
|
||||
if lang_code is not None:
|
||||
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
|
||||
lang_code = ISO639Utils.short2long(lang) or lang
|
||||
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
|
||||
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
|
||||
|
@@ -39,6 +39,7 @@ from .compat import (
|
||||
compat_HTMLParser,
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_cookiejar,
|
||||
compat_ctypes_WINFUNCTYPE,
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
@@ -1139,6 +1140,33 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
||||
req, **kwargs)
|
||||
|
||||
|
||||
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that round-trips session cookies as ``expires=0``.

    The stock MozillaCookieJar writes session cookies with an empty
    ``expires`` field and only recognizes that form when reading. This
    subclass writes ``0`` instead and treats ``0`` as a session cookie marker
    when loading.
    """

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to *filename*, storing session cookies with expires 0.

        Arguments are forwarded unchanged to ``MozillaCookieJar.save``. The
        in-memory cookies are left untouched once the call returns.
        """
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        session_cookies = [
            cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0
        try:
            compat_cookiejar.MozillaCookieJar.save(
                self, filename, ignore_discard, ignore_expires)
        finally:
            # Undo the temporary marker: a cookie left with `expires == 0`
            # counts as expired and would be purged from the live jar the
            # next time expired cookies are cleared
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from *filename*, mapping expires 0 to session cookies.

        Arguments are forwarded unchanged to ``MozillaCookieJar.load``.
        """
        compat_cookiejar.MozillaCookieJar.load(
            self, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as
        # session cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
|
||||
|
||||
|
||||
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
|
||||
def __init__(self, cookiejar=None):
|
||||
compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
|
||||
@@ -1840,7 +1868,7 @@ def urljoin(base, path):
|
||||
path = path.decode('utf-8')
|
||||
if not isinstance(path, compat_str) or not path:
|
||||
return None
|
||||
if re.match(r'^(?:https?:)?//', path):
|
||||
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
||||
return path
|
||||
if isinstance(base, bytes):
|
||||
base = base.decode('utf-8')
|
||||
@@ -2940,6 +2968,7 @@ class ISO639Utils(object):
|
||||
'gv': 'glv',
|
||||
'ha': 'hau',
|
||||
'he': 'heb',
|
||||
'iw': 'heb', # Replaced by he in 1989 revision
|
||||
'hi': 'hin',
|
||||
'ho': 'hmo',
|
||||
'hr': 'hrv',
|
||||
@@ -2949,6 +2978,7 @@ class ISO639Utils(object):
|
||||
'hz': 'her',
|
||||
'ia': 'ina',
|
||||
'id': 'ind',
|
||||
'in': 'ind', # Replaced by id in 1989 revision
|
||||
'ie': 'ile',
|
||||
'ig': 'ibo',
|
||||
'ii': 'iii',
|
||||
@@ -3063,6 +3093,7 @@ class ISO639Utils(object):
|
||||
'wo': 'wol',
|
||||
'xh': 'xho',
|
||||
'yi': 'yid',
|
||||
'ji': 'yid', # Replaced by yi in 1989 revision
|
||||
'yo': 'yor',
|
||||
'za': 'zha',
|
||||
'zh': 'zho',
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.12.03'
|
||||
__version__ = '2019.01.24'
|
||||
|
Reference in New Issue
Block a user