mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-20 23:28:37 +09:00
Compare commits
235 Commits
2018.12.09
...
2019.03.09
Author | SHA1 | Date | |
---|---|---|---|
![]() |
10734553fe | ||
![]() |
bba35695eb | ||
![]() |
9d74ea6d36 | ||
![]() |
7b6e760870 | ||
![]() |
829685b88a | ||
![]() |
fca9baf0da | ||
![]() |
d347b52b63 | ||
![]() |
97157c692c | ||
![]() |
a551768acf | ||
![]() |
ee0ba927aa | ||
![]() |
399f76870d | ||
![]() |
e5ada4f3ad | ||
![]() |
bb6f112d9d | ||
![]() |
c17eb5b4b0 | ||
![]() |
d9eb580a79 | ||
![]() |
5dcd630dca | ||
![]() |
c790e93ab5 | ||
![]() |
39c780fdec | ||
![]() |
e7e62441cd | ||
![]() |
0a5baf9c21 | ||
![]() |
8ae113ca9d | ||
![]() |
7aeb788e56 | ||
![]() |
7465e0aee2 | ||
![]() |
a8f83f0c56 | ||
![]() |
dca0e0040a | ||
![]() |
398e1e21d6 | ||
![]() |
c5b02efe20 | ||
![]() |
06242d44fe | ||
![]() |
04c33bdfb3 | ||
![]() |
333f617b12 | ||
![]() |
ff60ec8f02 | ||
![]() |
9d9a8676dc | ||
![]() |
db1c3a9d3f | ||
![]() |
55b8588f0e | ||
![]() |
f0228f56fb | ||
![]() |
8c80603f1a | ||
![]() |
37b239b3b6 | ||
![]() |
caf48f557a | ||
![]() |
77a842c892 | ||
![]() |
c76fc5b22a | ||
![]() |
388cfbd3d8 | ||
![]() |
d93083789b | ||
![]() |
34568dc296 | ||
![]() |
3c9647372e | ||
![]() |
659e93fcf5 | ||
![]() |
c9a0ea6e51 | ||
![]() |
d7d513891b | ||
![]() |
ae65c93a26 | ||
![]() |
ba2e3730d1 | ||
![]() |
2b2da3ba10 | ||
![]() |
794c1b6e02 | ||
![]() |
7bee705d8f | ||
![]() |
6f5c1807f4 | ||
![]() |
985637cbbf | ||
![]() |
7d8b89163c | ||
![]() |
d777f3e81c | ||
![]() |
4c0e0dc9dc | ||
![]() |
f516f44094 | ||
![]() |
e9dee7f1b2 | ||
![]() |
91effe22a0 | ||
![]() |
04eacf5453 | ||
![]() |
f1f5b47255 | ||
![]() |
1211bb6dac | ||
![]() |
4de3cb883c | ||
![]() |
22f5f5c6fc | ||
![]() |
49bd993fd9 | ||
![]() |
f06a1cabe8 | ||
![]() |
241c5d7d38 | ||
![]() |
8fecc7353d | ||
![]() |
5dda1edef9 | ||
![]() |
d2d970d07e | ||
![]() |
48fb963b2f | ||
![]() |
70c3ee1367 | ||
![]() |
07fbfef1c7 | ||
![]() |
eecf788b90 | ||
![]() |
0efcb5a2fe | ||
![]() |
7c5307f4c4 | ||
![]() |
6cc6e0c34d | ||
![]() |
b9bc1cff72 | ||
![]() |
e9fef7ee4e | ||
![]() |
b6423e6ca2 | ||
![]() |
3ef2da2d21 | ||
![]() |
49fe4175ae | ||
![]() |
9613e14a92 | ||
![]() |
15e832ff2a | ||
![]() |
645c4885cf | ||
![]() |
7b0f9df23d | ||
![]() |
c2a0fe2ea7 | ||
![]() |
ce52c7c111 | ||
![]() |
1063b4c707 | ||
![]() |
ca01e5f903 | ||
![]() |
5496754ae4 | ||
![]() |
9868f1ab18 | ||
![]() |
41cff90c41 | ||
![]() |
a2d821d711 | ||
![]() |
6df196f32e | ||
![]() |
41c2c254d3 | ||
![]() |
a81daba231 | ||
![]() |
61ff92e11e | ||
![]() |
1397a790ff | ||
![]() |
7f903dd8bf | ||
![]() |
2b3afe6b0f | ||
![]() |
e71be6ee9f | ||
![]() |
bf8ebc9cfe | ||
![]() |
1fcc91663b | ||
![]() |
30cd1a5f39 | ||
![]() |
458fd30f56 | ||
![]() |
845333acf6 | ||
![]() |
252abb1e8b | ||
![]() |
ae18d58297 | ||
![]() |
1602a240a7 | ||
![]() |
0eba178fce | ||
![]() |
eb35b163ad | ||
![]() |
118afcf52f | ||
![]() |
9713d1d1e0 | ||
![]() |
a1e171233d | ||
![]() |
7d311586ed | ||
![]() |
e118a8794f | ||
![]() |
435e382423 | ||
![]() |
0670bdd8f2 | ||
![]() |
71a1f61700 | ||
![]() |
6510a3aa97 | ||
![]() |
278d061a0c | ||
![]() |
503b604a31 | ||
![]() |
4b85f0f9db | ||
![]() |
19d6991312 | ||
![]() |
07f9febc4b | ||
![]() |
fad4ceb534 | ||
![]() |
6945b9e78f | ||
![]() |
29cfcb43da | ||
![]() |
a1a4607598 | ||
![]() |
73c19aaa9f | ||
![]() |
289ef490f7 | ||
![]() |
6ca3fa898c | ||
![]() |
31fbedc06a | ||
![]() |
15870747f0 | ||
![]() |
fc746c3fdd | ||
![]() |
4e58d9fabb | ||
![]() |
2cc779f497 | ||
![]() |
379306ef55 | ||
![]() |
f28363ad1f | ||
![]() |
2bfc1d9d68 | ||
![]() |
e2dd132f05 | ||
![]() |
79fec976b0 | ||
![]() |
29639b363d | ||
![]() |
f53cecd796 | ||
![]() |
fa4ac365f6 | ||
![]() |
bfc8eeea57 | ||
![]() |
b0d73a7456 | ||
![]() |
4fe54c128a | ||
![]() |
a16c7c033a | ||
![]() |
2f483bc1c3 | ||
![]() |
561b456e2d | ||
![]() |
929ba3997b | ||
![]() |
10026329c2 | ||
![]() |
3b983ee471 | ||
![]() |
f1ab3b7de7 | ||
![]() |
d65f6e734b | ||
![]() |
ed8db0a25c | ||
![]() |
60a899bb7e | ||
![]() |
cbdc688c41 | ||
![]() |
5caa531a1a | ||
![]() |
a64646e417 | ||
![]() |
c469e8808c | ||
![]() |
b64f6e690f | ||
![]() |
a4491dd55c | ||
![]() |
c3e543893b | ||
![]() |
432aba1c5e | ||
![]() |
7c072f00d6 | ||
![]() |
96c186e1fd | ||
![]() |
4ad159c7b0 | ||
![]() |
65615be368 | ||
![]() |
3c1089dba4 | ||
![]() |
6089ff40e7 | ||
![]() |
2543938bbe | ||
![]() |
440863ade1 | ||
![]() |
391256dc0e | ||
![]() |
06b4b90c70 | ||
![]() |
8cb5c2181a | ||
![]() |
0266854f63 | ||
![]() |
bcc334a3c6 | ||
![]() |
e9a50fba86 | ||
![]() |
04fb6928da | ||
![]() |
b7acc83550 | ||
![]() |
de0359c0af | ||
![]() |
c87f65e43d | ||
![]() |
d7c3af7a72 | ||
![]() |
aeb72b3a41 | ||
![]() |
2122d7151d | ||
![]() |
751e051557 | ||
![]() |
d226c560a6 | ||
![]() |
8437f5089f | ||
![]() |
1d803085d7 | ||
![]() |
696f4e4114 | ||
![]() |
0e713dbb11 | ||
![]() |
9b5c8751ee | ||
![]() |
d9f1123c08 | ||
![]() |
3d8eb6beb9 | ||
![]() |
38d15ba7f9 | ||
![]() |
6b688b8942 | ||
![]() |
9d9daed464 | ||
![]() |
32ac3d49ae | ||
![]() |
373941c5f0 | ||
![]() |
4e1ddc8da9 | ||
![]() |
e4d51e751e | ||
![]() |
c2dd2dc086 | ||
![]() |
140a13f5de | ||
![]() |
825cd268a3 | ||
![]() |
63529e935c | ||
![]() |
4273caf5c7 | ||
![]() |
e1a0628797 | ||
![]() |
835e45abab | ||
![]() |
904bb599be | ||
![]() |
65e29cdac3 | ||
![]() |
4ee1845454 | ||
![]() |
cfd13c4c45 | ||
![]() |
386d1fea79 | ||
![]() |
7216e9bff7 | ||
![]() |
4cee62ade0 | ||
![]() |
cbb3e4b14f | ||
![]() |
752582183a | ||
![]() |
1c82122741 | ||
![]() |
50a498a68e | ||
![]() |
252e172dea | ||
![]() |
90046d7761 | ||
![]() |
c8b3751086 | ||
![]() |
21c340b83f | ||
![]() |
c984196cf1 | ||
![]() |
7f41a598b3 | ||
![]() |
8fe104947d | ||
![]() |
0a05cfabb6 | ||
![]() |
13e17cd28e | ||
![]() |
102a4e54c5 | ||
![]() |
6e29458f24 | ||
![]() |
59c3940165 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.09**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.12.09
|
[debug] youtube-dl version 2019.03.09
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@@ -9,7 +9,6 @@ python:
|
|||||||
- "3.6"
|
- "3.6"
|
||||||
- "pypy"
|
- "pypy"
|
||||||
- "pypy3"
|
- "pypy3"
|
||||||
sudo: false
|
|
||||||
env:
|
env:
|
||||||
- YTDL_TEST_SET=core
|
- YTDL_TEST_SET=core
|
||||||
- YTDL_TEST_SET=download
|
- YTDL_TEST_SET=download
|
||||||
|
@@ -152,16 +152,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
|
||||||
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
|
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/extractors.py
|
$ git add youtube_dl/extractor/extractors.py
|
||||||
$ git add youtube_dl/extractor/yourextractor.py
|
$ git add youtube_dl/extractor/yourextractor.py
|
||||||
$ git commit -m '[yourextractor] Add new extractor'
|
$ git commit -m '[yourextractor] Add new extractor'
|
||||||
$ git push origin yourextractor
|
$ git push origin yourextractor
|
||||||
|
|
||||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||||
|
|
||||||
In any case, thank you very much for your contributions!
|
In any case, thank you very much for your contributions!
|
||||||
|
|
||||||
@@ -173,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
|||||||
|
|
||||||
### Mandatory and optional metafields
|
### Mandatory and optional metafields
|
||||||
|
|
||||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||||
|
|
||||||
- `id` (media identifier)
|
- `id` (media identifier)
|
||||||
- `title` (media title)
|
- `title` (media title)
|
||||||
@@ -181,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
|||||||
|
|
||||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||||
|
|
||||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
@@ -257,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
|||||||
|
|
||||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||||
|
|
||||||
### Make regular expressions flexible
|
### Regular expressions
|
||||||
|
|
||||||
When using regular expressions try to write them fuzzy and flexible.
|
#### Don't capture groups you don't use
|
||||||
|
|
||||||
|
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
|
||||||
|
|
||||||
|
##### Example
|
||||||
|
|
||||||
|
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
r'(?:id|ID)=(?P<id>\d+)'
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
```python
|
||||||
|
r'(id|ID)=(?P<id>\d+)'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### Make regular expressions relaxed and flexible
|
||||||
|
|
||||||
|
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||||
|
|
||||||
#### Example
|
##### Example
|
||||||
|
|
||||||
Say you need to extract `title` from the following HTML code:
|
Say you need to extract `title` from the following HTML code:
|
||||||
|
|
||||||
@@ -294,7 +320,26 @@ title = self._search_regex(
|
|||||||
webpage, 'title', group='title')
|
webpage, 'title', group='title')
|
||||||
```
|
```
|
||||||
|
|
||||||
### Use safe conversion functions
|
### Long lines policy
|
||||||
|
|
||||||
|
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||||
|
|
||||||
|
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
|
||||||
|
```python
|
||||||
|
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||||
|
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use convenience conversion and parsing functions
|
||||||
|
|
||||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
|
||||||
@@ -302,6 +347,8 @@ Use `url_or_none` for safe URL processing.
|
|||||||
|
|
||||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||||
|
|
||||||
|
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||||
|
|
||||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||||
|
|
||||||
#### More examples
|
#### More examples
|
||||||
|
282
ChangeLog
282
ChangeLog
@@ -1,3 +1,285 @@
|
|||||||
|
version 2019.03.09
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Use compat_etree_Element
|
||||||
|
+ [compat] Introduce compat_etree_Element
|
||||||
|
* [extractor/common] Fallback url to base URL for DASH formats
|
||||||
|
* [extractor/common] Do not fail on invalid data while parsing F4M manifest
|
||||||
|
in non fatal mode
|
||||||
|
* [extractor/common] Return MPD manifest as format's url meta field (#20242)
|
||||||
|
* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [francetv:site] Relax video id regular expression (#20268)
|
||||||
|
* [toutv] Detect invalid login error
|
||||||
|
* [toutv] Fix authentication (#20261)
|
||||||
|
+ [urplay] Extract timestamp (#20235)
|
||||||
|
+ [openload] Add support for oload.space (#20246)
|
||||||
|
* [facebook] Improve uploader extraction (#20250)
|
||||||
|
* [bbc] Use compat_etree_Element
|
||||||
|
* [crunchyroll] Use compat_etree_Element
|
||||||
|
* [npo] Improve ISM extraction
|
||||||
|
* [rai] Improve extraction (#20253)
|
||||||
|
* [paramountnetwork] Fix mgid extraction (#20241)
|
||||||
|
* [libsyn] Improve extraction (#20229)
|
||||||
|
+ [youtube] Add more invidious instances to URL regular expression (#20228)
|
||||||
|
* [spankbang] Fix extraction (#20023)
|
||||||
|
* [espn] Extend URL regular expression (#20013)
|
||||||
|
* [sixplay] Handle videos with empty assets (#20016)
|
||||||
|
+ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.03.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [downloader/external] Add support for rate limit and retries for wget
|
||||||
|
* [downloader/external] Fix infinite retries for curl (#19303)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [npo] Fix extraction (#20084)
|
||||||
|
* [francetv:site] Extend video id regex (#20029, #20071)
|
||||||
|
+ [periscope] Extract width and height (#20015)
|
||||||
|
* [servus] Fix extraction (#19297)
|
||||||
|
* [bbccouk] Make subtitles non fatal (#19651)
|
||||||
|
* [metacafe] Fix family filter bypass (#19287)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.02.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [tvp:website] Fix and improve extraction
|
||||||
|
+ [tvp] Detect unavailable videos
|
||||||
|
* [tvp] Fix description extraction and make thumbnail optional
|
||||||
|
+ [linuxacademy] Add support for linuxacademy.com (#12207)
|
||||||
|
* [bilibili] Update keys (#19233)
|
||||||
|
* [udemy] Extend URL regular expressions (#14330, #15883)
|
||||||
|
* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
|
||||||
|
* [noovo] Fix extraction (#19230)
|
||||||
|
* [rai] Relax URL regular expression (#19232)
|
||||||
|
+ [vshare] Pass Referer to download request (#19205, #19221)
|
||||||
|
+ [openload] Add support for oload.live (#19222)
|
||||||
|
* [imgur] Use video id as title fallback (#18590)
|
||||||
|
+ [twitch] Add new source format detection approach (#19193)
|
||||||
|
* [tvplayhome] Fix video id extraction (#19190)
|
||||||
|
* [tvplayhome] Fix episode metadata extraction (#19190)
|
||||||
|
* [rutube:embed] Fix extraction (#19163)
|
||||||
|
+ [rutube:embed] Add support for private videos (#19163)
|
||||||
|
+ [soundcloud] Extract more metadata
|
||||||
|
+ [trunews] Add support for trunews.com (#19153)
|
||||||
|
+ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.02.08
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Improve JSON-LD regular expression (#18058)
|
||||||
|
* [YoutubeDL] Fallback to ie_key of matching extractor while making
|
||||||
|
download archive id when no explicit ie_key is provided (#19022)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [malltv] Add support for mall.tv (#18058, #17856)
|
||||||
|
+ [spankbang:playlist] Add support for playlists (#19145)
|
||||||
|
* [spankbang] Extend URL regular expression
|
||||||
|
* [trutv] Fix extraction (#17336)
|
||||||
|
* [toutv] Fix authentication (#16398, #18700)
|
||||||
|
* [pornhub] Fix tags and categories extraction (#13720, #19135)
|
||||||
|
* [pornhd] Fix formats extraction
|
||||||
|
+ [pornhd] Extract like count (#19123, #19125)
|
||||||
|
* [radiocanada] Switch to the new media requests (#19115)
|
||||||
|
+ [teachable] Add support for courses.workitdaily.com (#18871)
|
||||||
|
- [vporn] Remove extractor (#16276)
|
||||||
|
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
|
||||||
|
+ [drtuber] Extract duration (#19078)
|
||||||
|
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
|
||||||
|
* [soundcloud] Update client id
|
||||||
|
* [drtv] Improve preference (#19079)
|
||||||
|
+ [openload] Add support for openload.pw and oload.pw (#18930)
|
||||||
|
+ [openload] Add support for oload.info (#19073)
|
||||||
|
* [crackle] Authorize media detail request (#16931)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.30.1
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.30
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
|
||||||
|
subtitles (#19024, #19042)
|
||||||
|
* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
|
||||||
|
* [drtv] Improve extraction (#19039)
|
||||||
|
+ Add support for EncryptedUri videos
|
||||||
|
+ Extract more metadata
|
||||||
|
* Fix subtitles extraction
|
||||||
|
+ [fox] Add support for locked videos using cookies (#19060)
|
||||||
|
* [fox] Fix extraction for free videos (#19060)
|
||||||
|
+ [zattoo] Add support for tv.salt.ch (#19059)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.27
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Extract season in _json_ld
|
||||||
|
* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
|
||||||
|
(#681)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [vice] Fix extraction for locked videos (#16248)
|
||||||
|
+ [wakanim] Detect DRM protected videos
|
||||||
|
+ [wakanim] Add support for wakanim.tv (#14374)
|
||||||
|
* [usatoday] Fix extraction for videos with custom brightcove partner id
|
||||||
|
(#18990)
|
||||||
|
* [drtv] Fix extraction (#18989)
|
||||||
|
* [nhk] Extend URL regular expression (#18968)
|
||||||
|
* [go] Fix Adobe Pass requests for Disney Now (#18901)
|
||||||
|
+ [openload] Add support for oload.club (#18969)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.24
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Fix negation for string operators in format selection (#18961)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Fix urljoin for paths with non-http(s) schemes
|
||||||
|
* [extractor/common] Improve jwplayer relative URL handling (#18892)
|
||||||
|
+ [YoutubeDL] Add negation support for string comparisons in format selection
|
||||||
|
expressions (#18600, #18805)
|
||||||
|
* [extractor/common] Improve HLS video-only format detection (#18923)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [crunchyroll] Extend URL regular expression (#18955)
|
||||||
|
* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
|
||||||
|
#17197, #18338, #18842, #18899)
|
||||||
|
+ [vrv] Add support for authentication (#14307)
|
||||||
|
* [videomore:season] Fix extraction
|
||||||
|
* [videomore] Improve extraction (#18908)
|
||||||
|
+ [tnaflix] Pass Referer in metadata request (#18925)
|
||||||
|
* [radiocanada] Relax DRM check (#18608, #18609)
|
||||||
|
* [vimeo] Fix video password verification for videos protected by
|
||||||
|
Referer HTTP header
|
||||||
|
+ [hketv] Add support for hkedcity.net (#18696)
|
||||||
|
+ [streamango] Add support for fruithosts.net (#18710)
|
||||||
|
+ [instagram] Add support for tags (#18757)
|
||||||
|
+ [odnoklassniki] Detect paid videos (#18876)
|
||||||
|
* [ted] Correct acodec for HTTP formats (#18923)
|
||||||
|
* [cartoonnetwork] Fix extraction (#15664, #17224)
|
||||||
|
* [vimeo] Fix extraction for password protected player URLs (#18889)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.17
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Extend JS player signature function name regular expressions
|
||||||
|
(#18890, #18891, #18893)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [test/helper] Add support for maxcount and count collection len checkers
|
||||||
|
* [downloader/hls] Fix uplynk ad skipping (#18824)
|
||||||
|
* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Skip unsupported adaptive stream type (#18804)
|
||||||
|
+ [youtube] Extract DASH formats from player response (#18804)
|
||||||
|
* [funimation] Fix extraction (#14089)
|
||||||
|
* [skylinewebcams] Fix extraction (#18853)
|
||||||
|
+ [curiositystream] Add support for non app URLs
|
||||||
|
+ [bitchute] Check formats (#18833)
|
||||||
|
* [wistia] Extend URL regular expression (#18823)
|
||||||
|
+ [playplustv] Add support for playplus.com (#18789)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.10
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Use episode name as title in _json_ld
|
||||||
|
+ [extractor/common] Add support for movies in _json_ld
|
||||||
|
* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
|
||||||
|
(#18765)
|
||||||
|
+ [utils] Add language codes replaced in 1989 revision of ISO 639
|
||||||
|
to ISO639Utils (#18765)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Extract live HLS URL from player response (#18799)
|
||||||
|
+ [outsidetv] Add support for outsidetv.com (#18774)
|
||||||
|
* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
|
||||||
|
+ [fox] Add support for National Geographic (#17985, #15333, #14698)
|
||||||
|
+ [playplustv] Add support for playplus.tv (#18789)
|
||||||
|
* [globo] Set GLBID cookie manually (#17346)
|
||||||
|
+ [gaia] Add support for gaia.com (#14605)
|
||||||
|
* [youporn] Fix title and description extraction (#18748)
|
||||||
|
+ [hungama] Add support for hungama.com (#17402, #18771)
|
||||||
|
* [dtube] Fix extraction (#18741)
|
||||||
|
* [tvnow] Fix and rework extractors and prepare for a switch to the new API
|
||||||
|
(#17245, #18499)
|
||||||
|
* [carambatv:page] Fix extraction (#18739)
|
||||||
|
|
||||||
|
|
||||||
|
version 2019.01.02
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [discovery] Use geo verification headers (#17838)
|
||||||
|
+ [packtpub] Add support for subscription.packtpub.com (#18718)
|
||||||
|
* [yourporn] Fix extraction (#18583)
|
||||||
|
+ [acast:channel] Add support for play.acast.com (#18587)
|
||||||
|
+ [extractors] Add missing age limits (#18621)
|
||||||
|
+ [rmcdecouverte] Add support for live stream
|
||||||
|
* [rmcdecouverte] Bypass geo restriction
|
||||||
|
* [rmcdecouverte] Update URL regular expression (#18595, #18697)
|
||||||
|
* [manyvids] Fix extraction (#18604, #18614)
|
||||||
|
* [bitchute] Fix extraction (#18567)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.12.31
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [bbc] Add support for another embed pattern (#18643)
|
||||||
|
+ [npo:live] Add support for npostart.nl (#18644)
|
||||||
|
* [beeg] Fix extraction (#18610, #18626)
|
||||||
|
* [youtube] Unescape HTML for series (#18641)
|
||||||
|
+ [youtube] Extract more format metadata
|
||||||
|
* [youtube] Detect DRM protected videos (#1774)
|
||||||
|
* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
|
||||||
|
* [youtube] Extend HTML5 player regular expression (#17516)
|
||||||
|
+ [liveleak] Add support for another embed type and restore original
|
||||||
|
format extraction
|
||||||
|
+ [crackle] Extract ISM and HTTP formats
|
||||||
|
+ [twitter] Pass Referer with card request (#18579)
|
||||||
|
* [mediasite] Extend URL regular expression (#18558)
|
||||||
|
+ [lecturio] Add support for lecturio.de (#18562)
|
||||||
|
+ [discovery] Add support for Scripps Networks watch domains (#17947)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.12.17
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [ard:beta] Improve geo restricted videos extraction
|
||||||
|
* [ard:beta] Fix subtitles extraction
|
||||||
|
* [ard:beta] Improve extraction robustness
|
||||||
|
* [ard:beta] Relax URL regular expression (#18441)
|
||||||
|
* [acast] Add support for embed.acast.com and play.acast.com (#18483)
|
||||||
|
* [iprima] Relax URL regular expression (#18515, #18540)
|
||||||
|
* [vrv] Fix initial state extraction (#18553)
|
||||||
|
* [youtube] Fix mark watched (#18546)
|
||||||
|
+ [safari] Add support for learning.oreilly.com (#18510)
|
||||||
|
* [youtube] Fix multifeed extraction (#18531)
|
||||||
|
* [lecturio] Improve subtitles extraction (#18488)
|
||||||
|
* [uol] Fix format URL extraction (#18480)
|
||||||
|
+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
|
||||||
|
|
||||||
|
|
||||||
version 2018.12.09
|
version 2018.12.09
|
||||||
|
|
||||||
Core
|
Core
|
||||||
|
73
README.md
73
README.md
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
|
|||||||
|
|
||||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||||
|
|
||||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
|
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
|
||||||
|
|
||||||
- `id` (string): Video identifier
|
- `id` (string): Video identifier
|
||||||
- `title` (string): Video title
|
- `title` (string): Video title
|
||||||
@@ -667,7 +667,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
|
|||||||
- `asr`: Audio sampling rate in Hertz
|
- `asr`: Audio sampling rate in Hertz
|
||||||
- `fps`: Frame rate
|
- `fps`: Frame rate
|
||||||
|
|
||||||
Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
|
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
|
||||||
- `ext`: File extension
|
- `ext`: File extension
|
||||||
- `acodec`: Name of the audio codec in use
|
- `acodec`: Name of the audio codec in use
|
||||||
- `vcodec`: Name of the video codec in use
|
- `vcodec`: Name of the video codec in use
|
||||||
@@ -675,6 +675,8 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
|||||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||||
- `format_id`: A short description of the format
|
- `format_id`: A short description of the format
|
||||||
|
|
||||||
|
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||||
|
|
||||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||||
|
|
||||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||||
@@ -1024,16 +1026,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
|
||||||
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
|
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/extractors.py
|
$ git add youtube_dl/extractor/extractors.py
|
||||||
$ git add youtube_dl/extractor/yourextractor.py
|
$ git add youtube_dl/extractor/yourextractor.py
|
||||||
$ git commit -m '[yourextractor] Add new extractor'
|
$ git commit -m '[yourextractor] Add new extractor'
|
||||||
$ git push origin yourextractor
|
$ git push origin yourextractor
|
||||||
|
|
||||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||||
|
|
||||||
In any case, thank you very much for your contributions!
|
In any case, thank you very much for your contributions!
|
||||||
|
|
||||||
@@ -1045,7 +1051,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
|||||||
|
|
||||||
### Mandatory and optional metafields
|
### Mandatory and optional metafields
|
||||||
|
|
||||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||||
|
|
||||||
- `id` (media identifier)
|
- `id` (media identifier)
|
||||||
- `title` (media title)
|
- `title` (media title)
|
||||||
@@ -1053,7 +1059,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
|||||||
|
|
||||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||||
|
|
||||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
@@ -1129,11 +1135,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
|||||||
|
|
||||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||||
|
|
||||||
### Make regular expressions flexible
|
### Regular expressions
|
||||||
|
|
||||||
When using regular expressions try to write them fuzzy and flexible.
|
#### Don't capture groups you don't use
|
||||||
|
|
||||||
|
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
|
||||||
|
|
||||||
|
##### Example
|
||||||
|
|
||||||
|
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
r'(?:id|ID)=(?P<id>\d+)'
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
```python
|
||||||
|
r'(id|ID)=(?P<id>\d+)'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### Make regular expressions relaxed and flexible
|
||||||
|
|
||||||
|
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||||
|
|
||||||
#### Example
|
##### Example
|
||||||
|
|
||||||
Say you need to extract `title` from the following HTML code:
|
Say you need to extract `title` from the following HTML code:
|
||||||
|
|
||||||
@@ -1166,7 +1194,26 @@ title = self._search_regex(
|
|||||||
webpage, 'title', group='title')
|
webpage, 'title', group='title')
|
||||||
```
|
```
|
||||||
|
|
||||||
### Use safe conversion functions
|
### Long lines policy
|
||||||
|
|
||||||
|
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||||
|
|
||||||
|
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||||
|
|
||||||
|
Correct:
|
||||||
|
|
||||||
|
```python
|
||||||
|
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||||
|
```
|
||||||
|
|
||||||
|
Incorrect:
|
||||||
|
|
||||||
|
```python
|
||||||
|
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||||
|
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use convenience conversion and parsing functions
|
||||||
|
|
||||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
|
||||||
@@ -1174,6 +1221,8 @@ Use `url_or_none` for safe URL processing.
|
|||||||
|
|
||||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||||
|
|
||||||
|
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||||
|
|
||||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||||
|
|
||||||
#### More examples
|
#### More examples
|
||||||
|
@@ -320,6 +320,7 @@
|
|||||||
- **Fusion**
|
- **Fusion**
|
||||||
- **Fux**
|
- **Fux**
|
||||||
- **FXNetworks**
|
- **FXNetworks**
|
||||||
|
- **Gaia**
|
||||||
- **GameInformer**
|
- **GameInformer**
|
||||||
- **GameOne**
|
- **GameOne**
|
||||||
- **gameone:playlist**
|
- **gameone:playlist**
|
||||||
@@ -360,6 +361,7 @@
|
|||||||
- **hitbox**
|
- **hitbox**
|
||||||
- **hitbox:live**
|
- **hitbox:live**
|
||||||
- **HitRecord**
|
- **HitRecord**
|
||||||
|
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
|
||||||
- **HornBunny**
|
- **HornBunny**
|
||||||
- **HotNewHipHop**
|
- **HotNewHipHop**
|
||||||
- **hotstar**
|
- **hotstar**
|
||||||
@@ -370,6 +372,8 @@
|
|||||||
- **HRTiPlaylist**
|
- **HRTiPlaylist**
|
||||||
- **Huajiao**: 花椒直播
|
- **Huajiao**: 花椒直播
|
||||||
- **HuffPost**: Huffington Post
|
- **HuffPost**: Huffington Post
|
||||||
|
- **Hungama**
|
||||||
|
- **HungamaSong**
|
||||||
- **Hypem**
|
- **Hypem**
|
||||||
- **Iconosquare**
|
- **Iconosquare**
|
||||||
- **ign.com**
|
- **ign.com**
|
||||||
@@ -383,6 +387,7 @@
|
|||||||
- **IndavideoEmbed**
|
- **IndavideoEmbed**
|
||||||
- **InfoQ**
|
- **InfoQ**
|
||||||
- **Instagram**
|
- **Instagram**
|
||||||
|
- **instagram:tag**: Instagram hashtag search
|
||||||
- **instagram:user**: Instagram user profile
|
- **instagram:user**: Instagram user profile
|
||||||
- **Internazionale**
|
- **Internazionale**
|
||||||
- **InternetVideoArchive**
|
- **InternetVideoArchive**
|
||||||
@@ -438,6 +443,7 @@
|
|||||||
- **Lecture2Go**
|
- **Lecture2Go**
|
||||||
- **Lecturio**
|
- **Lecturio**
|
||||||
- **LecturioCourse**
|
- **LecturioCourse**
|
||||||
|
- **LecturioDeCourse**
|
||||||
- **LEGO**
|
- **LEGO**
|
||||||
- **Lemonde**
|
- **Lemonde**
|
||||||
- **Lenta**
|
- **Lenta**
|
||||||
@@ -452,6 +458,7 @@
|
|||||||
- **LineTV**
|
- **LineTV**
|
||||||
- **linkedin:learning**
|
- **linkedin:learning**
|
||||||
- **linkedin:learning:course**
|
- **linkedin:learning:course**
|
||||||
|
- **LinuxAcademy**
|
||||||
- **LiTV**
|
- **LiTV**
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **LiveLeakEmbed**
|
- **LiveLeakEmbed**
|
||||||
@@ -470,6 +477,7 @@
|
|||||||
- **mailru:music**: Музыка@Mail.Ru
|
- **mailru:music**: Музыка@Mail.Ru
|
||||||
- **mailru:music:search**: Музыка@Mail.Ru
|
- **mailru:music:search**: Музыка@Mail.Ru
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
|
- **MallTV**
|
||||||
- **mangomolo:live**
|
- **mangomolo:live**
|
||||||
- **mangomolo:video**
|
- **mangomolo:video**
|
||||||
- **ManyVids**
|
- **ManyVids**
|
||||||
@@ -539,9 +547,8 @@
|
|||||||
- **MyviEmbed**
|
- **MyviEmbed**
|
||||||
- **MyVisionTV**
|
- **MyVisionTV**
|
||||||
- **n-tv.de**
|
- **n-tv.de**
|
||||||
- **natgeo**
|
|
||||||
- **natgeo:episodeguide**
|
|
||||||
- **natgeo:video**
|
- **natgeo:video**
|
||||||
|
- **NationalGeographicTV**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
- **NBA**
|
- **NBA**
|
||||||
- **NBC**
|
- **NBC**
|
||||||
@@ -641,6 +648,7 @@
|
|||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
- **OsnatelTV**
|
- **OsnatelTV**
|
||||||
|
- **OutsideTV**
|
||||||
- **PacktPub**
|
- **PacktPub**
|
||||||
- **PacktPubCourse**
|
- **PacktPubCourse**
|
||||||
- **PandaTV**: 熊猫TV
|
- **PandaTV**: 熊猫TV
|
||||||
@@ -665,6 +673,7 @@
|
|||||||
- **Pinkbike**
|
- **Pinkbike**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
|
- **PlayPlusTV**
|
||||||
- **PlaysTV**
|
- **PlaysTV**
|
||||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||||
- **Playvid**
|
- **Playvid**
|
||||||
@@ -770,6 +779,7 @@
|
|||||||
- **safari:api**
|
- **safari:api**
|
||||||
- **safari:course**: safaribooksonline.com online courses
|
- **safari:course**: safaribooksonline.com online courses
|
||||||
- **SAKTV**
|
- **SAKTV**
|
||||||
|
- **SaltTV**
|
||||||
- **Sapo**: SAPO Vídeos
|
- **Sapo**: SAPO Vídeos
|
||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
- **SBS**: sbs.com.au
|
- **SBS**: sbs.com.au
|
||||||
@@ -819,6 +829,7 @@
|
|||||||
- **southpark.nl**
|
- **southpark.nl**
|
||||||
- **southparkstudios.dk**
|
- **southparkstudios.dk**
|
||||||
- **SpankBang**
|
- **SpankBang**
|
||||||
|
- **SpankBangPlaylist**
|
||||||
- **Spankwire**
|
- **Spankwire**
|
||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
- **Spiegel:Article**: Articles on spiegel.de
|
||||||
@@ -905,6 +916,7 @@
|
|||||||
- **ToypicsUser**: Toypics user profile
|
- **ToypicsUser**: Toypics user profile
|
||||||
- **TrailerAddict** (Currently broken)
|
- **TrailerAddict** (Currently broken)
|
||||||
- **Trilulilu**
|
- **Trilulilu**
|
||||||
|
- **TruNews**
|
||||||
- **TruTV**
|
- **TruTV**
|
||||||
- **Tube8**
|
- **Tube8**
|
||||||
- **TubiTv**
|
- **TubiTv**
|
||||||
@@ -933,7 +945,9 @@
|
|||||||
- **TVNet**
|
- **TVNet**
|
||||||
- **TVNoe**
|
- **TVNoe**
|
||||||
- **TVNow**
|
- **TVNow**
|
||||||
- **TVNowList**
|
- **TVNowAnnual**
|
||||||
|
- **TVNowNew**
|
||||||
|
- **TVNowSeason**
|
||||||
- **TVNowShow**
|
- **TVNowShow**
|
||||||
- **tvp**: Telewizja Polska
|
- **tvp**: Telewizja Polska
|
||||||
- **tvp:embed**: Telewizja Polska
|
- **tvp:embed**: Telewizja Polska
|
||||||
@@ -1047,7 +1061,6 @@
|
|||||||
- **Voot**
|
- **Voot**
|
||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
- **VoxMediaVolume**
|
- **VoxMediaVolume**
|
||||||
- **Vporn**
|
|
||||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
- **Vrak**
|
- **Vrak**
|
||||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||||
@@ -1061,6 +1074,7 @@
|
|||||||
- **VVVVID**
|
- **VVVVID**
|
||||||
- **VyboryMos**
|
- **VyboryMos**
|
||||||
- **Vzaar**
|
- **Vzaar**
|
||||||
|
- **Wakanim**
|
||||||
- **Walla**
|
- **Walla**
|
||||||
- **WalyTV**
|
- **WalyTV**
|
||||||
- **washingtonpost**
|
- **washingtonpost**
|
||||||
|
@@ -153,15 +153,27 @@ def expect_value(self, got, expected, field):
|
|||||||
isinstance(got, compat_str),
|
isinstance(got, compat_str),
|
||||||
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
|
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
|
||||||
got = 'md5:' + md5(got)
|
got = 'md5:' + md5(got)
|
||||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
isinstance(got, (list, dict)),
|
isinstance(got, (list, dict)),
|
||||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||||
field, type(got).__name__))
|
field, type(got).__name__))
|
||||||
expected_num = int(expected.partition(':')[2])
|
op, _, expected_num = expected.partition(':')
|
||||||
assertGreaterEqual(
|
expected_num = int(expected_num)
|
||||||
|
if op == 'mincount':
|
||||||
|
assert_func = assertGreaterEqual
|
||||||
|
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||||
|
elif op == 'maxcount':
|
||||||
|
assert_func = assertLessEqual
|
||||||
|
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||||
|
elif op == 'count':
|
||||||
|
assert_func = assertEqual
|
||||||
|
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
assert_func(
|
||||||
self, len(got), expected_num,
|
self, len(got), expected_num,
|
||||||
'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
|
msg_tmpl % (expected_num, field, len(got)))
|
||||||
return
|
return
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
expected, got,
|
expected, got,
|
||||||
@@ -237,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
|
|||||||
self.assertTrue(got >= expected, msg)
|
self.assertTrue(got >= expected, msg)
|
||||||
|
|
||||||
|
|
||||||
|
def assertLessEqual(self, got, expected, msg=None):
|
||||||
|
if not (got <= expected):
|
||||||
|
if msg is None:
|
||||||
|
msg = '%r not less than or equal to %r' % (got, expected)
|
||||||
|
self.assertTrue(got <= expected, msg)
|
||||||
|
|
||||||
|
|
||||||
|
def assertEqual(self, got, expected, msg=None):
|
||||||
|
if not (got == expected):
|
||||||
|
if msg is None:
|
||||||
|
msg = '%r not equal to %r' % (got, expected)
|
||||||
|
self.assertTrue(got == expected, msg)
|
||||||
|
|
||||||
|
|
||||||
def expect_warnings(ydl, warnings_re):
|
def expect_warnings(ydl, warnings_re):
|
||||||
real_warning = ydl.report_warning
|
real_warning = ydl.report_warning
|
||||||
|
|
||||||
|
@@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
<meta content='Foo' property=og:foobar>
|
<meta content='Foo' property=og:foobar>
|
||||||
<meta name="og:test1" content='foo > < bar'/>
|
<meta name="og:test1" content='foo > < bar'/>
|
||||||
<meta name="og:test2" content="foo >//< bar"/>
|
<meta name="og:test2" content="foo >//< bar"/>
|
||||||
|
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||||
'''
|
'''
|
||||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
@@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||||
|
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
|
||||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||||
@@ -497,7 +499,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
'width': 1280,
|
'width': 1280,
|
||||||
'height': 720,
|
'height': 720,
|
||||||
}]
|
}]
|
||||||
)
|
),
|
||||||
|
(
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/18923
|
||||||
|
# https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
|
||||||
|
'ted_18923',
|
||||||
|
'http://hls.ted.com/talks/31241.m3u8',
|
||||||
|
[{
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '600k-Audio',
|
||||||
|
'vcodec': 'none',
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '68',
|
||||||
|
'vcodec': 'none',
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '163',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 320,
|
||||||
|
'height': 180,
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '481',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 512,
|
||||||
|
'height': 288,
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '769',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 512,
|
||||||
|
'height': 288,
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '984',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 512,
|
||||||
|
'height': 288,
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '1255',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 640,
|
||||||
|
'height': 360,
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '1693',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 853,
|
||||||
|
'height': 480,
|
||||||
|
}, {
|
||||||
|
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||||
|
'format_id': '2462',
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': 1280,
|
||||||
|
'height': 720,
|
||||||
|
}]
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||||
|
@@ -239,6 +239,76 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||||
|
|
||||||
|
def test_format_selection_string_ops(self):
|
||||||
|
formats = [
|
||||||
|
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
|
||||||
|
{'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
|
||||||
|
]
|
||||||
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
|
# equals (=)
|
||||||
|
ydl = YDL({'format': '[format_id=abc-cba]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||||
|
|
||||||
|
# does not equal (!=)
|
||||||
|
ydl = YDL({'format': '[format_id!=abc-cba]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||||
|
|
||||||
|
ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
|
||||||
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
|
# starts with (^=)
|
||||||
|
ydl = YDL({'format': '[format_id^=abc]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||||
|
|
||||||
|
# does not start with (!^=)
|
||||||
|
ydl = YDL({'format': '[format_id!^=abc]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||||
|
|
||||||
|
ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
|
||||||
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
|
# ends with ($=)
|
||||||
|
ydl = YDL({'format': '[format_id$=cba]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||||
|
|
||||||
|
# does not end with (!$=)
|
||||||
|
ydl = YDL({'format': '[format_id!$=cba]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||||
|
|
||||||
|
ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
|
||||||
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
|
# contains (*=)
|
||||||
|
ydl = YDL({'format': '[format_id*=bc-cb]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||||
|
|
||||||
|
# does not contain (!*=)
|
||||||
|
ydl = YDL({'format': '[format_id!*=bc-cb]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||||
|
|
||||||
|
ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
|
||||||
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
|
ydl = YDL({'format': '[format_id!*=-]'})
|
||||||
|
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||||
|
|
||||||
def test_youtube_format_selection(self):
|
def test_youtube_format_selection(self):
|
||||||
order = [
|
order = [
|
||||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||||
|
@@ -29,6 +29,16 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||||||
tf.close()
|
tf.close()
|
||||||
os.remove(tf.name)
|
os.remove(tf.name)
|
||||||
|
|
||||||
|
def test_strip_httponly_prefix(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
|
def assert_cookie_has_value(key):
|
||||||
|
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
|
||||||
|
|
||||||
|
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||||
|
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_setenv,
|
compat_setenv,
|
||||||
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
@@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase):
|
|||||||
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||||
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||||
|
|
||||||
|
def test_compat_etree_Element(self):
|
||||||
|
try:
|
||||||
|
compat_etree_Element.items
|
||||||
|
except AttributeError:
|
||||||
|
self.fail('compat_etree_Element is not a type')
|
||||||
|
|
||||||
def test_compat_etree_fromstring(self):
|
def test_compat_etree_fromstring(self):
|
||||||
xml = '''
|
xml = '''
|
||||||
<root foo="bar" spam="中文">
|
<root foo="bar" spam="中文">
|
||||||
|
@@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(urljoin('http://foo.de/', ''), None)
|
self.assertEqual(urljoin('http://foo.de/', ''), None)
|
||||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||||
|
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||||
|
self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||||
|
|
||||||
def test_url_or_none(self):
|
def test_url_or_none(self):
|
||||||
self.assertEqual(url_or_none(None), None)
|
self.assertEqual(url_or_none(None), None)
|
||||||
|
6
test/testdata/cookies/httponly_cookies.txt
vendored
Normal file
6
test/testdata/cookies/httponly_cookies.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
|
||||||
|
www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
|
2
test/testdata/cookies/session_cookies.txt
vendored
2
test/testdata/cookies/session_cookies.txt
vendored
@@ -2,5 +2,5 @@
|
|||||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
# This is a generated file! Do not edit.
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
||||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
||||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
|
||||||
|
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
#EXTM3U
|
||||||
|
#EXT-X-VERSION:4
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
|
||||||
|
/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
|
||||||
|
/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||||
|
/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||||
|
/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||||
|
/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
|
||||||
|
/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
|
||||||
|
/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
|
||||||
|
/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||||
|
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||||
|
|
||||||
|
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
|
@@ -82,6 +82,7 @@ from .utils import (
|
|||||||
sanitize_url,
|
sanitize_url,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
std_headers,
|
std_headers,
|
||||||
|
str_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
url_basename,
|
url_basename,
|
||||||
@@ -1063,21 +1064,24 @@ class YoutubeDL(object):
|
|||||||
if not m:
|
if not m:
|
||||||
STR_OPERATORS = {
|
STR_OPERATORS = {
|
||||||
'=': operator.eq,
|
'=': operator.eq,
|
||||||
'!=': operator.ne,
|
|
||||||
'^=': lambda attr, value: attr.startswith(value),
|
'^=': lambda attr, value: attr.startswith(value),
|
||||||
'$=': lambda attr, value: attr.endswith(value),
|
'$=': lambda attr, value: attr.endswith(value),
|
||||||
'*=': lambda attr, value: value in attr,
|
'*=': lambda attr, value: value in attr,
|
||||||
}
|
}
|
||||||
str_operator_rex = re.compile(r'''(?x)
|
str_operator_rex = re.compile(r'''(?x)
|
||||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||||
\s*$
|
\s*$
|
||||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||||
m = str_operator_rex.search(filter_spec)
|
m = str_operator_rex.search(filter_spec)
|
||||||
if m:
|
if m:
|
||||||
comparison_value = m.group('value')
|
comparison_value = m.group('value')
|
||||||
op = STR_OPERATORS[m.group('op')]
|
str_op = STR_OPERATORS[m.group('op')]
|
||||||
|
if m.group('negation'):
|
||||||
|
op = lambda attr, value: not str_op(attr, value)
|
||||||
|
else:
|
||||||
|
op = str_op
|
||||||
|
|
||||||
if not m:
|
if not m:
|
||||||
raise ValueError('Invalid filter specification %r' % filter_spec)
|
raise ValueError('Invalid filter specification %r' % filter_spec)
|
||||||
@@ -2057,15 +2061,24 @@ class YoutubeDL(object):
|
|||||||
self.report_warning('Unable to remove downloaded original file')
|
self.report_warning('Unable to remove downloaded original file')
|
||||||
|
|
||||||
def _make_archive_id(self, info_dict):
|
def _make_archive_id(self, info_dict):
|
||||||
|
video_id = info_dict.get('id')
|
||||||
|
if not video_id:
|
||||||
|
return
|
||||||
# Future-proof against any change in case
|
# Future-proof against any change in case
|
||||||
# and backwards compatibility with prior versions
|
# and backwards compatibility with prior versions
|
||||||
extractor = info_dict.get('extractor_key')
|
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
|
||||||
if extractor is None:
|
if extractor is None:
|
||||||
if 'id' in info_dict:
|
url = str_or_none(info_dict.get('url'))
|
||||||
extractor = info_dict.get('ie_key') # key in a playlist
|
if not url:
|
||||||
if extractor is None:
|
return
|
||||||
return None # Incomplete video information
|
# Try to find matching extractor for the URL and take its ie_key
|
||||||
return extractor.lower() + ' ' + info_dict['id']
|
for ie in self._ies:
|
||||||
|
if ie.suitable(url):
|
||||||
|
extractor = ie.ie_key()
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
return extractor.lower() + ' ' + video_id
|
||||||
|
|
||||||
def in_download_archive(self, info_dict):
|
def in_download_archive(self, info_dict):
|
||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
@@ -2073,7 +2086,7 @@ class YoutubeDL(object):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
vid_id = self._make_archive_id(info_dict)
|
vid_id = self._make_archive_id(info_dict)
|
||||||
if vid_id is None:
|
if not vid_id:
|
||||||
return False # Incomplete video information
|
return False # Incomplete video information
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@@ -2508,6 +2508,15 @@ class _TreeBuilder(etree.TreeBuilder):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||||
|
# the following will crash with:
|
||||||
|
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||||
|
isinstance(None, xml.etree.ElementTree.Element)
|
||||||
|
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||||
|
except TypeError: # Python <=2.6
|
||||||
|
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||||
|
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
def compat_etree_fromstring(text):
|
def compat_etree_fromstring(text):
|
||||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||||
@@ -2969,6 +2978,7 @@ __all__ = [
|
|||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
'compat_cookies',
|
'compat_cookies',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
|
'compat_etree_Element',
|
||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
'compat_etree_register_namespace',
|
'compat_etree_register_namespace',
|
||||||
'compat_expanduser',
|
'compat_expanduser',
|
||||||
|
@@ -121,7 +121,11 @@ class CurlFD(ExternalFD):
|
|||||||
cmd += self._valueless_option('--silent', 'noprogress')
|
cmd += self._valueless_option('--silent', 'noprogress')
|
||||||
cmd += self._valueless_option('--verbose', 'verbose')
|
cmd += self._valueless_option('--verbose', 'verbose')
|
||||||
cmd += self._option('--limit-rate', 'ratelimit')
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
cmd += self._option('--retry', 'retries')
|
retry = self._option('--retry', 'retries')
|
||||||
|
if len(retry) == 2:
|
||||||
|
if retry[1] in ('inf', 'infinite'):
|
||||||
|
retry[1] = '2147483647'
|
||||||
|
cmd += retry
|
||||||
cmd += self._option('--max-filesize', 'max_filesize')
|
cmd += self._option('--max-filesize', 'max_filesize')
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
@@ -160,6 +164,12 @@ class WgetFD(ExternalFD):
|
|||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
|
retry = self._option('--tries', 'retries')
|
||||||
|
if len(retry) == 2:
|
||||||
|
if retry[1] in ('inf', 'infinite'):
|
||||||
|
retry[1] = '0'
|
||||||
|
cmd += retry
|
||||||
cmd += self._option('--bind-address', 'source_address')
|
cmd += self._option('--bind-address', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
cmd += self._option('--proxy', 'proxy')
|
||||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||||
|
@@ -75,10 +75,14 @@ class HlsFD(FragmentFD):
|
|||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
return fd.real_download(filename, info_dict)
|
return fd.real_download(filename, info_dict)
|
||||||
|
|
||||||
def is_ad_fragment(s):
|
def is_ad_fragment_start(s):
|
||||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
|
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
|
||||||
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||||
|
|
||||||
|
def is_ad_fragment_end(s):
|
||||||
|
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
|
||||||
|
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||||
|
|
||||||
media_frags = 0
|
media_frags = 0
|
||||||
ad_frags = 0
|
ad_frags = 0
|
||||||
ad_frag_next = False
|
ad_frag_next = False
|
||||||
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
|
|||||||
if not line:
|
if not line:
|
||||||
continue
|
continue
|
||||||
if line.startswith('#'):
|
if line.startswith('#'):
|
||||||
if is_ad_fragment(line):
|
if is_ad_fragment_start(line):
|
||||||
ad_frags += 1
|
|
||||||
ad_frag_next = True
|
ad_frag_next = True
|
||||||
|
elif is_ad_fragment_end(line):
|
||||||
|
ad_frag_next = False
|
||||||
continue
|
continue
|
||||||
if ad_frag_next:
|
if ad_frag_next:
|
||||||
ad_frag_next = False
|
ad_frags += 1
|
||||||
continue
|
continue
|
||||||
media_frags += 1
|
media_frags += 1
|
||||||
|
|
||||||
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
|
|||||||
if line:
|
if line:
|
||||||
if not line.startswith('#'):
|
if not line.startswith('#'):
|
||||||
if ad_frag_next:
|
if ad_frag_next:
|
||||||
ad_frag_next = False
|
|
||||||
continue
|
continue
|
||||||
frag_index += 1
|
frag_index += 1
|
||||||
if frag_index <= ctx['fragment_index']:
|
if frag_index <= ctx['fragment_index']:
|
||||||
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
|
|||||||
'start': sub_range_start,
|
'start': sub_range_start,
|
||||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||||
}
|
}
|
||||||
elif is_ad_fragment(line):
|
elif is_ad_fragment_start(line):
|
||||||
ad_frag_next = True
|
ad_frag_next = True
|
||||||
|
elif is_ad_fragment_end(line):
|
||||||
|
ad_frag_next = False
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
||||||
|
@@ -17,25 +17,15 @@ from ..utils import (
|
|||||||
|
|
||||||
class ACastIE(InfoExtractor):
|
class ACastIE(InfoExtractor):
|
||||||
IE_NAME = 'acast'
|
IE_NAME = 'acast'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:(?:embed|www)\.)?acast\.com/|
|
||||||
|
play\.acast\.com/s/
|
||||||
|
)
|
||||||
|
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# test with one bling
|
|
||||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
|
||||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
|
||||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
|
||||||
'timestamp': 1196172000,
|
|
||||||
'upload_date': '20071127',
|
|
||||||
'duration': 211,
|
|
||||||
'creator': 'Concierge',
|
|
||||||
'series': 'Condé Nast Traveler Podcast',
|
|
||||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# test with multiple blings
|
|
||||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
|
|||||||
'series': 'Spår',
|
'series': 'Spår',
|
||||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -83,17 +79,27 @@ class ACastIE(InfoExtractor):
|
|||||||
|
|
||||||
class ACastChannelIE(InfoExtractor):
|
class ACastChannelIE(InfoExtractor):
|
||||||
IE_NAME = 'acast:channel'
|
IE_NAME = 'acast:channel'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
_VALID_URL = r'''(?x)
|
||||||
_TEST = {
|
https?://
|
||||||
'url': 'https://www.acast.com/condenasttraveler',
|
(?:
|
||||||
|
(?:www\.)?acast\.com/|
|
||||||
|
play\.acast\.com/s/
|
||||||
|
)
|
||||||
|
(?P<id>[^/#?]+)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.acast.com/todayinfocus',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '50544219-29bb-499e-a083-6087f4cb7797',
|
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||||
'title': 'Condé Nast Traveler Podcast',
|
'title': 'Today in Focus',
|
||||||
'description': 'md5:98646dee22a5b386626ae31866638fbd',
|
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 35,
|
||||||
}
|
}, {
|
||||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||||
_PAGE_SIZE = 10
|
_PAGE_SIZE = 10
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -106,7 +112,7 @@ class ACastChannelIE(InfoExtractor):
|
|||||||
channel_slug, note='Download page %d of channel data' % page)
|
channel_slug, note='Download page %d of channel data' % page)
|
||||||
for cast in casts:
|
for cast in casts:
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
|
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||||
'ACast', cast['id'])
|
'ACast', cast['id'])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -8,20 +8,23 @@ from .generic import GenericIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
qualities,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
qualities,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_text,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
|
||||||
|
|
||||||
class ARDMediathekIE(InfoExtractor):
|
class ARDMediathekIE(InfoExtractor):
|
||||||
IE_NAME = 'ARD:mediathek'
|
IE_NAME = 'ARD:mediathek'
|
||||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 26.07.2022
|
# available till 26.07.2022
|
||||||
@@ -51,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
# audio
|
# audio
|
||||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||||
|
|
||||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||||
media_info = self._download_json(
|
media_info = self._download_json(
|
||||||
media_info_url, video_id, 'Downloading media JSON')
|
media_info_url, video_id, 'Downloading media JSON')
|
||||||
@@ -293,7 +303,7 @@ class ARDIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class ARDBetaMediathekIE(InfoExtractor):
|
class ARDBetaMediathekIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||||
@@ -307,12 +317,18 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||||||
'upload_date': '20180826',
|
'upload_date': '20180826',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
display_id = mobj.group('display_id')
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||||
@@ -323,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
}
|
}
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
geoblocked = False
|
||||||
for widget in data.values():
|
for widget in data.values():
|
||||||
if widget.get('_geoblocked'):
|
if widget.get('_geoblocked') is True:
|
||||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
geoblocked = True
|
||||||
|
|
||||||
if '_duration' in widget:
|
if '_duration' in widget:
|
||||||
res['duration'] = widget['_duration']
|
res['duration'] = int_or_none(widget['_duration'])
|
||||||
if 'clipTitle' in widget:
|
if 'clipTitle' in widget:
|
||||||
res['title'] = widget['clipTitle']
|
res['title'] = widget['clipTitle']
|
||||||
if '_previewImage' in widget:
|
if '_previewImage' in widget:
|
||||||
res['thumbnail'] = widget['_previewImage']
|
res['thumbnail'] = widget['_previewImage']
|
||||||
if 'broadcastedOn' in widget:
|
if 'broadcastedOn' in widget:
|
||||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
||||||
if 'synopsis' in widget:
|
if 'synopsis' in widget:
|
||||||
res['description'] = widget['synopsis']
|
res['description'] = widget['synopsis']
|
||||||
if '_subtitleUrl' in widget:
|
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
||||||
res['subtitles'] = {'de': [{
|
if subtitle_url:
|
||||||
|
subtitles.setdefault('de', []).append({
|
||||||
'ext': 'ttml',
|
'ext': 'ttml',
|
||||||
'url': widget['_subtitleUrl'],
|
'url': subtitle_url,
|
||||||
}]}
|
})
|
||||||
if '_quality' in widget:
|
if '_quality' in widget:
|
||||||
format_url = widget['_stream']['json'][0]
|
format_url = url_or_none(try_get(
|
||||||
|
widget, lambda x: x['_stream']['json'][0]))
|
||||||
if format_url.endswith('.f4m'):
|
if not format_url:
|
||||||
|
continue
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
format_url + '?hdcore=3.11.0',
|
format_url + '?hdcore=3.11.0',
|
||||||
video_id, f4m_id='hds', fatal=False))
|
video_id, f4m_id='hds', fatal=False))
|
||||||
elif format_url.endswith('m3u8'):
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
else:
|
else:
|
||||||
|
# HTTP formats are not available when geoblocked is True,
|
||||||
|
# other formats are fine though
|
||||||
|
if geoblocked:
|
||||||
|
continue
|
||||||
|
quality = str_or_none(widget.get('_quality'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'http-' + widget['_quality'],
|
'format_id': ('http-' + quality) if quality else 'http',
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'preference': 10, # Plain HTTP, that's nice
|
'preference': 10, # Plain HTTP, that's nice
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if not formats and geoblocked:
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
msg='This video is not available due to geoblocking',
|
||||||
|
countries=['DE'])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
res['formats'] = formats
|
res.update({
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
|
|||||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||||
# if so, pass the work off to the soundcloud extractor
|
# if so, pass the work off to the soundcloud extractor
|
||||||
if SoundcloudIE.suitable(api_response['url']):
|
if SoundcloudIE.suitable(api_response['url']):
|
||||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||||
|
@@ -1,8 +1,8 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -17,10 +17,12 @@ from ..utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_etree_Element,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
cc_url = url_or_none(connection.get('href'))
|
||||||
|
if not cc_url:
|
||||||
|
continue
|
||||||
|
captions = self._download_xml(
|
||||||
|
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||||
|
if not isinstance(captions, compat_etree_Element):
|
||||||
|
continue
|
||||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
subtitles[lang] = [
|
subtitles[lang] = [
|
||||||
{
|
{
|
||||||
@@ -795,6 +803,15 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'uploader': 'Radio 3',
|
'uploader': 'Radio 3',
|
||||||
'uploader_id': 'bbc_radio_three',
|
'uploader_id': 'bbc_radio_three',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p06w9tws',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||||
|
'description': 'Learn English words and phrases from this story',
|
||||||
|
},
|
||||||
|
'add_ie': [BBCCoUkIE.ie_key()],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -945,6 +962,15 @@ class BBCIE(BBCCoUkIE):
|
|||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||||
|
group_id = self._search_regex(
|
||||||
|
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||||
|
webpage, 'group id', default=None)
|
||||||
|
if playlist_id:
|
||||||
|
return self.url_result(
|
||||||
|
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||||
|
ie=BBCCoUkIE.ie_key())
|
||||||
|
|
||||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||||
programme_id = self._search_regex(
|
programme_id = self._search_regex(
|
||||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||||
|
@@ -1,15 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_chr,
|
|
||||||
compat_ord,
|
|
||||||
compat_urllib_parse_unquote,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
unified_timestamp,
|
||||||
urljoin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -36,29 +31,9 @@ class BeegIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
cpl_url = self._search_regex(
|
beeg_version = self._search_regex(
|
||||||
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||||
webpage, 'cpl', default=None, group='url')
|
default='1546225636701')
|
||||||
|
|
||||||
cpl_url = urljoin(url, cpl_url)
|
|
||||||
|
|
||||||
beeg_version, beeg_salt = [None] * 2
|
|
||||||
|
|
||||||
if cpl_url:
|
|
||||||
cpl = self._download_webpage(
|
|
||||||
self._proto_relative_url(cpl_url), video_id,
|
|
||||||
'Downloading cpl JS', fatal=False)
|
|
||||||
if cpl:
|
|
||||||
beeg_version = int_or_none(self._search_regex(
|
|
||||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
|
||||||
'beeg version', default=None)) or self._search_regex(
|
|
||||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
|
||||||
beeg_salt = self._search_regex(
|
|
||||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
|
||||||
default=None, group='beeg_salt')
|
|
||||||
|
|
||||||
beeg_version = beeg_version or '2185'
|
|
||||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
|
||||||
|
|
||||||
for api_path in ('', 'api.'):
|
for api_path in ('', 'api.'):
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
@@ -68,37 +43,6 @@ class BeegIE(InfoExtractor):
|
|||||||
if video:
|
if video:
|
||||||
break
|
break
|
||||||
|
|
||||||
def split(o, e):
|
|
||||||
def cut(s, x):
|
|
||||||
n.append(s[:x])
|
|
||||||
return s[x:]
|
|
||||||
n = []
|
|
||||||
r = len(o) % e
|
|
||||||
if r > 0:
|
|
||||||
o = cut(o, r)
|
|
||||||
while len(o) > e:
|
|
||||||
o = cut(o, e)
|
|
||||||
n.append(o)
|
|
||||||
return n
|
|
||||||
|
|
||||||
def decrypt_key(key):
|
|
||||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
|
||||||
a = beeg_salt
|
|
||||||
e = compat_urllib_parse_unquote(key)
|
|
||||||
o = ''.join([
|
|
||||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
|
||||||
for n in range(len(e))])
|
|
||||||
return ''.join(split(o, 3)[::-1])
|
|
||||||
|
|
||||||
def decrypt_url(encrypted_url):
|
|
||||||
encrypted_url = self._proto_relative_url(
|
|
||||||
encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
|
|
||||||
key = self._search_regex(
|
|
||||||
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
|
|
||||||
if not key:
|
|
||||||
return encrypted_url
|
|
||||||
return encrypted_url.replace(key, decrypt_key(key))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, video_url in video.items():
|
for format_id, video_url in video.items():
|
||||||
if not video_url:
|
if not video_url:
|
||||||
@@ -108,18 +52,20 @@ class BeegIE(InfoExtractor):
|
|||||||
if not height:
|
if not height:
|
||||||
continue
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': decrypt_url(video_url),
|
'url': self._proto_relative_url(
|
||||||
|
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'height': int(height),
|
'height': int(height),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
video_id = video.get('id') or video_id
|
video_id = compat_str(video.get('id') or video_id)
|
||||||
display_id = video.get('code')
|
display_id = video.get('code')
|
||||||
description = video.get('desc')
|
description = video.get('desc')
|
||||||
|
series = video.get('ps_name')
|
||||||
|
|
||||||
timestamp = parse_iso8601(video.get('date'), ' ')
|
timestamp = unified_timestamp(video.get('date'))
|
||||||
duration = int_or_none(video.get('duration'))
|
duration = int_or_none(video.get('duration'))
|
||||||
|
|
||||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||||
@@ -129,6 +75,7 @@ class BeegIE(InfoExtractor):
|
|||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'series': series,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
|
@@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_APP_KEY = '84956560bc028eb7'
|
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
|
||||||
|
|
||||||
def _report_error(self, result):
|
def _report_error(self, result):
|
||||||
if 'message' in result:
|
if 'message' in result:
|
||||||
|
@@ -5,7 +5,10 @@ import itertools
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import urlencode_postdata
|
from ..utils import (
|
||||||
|
orderedSet,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BitChuteIE(InfoExtractor):
|
class BitChuteIE(InfoExtractor):
|
||||||
@@ -43,10 +46,16 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'description', webpage, 'title',
|
'description', webpage, 'title',
|
||||||
default=None) or self._og_search_description(webpage)
|
default=None) or self._og_search_description(webpage)
|
||||||
|
|
||||||
|
format_urls = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||||
|
format_urls.append(mobj.group('url'))
|
||||||
|
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{'url': mobj.group('url')}
|
{'url': format_url}
|
||||||
for mobj in re.finditer(
|
for format_url in orderedSet(format_urls)]
|
||||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
|
@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
|
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
'age_limit': 18
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
|
|||||||
'title': self._live_title(user_id),
|
'title': self._live_title(user_id),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'age_limit': 18
|
||||||
}
|
}
|
||||||
|
@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
|
|||||||
'duration': 1274,
|
'duration': 1274,
|
||||||
'timestamp': 1528018608,
|
'timestamp': 1528018608,
|
||||||
'upload_date': '20180603',
|
'upload_date': '20180603',
|
||||||
|
'age_limit': 18
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
|
|||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'creator': creator,
|
'creator': creator,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'age_limit': 18
|
||||||
}
|
}
|
||||||
|
@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
|
|||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'uploader': 'MileenaK',
|
'uploader': 'MileenaK',
|
||||||
'upload_date': '20160322',
|
'upload_date': '20160322',
|
||||||
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
|
|||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
|
'age_limit': 18
|
||||||
}
|
}
|
||||||
|
@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
videomore_url = VideomoreIE._extract_url(webpage)
|
videomore_url = VideomoreIE._extract_url(webpage)
|
||||||
|
if not videomore_url:
|
||||||
|
videomore_id = self._search_regex(
|
||||||
|
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
|
||||||
|
default=None)
|
||||||
|
if videomore_id:
|
||||||
|
videomore_url = 'videomore:%s' % videomore_id
|
||||||
if videomore_url:
|
if videomore_url:
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
return {
|
return {
|
||||||
|
@@ -1,20 +1,19 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .turner import TurnerBaseIE
|
from .turner import TurnerBaseIE
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class CartoonNetworkIE(TurnerBaseIE):
|
class CartoonNetworkIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
|
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
|
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Starfire the Cat Lady',
|
'title': 'How to Draw Upgrade',
|
||||||
'description': 'Robin decides to become a cat so that Starfire will finally love him.',
|
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
|
|
||||||
query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
|
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
|
||||||
return self._extract_cvp_info(
|
metadata_re = ''
|
||||||
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
|
if content_re:
|
||||||
'secure': {
|
metadata_re = r'|video_metadata\.content_' + content_re
|
||||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
return self._search_regex(
|
||||||
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
|
r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
|
||||||
},
|
webpage, name, fatal=fatal)
|
||||||
}, {
|
|
||||||
|
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
|
||||||
|
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
|
||||||
|
|
||||||
|
info = self._extract_ngtv_info(
|
||||||
|
media_id, {'networkId': 'cartoonnetwork'}, {
|
||||||
'url': url,
|
'url': url,
|
||||||
'site_name': 'CartoonNetwork',
|
'site_name': 'CartoonNetwork',
|
||||||
'auth_required': self._search_regex(
|
'auth_required': find_field('authType', 'auth type') != 'unauth',
|
||||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
|
||||||
webpage, 'auth required', default='false') == 'true',
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
series = find_field(
|
||||||
|
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
|
||||||
|
info.update({
|
||||||
|
'id': media_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._html_search_meta('description', webpage),
|
||||||
|
'series': series,
|
||||||
|
'episode': title,
|
||||||
|
})
|
||||||
|
|
||||||
|
for field in ('season', 'episode'):
|
||||||
|
field_name = field + 'Number'
|
||||||
|
info[field + '_number'] = int_or_none(find_field(
|
||||||
|
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
|
||||||
|
|
||||||
|
return info
|
||||||
|
@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, url_basename(url))
|
webpage = self._download_webpage(url, url_basename(url))
|
||||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||||
return {
|
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||||
'_type': 'url',
|
|
||||||
'url': cnn_url,
|
|
||||||
'ie_key': CNNIE.ie_key(),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class CNNArticleIE(InfoExtractor):
|
class CNNArticleIE(InfoExtractor):
|
||||||
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, url_basename(url))
|
webpage = self._download_webpage(url, url_basename(url))
|
||||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||||
return {
|
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||||
'_type': 'url',
|
|
||||||
'url': 'http://cnn.com/video/?/video/' + cnn_url,
|
|
||||||
'ie_key': CNNIE.ie_key(),
|
|
||||||
}
|
|
||||||
|
@@ -17,6 +17,7 @@ import math
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
compat_cookies,
|
compat_cookies,
|
||||||
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_integer_types,
|
compat_integer_types,
|
||||||
@@ -102,10 +103,23 @@ class InfoExtractor(object):
|
|||||||
from worst to best quality.
|
from worst to best quality.
|
||||||
|
|
||||||
Potential fields:
|
Potential fields:
|
||||||
* url Mandatory. The URL of the video file
|
* url The mandatory URL representing the media:
|
||||||
|
for plain file media - HTTP URL of this file,
|
||||||
|
for RTMP - RTMP URL,
|
||||||
|
for HLS - URL of the M3U8 media playlist,
|
||||||
|
for HDS - URL of the F4M manifest,
|
||||||
|
for DASH - URL of the MPD manifest or
|
||||||
|
base URL representing the media
|
||||||
|
if MPD manifest is parsed from
|
||||||
|
a string,
|
||||||
|
for MSS - URL of the ISM manifest.
|
||||||
* manifest_url
|
* manifest_url
|
||||||
The URL of the manifest file in case of
|
The URL of the manifest file in case of
|
||||||
fragmented media (DASH, hls, hds)
|
fragmented media:
|
||||||
|
for HLS - URL of the M3U8 master playlist,
|
||||||
|
for HDS - URL of the F4M manifest,
|
||||||
|
for DASH - URL of the MPD manifest,
|
||||||
|
for MSS - URL of the ISM manifest.
|
||||||
* ext Will be calculated from URL if missing
|
* ext Will be calculated from URL if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
@@ -788,7 +802,7 @@ class InfoExtractor(object):
|
|||||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||||
expected_status=None):
|
expected_status=None):
|
||||||
"""
|
"""
|
||||||
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
|
Return a tuple (xml as an compat_etree_Element, URL handle).
|
||||||
|
|
||||||
See _download_webpage docstring for arguments specification.
|
See _download_webpage docstring for arguments specification.
|
||||||
"""
|
"""
|
||||||
@@ -809,7 +823,7 @@ class InfoExtractor(object):
|
|||||||
transform_source=None, fatal=True, encoding=None,
|
transform_source=None, fatal=True, encoding=None,
|
||||||
data=None, headers={}, query={}, expected_status=None):
|
data=None, headers={}, query={}, expected_status=None):
|
||||||
"""
|
"""
|
||||||
Return the xml as an xml.etree.ElementTree.Element.
|
Return the xml as an compat_etree_Element.
|
||||||
|
|
||||||
See _download_webpage docstring for arguments specification.
|
See _download_webpage docstring for arguments specification.
|
||||||
"""
|
"""
|
||||||
@@ -1058,7 +1072,7 @@ class InfoExtractor(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||||
% {'prop': re.escape(prop)})
|
% {'prop': re.escape(prop)})
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
return [
|
return [
|
||||||
@@ -1239,17 +1253,30 @@ class InfoExtractor(object):
|
|||||||
if expected_type is not None and expected_type != item_type:
|
if expected_type is not None and expected_type != item_type:
|
||||||
return info
|
return info
|
||||||
if item_type in ('TVEpisode', 'Episode'):
|
if item_type in ('TVEpisode', 'Episode'):
|
||||||
|
episode_name = unescapeHTML(e.get('name'))
|
||||||
info.update({
|
info.update({
|
||||||
'episode': unescapeHTML(e.get('name')),
|
'episode': episode_name,
|
||||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
})
|
})
|
||||||
|
if not info.get('title') and episode_name:
|
||||||
|
info['title'] = episode_name
|
||||||
part_of_season = e.get('partOfSeason')
|
part_of_season = e.get('partOfSeason')
|
||||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
info.update({
|
||||||
|
'season': unescapeHTML(part_of_season.get('name')),
|
||||||
|
'season_number': int_or_none(part_of_season.get('seasonNumber')),
|
||||||
|
})
|
||||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
|
elif item_type == 'Movie':
|
||||||
|
info.update({
|
||||||
|
'title': unescapeHTML(e.get('name')),
|
||||||
|
'description': unescapeHTML(e.get('description')),
|
||||||
|
'duration': parse_duration(e.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(e.get('dateCreated')),
|
||||||
|
})
|
||||||
elif item_type in ('Article', 'NewsArticle'):
|
elif item_type in ('Article', 'NewsArticle'):
|
||||||
info.update({
|
info.update({
|
||||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||||
@@ -1441,6 +1468,9 @@ class InfoExtractor(object):
|
|||||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||||
fatal=True, m3u8_id=None):
|
fatal=True, m3u8_id=None):
|
||||||
|
if not isinstance(manifest, compat_etree_Element) and not fatal:
|
||||||
|
return []
|
||||||
|
|
||||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||||
@@ -1586,6 +1616,7 @@ class InfoExtractor(object):
|
|||||||
# References:
|
# References:
|
||||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||||
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
||||||
|
# 3. https://github.com/rg3/youtube-dl/issues/18923
|
||||||
|
|
||||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||||
# i.e. playlists that describe available qualities. On the other hand
|
# i.e. playlists that describe available qualities. On the other hand
|
||||||
@@ -1657,11 +1688,16 @@ class InfoExtractor(object):
|
|||||||
rendition = stream_group[0]
|
rendition = stream_group[0]
|
||||||
return rendition.get('NAME') or stream_group_id
|
return rendition.get('NAME') or stream_group_id
|
||||||
|
|
||||||
|
# parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
|
||||||
|
# chance to detect video only formats when EXT-X-STREAM-INF tags
|
||||||
|
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
|
||||||
|
for line in m3u8_doc.splitlines():
|
||||||
|
if line.startswith('#EXT-X-MEDIA:'):
|
||||||
|
extract_media(line)
|
||||||
|
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||||
last_stream_inf = parse_m3u8_attributes(line)
|
last_stream_inf = parse_m3u8_attributes(line)
|
||||||
elif line.startswith('#EXT-X-MEDIA:'):
|
|
||||||
extract_media(line)
|
|
||||||
elif line.startswith('#') or not line.strip():
|
elif line.startswith('#') or not line.strip():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@@ -2101,7 +2137,8 @@ class InfoExtractor(object):
|
|||||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||||
f = {
|
f = {
|
||||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||||
'url': base_url,
|
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||||
|
'url': mpd_url or base_url,
|
||||||
'manifest_url': mpd_url,
|
'manifest_url': mpd_url,
|
||||||
'ext': mimetype2ext(mime_type),
|
'ext': mimetype2ext(mime_type),
|
||||||
'width': int_or_none(representation_attrib.get('width')),
|
'width': int_or_none(representation_attrib.get('width')),
|
||||||
@@ -2614,7 +2651,7 @@ class InfoExtractor(object):
|
|||||||
'id': this_video_id,
|
'id': this_video_id,
|
||||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||||
'description': video_data.get('description'),
|
'description': video_data.get('description'),
|
||||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
||||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
@@ -2641,12 +2678,9 @@ class InfoExtractor(object):
|
|||||||
for source in jwplayer_sources_data:
|
for source in jwplayer_sources_data:
|
||||||
if not isinstance(source, dict):
|
if not isinstance(source, dict):
|
||||||
continue
|
continue
|
||||||
source_url = self._proto_relative_url(source.get('file'))
|
source_url = urljoin(
|
||||||
if not source_url:
|
base_url, self._proto_relative_url(source.get('file')))
|
||||||
continue
|
if not source_url or source_url in urls:
|
||||||
if base_url:
|
|
||||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
|
||||||
if source_url in urls:
|
|
||||||
continue
|
continue
|
||||||
urls.append(source_url)
|
urls.append(source_url)
|
||||||
source_type = source.get('type') or ''
|
source_type = source.get('type') or ''
|
||||||
|
@@ -1,7 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals, division
|
from __future__ import unicode_literals, division
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
@@ -48,6 +51,21 @@ class CrackleIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_MEDIA_FILE_SLOTS = {
|
||||||
|
'360p.mp4': {
|
||||||
|
'width': 640,
|
||||||
|
'height': 360,
|
||||||
|
},
|
||||||
|
'480p.mp4': {
|
||||||
|
'width': 768,
|
||||||
|
'height': 432,
|
||||||
|
},
|
||||||
|
'480p_1mbps.mp4': {
|
||||||
|
'width': 852,
|
||||||
|
'height': 480,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
@@ -59,13 +77,16 @@ class CrackleIE(InfoExtractor):
|
|||||||
|
|
||||||
for country in countries:
|
for country in countries:
|
||||||
try:
|
try:
|
||||||
|
# Authorization generation algorithm is reverse engineered from:
|
||||||
|
# https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
|
||||||
|
media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
|
||||||
|
timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
|
||||||
|
h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
|
||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
|
media_detail_url, video_id, 'Downloading media JSON as %s' % country,
|
||||||
% (video_id, country), video_id,
|
'Unable to download media JSON', headers={
|
||||||
'Downloading media JSON as %s' % country,
|
'Accept': 'application/json',
|
||||||
'Unable to download media JSON', query={
|
'Authorization': '|'.join([h, timestamp, '117', '1']),
|
||||||
'disableProtocols': 'true',
|
|
||||||
'format': 'json'
|
|
||||||
})
|
})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# 401 means geo restriction, trying next country
|
# 401 means geo restriction, trying next country
|
||||||
@@ -95,6 +116,20 @@ class CrackleIE(InfoExtractor):
|
|||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
format_url, video_id, mpd_id='dash', fatal=False))
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
elif format_url.endswith('.ism/Manifest'):
|
||||||
|
formats.extend(self._extract_ism_formats(
|
||||||
|
format_url, video_id, ism_id='mss', fatal=False))
|
||||||
|
else:
|
||||||
|
mfs_path = e.get('Type')
|
||||||
|
mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
|
||||||
|
if not mfs_info:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||||
|
'width': mfs_info['width'],
|
||||||
|
'height': mfs_info['height'],
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = media.get('Description')
|
description = media.get('Description')
|
||||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree as etree
|
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
@@ -12,6 +11,7 @@ from .common import InfoExtractor
|
|||||||
from .vrv import VRVIE
|
from .vrv import VRVIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@@ -56,22 +56,11 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
self._download_webpage(
|
|
||||||
'https://www.crunchyroll.com/?a=formhandler',
|
|
||||||
None, 'Logging in', 'Wrong login info',
|
|
||||||
data=urlencode_postdata({
|
|
||||||
'formname': 'RpcApiUser_Login',
|
|
||||||
'next_url': 'https://www.crunchyroll.com/acct/membership',
|
|
||||||
'name': username,
|
|
||||||
'password': password,
|
|
||||||
}))
|
|
||||||
|
|
||||||
'''
|
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
self._LOGIN_URL, None, 'Downloading login page')
|
self._LOGIN_URL, None, 'Downloading login page')
|
||||||
|
|
||||||
def is_logged(webpage):
|
def is_logged(webpage):
|
||||||
return '<title>Redirecting' in webpage
|
return 'href="/logout"' in webpage
|
||||||
|
|
||||||
# Already logged in
|
# Already logged in
|
||||||
if is_logged(login_page):
|
if is_logged(login_page):
|
||||||
@@ -110,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
'''
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
@@ -144,7 +132,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
IE_NAME = 'crunchyroll'
|
IE_NAME = 'crunchyroll'
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -269,6 +257,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.crunchyroll.com/media-723735',
|
'url': 'http://www.crunchyroll.com/media-723735',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMAT_IDS = {
|
_FORMAT_IDS = {
|
||||||
@@ -399,7 +390,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'Downloading subtitles for ' + sub_name, data={
|
'Downloading subtitles for ' + sub_name, data={
|
||||||
'subtitle_script_id': sub_id,
|
'subtitle_script_id': sub_id,
|
||||||
})
|
})
|
||||||
if not isinstance(sub_doc, etree.Element):
|
if not isinstance(sub_doc, compat_etree_Element):
|
||||||
continue
|
continue
|
||||||
sid = sub_doc.get('id')
|
sid = sub_doc.get('id')
|
||||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||||
@@ -516,7 +507,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'video_quality': stream_quality,
|
'video_quality': stream_quality,
|
||||||
'current_page': url,
|
'current_page': url,
|
||||||
})
|
})
|
||||||
if isinstance(streamdata, etree.Element):
|
if isinstance(streamdata, compat_etree_Element):
|
||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
if stream_info is not None:
|
if stream_info is not None:
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
@@ -527,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'video_format': stream_format,
|
'video_format': stream_format,
|
||||||
'video_encode_quality': stream_quality,
|
'video_encode_quality': stream_quality,
|
||||||
})
|
})
|
||||||
if isinstance(stream_info, etree.Element):
|
if isinstance(stream_info, compat_etree_Element):
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
for stream_info in stream_infos:
|
for stream_info in stream_infos:
|
||||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||||
@@ -602,7 +593,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
|
|
||||||
season = episode = episode_number = duration = thumbnail = None
|
season = episode = episode_number = duration = thumbnail = None
|
||||||
|
|
||||||
if isinstance(metadata, etree.Element):
|
if isinstance(metadata, compat_etree_Element):
|
||||||
season = xpath_text(metadata, 'series_title')
|
season = xpath_text(metadata, 'series_title')
|
||||||
episode = xpath_text(metadata, 'episode_title')
|
episode = xpath_text(metadata, 'episode_title')
|
||||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||||
|
@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
|||||||
self._handle_errors(result)
|
self._handle_errors(result)
|
||||||
self._auth_token = result['message']['auth_token']
|
self._auth_token = result['message']['auth_token']
|
||||||
|
|
||||||
def _extract_media_info(self, media):
|
|
||||||
video_id = compat_str(media['id'])
|
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||||
|
IE_NAME = 'curiositystream'
|
||||||
|
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://app.curiositystream.com/video/2',
|
||||||
|
'md5': '262bb2f257ff301115f1973540de8983',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'How Did You Develop The Internet?',
|
||||||
|
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
media = self._call_api('media/' + video_id, video_id)
|
||||||
title = media['title']
|
title = media['title']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
|
||||||
IE_NAME = 'curiositystream'
|
|
||||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://app.curiositystream.com/video/2',
|
|
||||||
'md5': '262bb2f257ff301115f1973540de8983',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'How Did You Develop The Internet?',
|
|
||||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
media = self._call_api('media/' + video_id, video_id)
|
|
||||||
return self._extract_media_info(media)
|
|
||||||
|
|
||||||
|
|
||||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||||
IE_NAME = 'curiositystream:collection'
|
IE_NAME = 'curiositystream:collection'
|
||||||
_VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://app.curiositystream.com/collection/2',
|
'url': 'https://app.curiositystream.com/collection/2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
'title': 'Curious Minds: The Internet',
|
'title': 'Curious Minds: The Internet',
|
||||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 12,
|
'playlist_mincount': 17,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://curiositystream.com/series/2',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
collection_id = self._match_id(url)
|
collection_id = self._match_id(url)
|
||||||
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
|||||||
'collections/' + collection_id, collection_id)
|
'collections/' + collection_id, collection_id)
|
||||||
entries = []
|
entries = []
|
||||||
for media in collection.get('media', []):
|
for media in collection.get('media', []):
|
||||||
entries.append(self._extract_media_info(media))
|
media_id = compat_str(media.get('id'))
|
||||||
|
entries.append(self.url_result(
|
||||||
|
'https://curiositystream.com/video/' + media_id,
|
||||||
|
CuriosityStreamIE.ie_key(), media_id))
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, collection_id,
|
entries, collection_id,
|
||||||
collection.get('title'), collection.get('description'))
|
collection.get('title'), collection.get('description'))
|
||||||
|
@@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
|
|||||||
|
|
||||||
|
|
||||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
_VALID_URL = r'''(?x)https?://
|
||||||
discovery|
|
(?P<site>
|
||||||
investigationdiscovery|
|
(?:www\.)?
|
||||||
discoverylife|
|
(?:
|
||||||
animalplanet|
|
discovery|
|
||||||
ahctv|
|
investigationdiscovery|
|
||||||
destinationamerica|
|
discoverylife|
|
||||||
sciencechannel|
|
animalplanet|
|
||||||
tlc|
|
ahctv|
|
||||||
velocity
|
destinationamerica|
|
||||||
|
sciencechannel|
|
||||||
|
tlc|
|
||||||
|
velocity
|
||||||
|
)|
|
||||||
|
watch\.
|
||||||
|
(?:
|
||||||
|
hgtv|
|
||||||
|
foodnetwork|
|
||||||
|
travelchannel|
|
||||||
|
diynetwork|
|
||||||
|
cookingchanneltv|
|
||||||
|
motortrend
|
||||||
|
)
|
||||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||||
@@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
|
|
||||||
if not access_token:
|
if not access_token:
|
||||||
access_token = self._download_json(
|
access_token = self._download_json(
|
||||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
'https://%s.com/anonymous' % site, display_id, query={
|
||||||
'authRel': 'authorization',
|
'authRel': 'authorization',
|
||||||
'client_id': try_get(
|
'client_id': try_get(
|
||||||
react_data, lambda x: x['application']['apiClientId'],
|
react_data, lambda x: x['application']['apiClientId'],
|
||||||
@@ -81,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||||||
})['access_token']
|
})['access_token']
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
headers = self.geo_verification_headers()
|
||||||
|
headers['Authorization'] = 'Bearer ' + access_token
|
||||||
|
|
||||||
stream = self._download_json(
|
stream = self._download_json(
|
||||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||||
display_id, headers={
|
display_id, headers=headers)
|
||||||
'Authorization': 'Bearer ' + access_token,
|
|
||||||
})
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||||
e_description = self._parse_json(
|
e_description = self._parse_json(
|
||||||
|
@@ -4,7 +4,9 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
|
parse_duration,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = int_or_none(video_data.get('duration')) or parse_duration(
|
||||||
|
video_data.get('duration_format'))
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
||||||
r'<title>([^<]+)\s*@\s+DrTuber',
|
r'<title>([^<]+)\s*@\s+DrTuber',
|
||||||
@@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor):
|
|||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': self._rta_search(webpage),
|
||||||
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
@@ -1,15 +1,25 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import binascii
|
||||||
|
import hashlib
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..aes import aes_cbc_decrypt
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
bytes_to_intlist,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
intlist_to_bytes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
str_or_none,
|
||||||
remove_end,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -20,23 +30,31 @@ class DRTVIE(InfoExtractor):
|
|||||||
IE_NAME = 'drtv'
|
IE_NAME = 'drtv'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||||
'md5': '7ae17b4e18eb5d29212f424a7511c184',
|
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'klassen-darlig-taber-10',
|
'id': 'klassen-darlig-taber-10',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Klassen - Dårlig taber (10)',
|
'title': 'Klassen - Dårlig taber (10)',
|
||||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||||
'timestamp': 1471991907,
|
'timestamp': 1539085800,
|
||||||
'upload_date': '20160823',
|
'upload_date': '20181009',
|
||||||
'duration': 606.84,
|
'duration': 606.84,
|
||||||
|
'series': 'Klassen',
|
||||||
|
'season': 'Klassen I',
|
||||||
|
'season_number': 1,
|
||||||
|
'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
|
||||||
|
'episode': 'Episode 10',
|
||||||
|
'episode_number': 10,
|
||||||
|
'release_year': 2016,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
# embed
|
# embed
|
||||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
'title': 'christiania pusher street ryddes drdkrjpo',
|
||||||
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
|
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
|
||||||
'timestamp': 1472800279,
|
'timestamp': 1472800279,
|
||||||
'upload_date': '20160902',
|
'upload_date': '20160902',
|
||||||
@@ -45,17 +63,18 @@ class DRTVIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
# with SignLanguage formats
|
# with SignLanguage formats
|
||||||
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
|
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'historien-om-danmark-stenalder',
|
'id': 'historien-om-danmark-stenalder',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Historien om Danmark: Stenalder (1)',
|
'title': 'Historien om Danmark: Stenalder',
|
||||||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||||
'timestamp': 1490401996,
|
'timestamp': 1546628400,
|
||||||
'upload_date': '20170325',
|
'upload_date': '20190104',
|
||||||
'duration': 3502.04,
|
'duration': 3502.56,
|
||||||
'formats': 'mincount:20',
|
'formats': 'mincount:20',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@@ -74,20 +93,26 @@ class DRTVIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||||
webpage, 'video id')
|
webpage, 'video id', default=None)
|
||||||
|
|
||||||
programcard = self._download_json(
|
if not video_id:
|
||||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
|
video_id = compat_urllib_parse_unquote(self._search_regex(
|
||||||
video_id, 'Downloading video JSON')
|
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
||||||
data = programcard['Data'][0]
|
webpage, 'urn'))
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(
|
data = self._download_json(
|
||||||
webpage, default=None), ' | TV | DR') or data['Title']
|
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
|
||||||
|
video_id, 'Downloading video JSON', query={'expanded': 'true'})
|
||||||
|
|
||||||
|
title = str_or_none(data.get('Title')) or re.sub(
|
||||||
|
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
||||||
|
self._og_search_title(webpage))
|
||||||
description = self._og_search_description(
|
description = self._og_search_description(
|
||||||
webpage, default=None) or data.get('Description')
|
webpage, default=None) or data.get('Description')
|
||||||
|
|
||||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
timestamp = unified_timestamp(
|
||||||
|
data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
|
||||||
|
|
||||||
thumbnail = None
|
thumbnail = None
|
||||||
duration = None
|
duration = None
|
||||||
@@ -97,24 +122,62 @@ class DRTVIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
|
||||||
for asset in data['Assets']:
|
assets = []
|
||||||
|
primary_asset = data.get('PrimaryAsset')
|
||||||
|
if isinstance(primary_asset, dict):
|
||||||
|
assets.append(primary_asset)
|
||||||
|
secondary_assets = data.get('SecondaryAssets')
|
||||||
|
if isinstance(secondary_assets, list):
|
||||||
|
for secondary_asset in secondary_assets:
|
||||||
|
if isinstance(secondary_asset, dict):
|
||||||
|
assets.append(secondary_asset)
|
||||||
|
|
||||||
|
def hex_to_bytes(hex):
|
||||||
|
return binascii.a2b_hex(hex.encode('ascii'))
|
||||||
|
|
||||||
|
def decrypt_uri(e):
|
||||||
|
n = int(e[2:10], 16)
|
||||||
|
a = e[10 + n:]
|
||||||
|
data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
|
||||||
|
key = bytes_to_intlist(hashlib.sha256(
|
||||||
|
('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
|
||||||
|
iv = bytes_to_intlist(hex_to_bytes(a))
|
||||||
|
decrypted = aes_cbc_decrypt(data, key, iv)
|
||||||
|
return intlist_to_bytes(
|
||||||
|
decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
|
||||||
|
|
||||||
|
for asset in assets:
|
||||||
kind = asset.get('Kind')
|
kind = asset.get('Kind')
|
||||||
if kind == 'Image':
|
if kind == 'Image':
|
||||||
thumbnail = asset.get('Uri')
|
thumbnail = url_or_none(asset.get('Uri'))
|
||||||
elif kind in ('VideoResource', 'AudioResource'):
|
elif kind in ('VideoResource', 'AudioResource'):
|
||||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||||
asset_target = asset.get('Target')
|
asset_target = asset.get('Target')
|
||||||
for link in asset.get('Links', []):
|
for link in asset.get('Links', []):
|
||||||
uri = link.get('Uri')
|
uri = link.get('Uri')
|
||||||
|
if not uri:
|
||||||
|
encrypted_uri = link.get('EncryptedUri')
|
||||||
|
if not encrypted_uri:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
uri = decrypt_uri(encrypted_uri)
|
||||||
|
except Exception:
|
||||||
|
self.report_warning(
|
||||||
|
'Unable to decrypt EncryptedUri', video_id)
|
||||||
|
continue
|
||||||
|
uri = url_or_none(uri)
|
||||||
if not uri:
|
if not uri:
|
||||||
continue
|
continue
|
||||||
target = link.get('Target')
|
target = link.get('Target')
|
||||||
format_id = target or ''
|
format_id = target or ''
|
||||||
preference = None
|
if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
|
||||||
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
|
|
||||||
preference = -1
|
preference = -1
|
||||||
format_id += '-%s' % asset_target
|
format_id += '-%s' % asset_target
|
||||||
|
elif asset_target == 'Default':
|
||||||
|
preference = 1
|
||||||
|
else:
|
||||||
|
preference = None
|
||||||
if target == 'HDS':
|
if target == 'HDS':
|
||||||
f4m_formats = self._extract_f4m_formats(
|
f4m_formats = self._extract_f4m_formats(
|
||||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||||
@@ -140,19 +203,22 @@ class DRTVIE(InfoExtractor):
|
|||||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
})
|
})
|
||||||
subtitles_list = asset.get('SubtitlesList')
|
subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
|
||||||
if isinstance(subtitles_list, list):
|
if isinstance(subtitles_list, list):
|
||||||
LANGS = {
|
LANGS = {
|
||||||
'Danish': 'da',
|
'Danish': 'da',
|
||||||
}
|
}
|
||||||
for subs in subtitles_list:
|
for subs in subtitles_list:
|
||||||
if not subs.get('Uri'):
|
if not isinstance(subs, dict):
|
||||||
continue
|
continue
|
||||||
lang = subs.get('Language') or 'da'
|
sub_uri = url_or_none(subs.get('Uri'))
|
||||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
if not sub_uri:
|
||||||
'url': subs['Uri'],
|
continue
|
||||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
lang = subs.get('Language') or 'da'
|
||||||
})
|
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||||
|
'url': sub_uri,
|
||||||
|
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||||
|
})
|
||||||
|
|
||||||
if not formats and restricted_to_denmark:
|
if not formats and restricted_to_denmark:
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
@@ -170,6 +236,13 @@ class DRTVIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'series': str_or_none(data.get('SeriesTitle')),
|
||||||
|
'season': str_or_none(data.get('SeasonTitle')),
|
||||||
|
'season_number': int_or_none(data.get('SeasonNumber')),
|
||||||
|
'season_id': str_or_none(data.get('SeasonUrn')),
|
||||||
|
'episode': str_or_none(data.get('EpisodeTitle')),
|
||||||
|
'episode_number': int_or_none(data.get('EpisodeNumber')),
|
||||||
|
'release_year': int_or_none(data.get('ProductionYear')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -15,16 +15,16 @@ from ..utils import (
|
|||||||
class DTubeIE(InfoExtractor):
|
class DTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://d.tube/#!/v/benswann/zqd630em',
|
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
|
||||||
'md5': 'a03eaa186618ffa7a3145945543a251e',
|
'md5': '9f29088fa08d699a7565ee983f56a06e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'zqd630em',
|
'id': 'x380jtr1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
|
'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
|
||||||
'description': 'md5:700d164e066b87f9eac057949e4227c2',
|
'description': 'md5:60be222088183be3a42f196f34235776',
|
||||||
'uploader_id': 'benswann',
|
'uploader_id': 'broncnutz',
|
||||||
'upload_date': '20180222',
|
'upload_date': '20190107',
|
||||||
'timestamp': 1519328958,
|
'timestamp': 1546854054,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'format': '480p',
|
'format': '480p',
|
||||||
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
|
|||||||
def canonical_url(h):
|
def canonical_url(h):
|
||||||
if not h:
|
if not h:
|
||||||
return None
|
return None
|
||||||
return 'https://ipfs.io/ipfs/' + h
|
return 'https://video.dtube.top/ipfs/' + h
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for q in ('240', '480', '720', '1080', ''):
|
for q in ('240', '480', '720', '1080', ''):
|
||||||
|
@@ -29,7 +29,8 @@ class ESPNIE(OnceIE):
|
|||||||
(?:
|
(?:
|
||||||
.*?\?.*?\bid=|
|
.*?\?.*?\bid=|
|
||||||
/_/id/
|
/_/id/
|
||||||
)
|
)|
|
||||||
|
[^/]+/video/
|
||||||
)
|
)
|
||||||
)|
|
)|
|
||||||
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
||||||
@@ -94,6 +95,9 @@ class ESPNIE(OnceIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -411,6 +411,7 @@ from .funk import (
|
|||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
from .fusion import FusionIE
|
from .fusion import FusionIE
|
||||||
from .fxnetworks import FXNetworksIE
|
from .fxnetworks import FXNetworksIE
|
||||||
|
from .gaia import GaiaIE
|
||||||
from .gameinformer import GameInformerIE
|
from .gameinformer import GameInformerIE
|
||||||
from .gameone import (
|
from .gameone import (
|
||||||
GameOneIE,
|
GameOneIE,
|
||||||
@@ -451,6 +452,7 @@ from .hellporno import HellPornoIE
|
|||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .hgtv import HGTVComShowIE
|
from .hgtv import HGTVComShowIE
|
||||||
|
from .hketv import HKETVIE
|
||||||
from .hidive import HiDiveIE
|
from .hidive import HiDiveIE
|
||||||
from .historicfilms import HistoricFilmsIE
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
@@ -469,6 +471,10 @@ from .hrti import (
|
|||||||
)
|
)
|
||||||
from .huajiao import HuajiaoIE
|
from .huajiao import HuajiaoIE
|
||||||
from .huffpost import HuffPostIE
|
from .huffpost import HuffPostIE
|
||||||
|
from .hungama import (
|
||||||
|
HungamaIE,
|
||||||
|
HungamaSongIE,
|
||||||
|
)
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .iconosquare import IconosquareIE
|
from .iconosquare import IconosquareIE
|
||||||
from .ign import (
|
from .ign import (
|
||||||
@@ -489,7 +495,11 @@ from .ina import InaIE
|
|||||||
from .inc import IncIE
|
from .inc import IncIE
|
||||||
from .indavideo import IndavideoEmbedIE
|
from .indavideo import IndavideoEmbedIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE, InstagramUserIE
|
from .instagram import (
|
||||||
|
InstagramIE,
|
||||||
|
InstagramUserIE,
|
||||||
|
InstagramTagIE,
|
||||||
|
)
|
||||||
from .internazionale import InternazionaleIE
|
from .internazionale import InternazionaleIE
|
||||||
from .internetvideoarchive import InternetVideoArchiveIE
|
from .internetvideoarchive import InternetVideoArchiveIE
|
||||||
from .iprima import IPrimaIE
|
from .iprima import IPrimaIE
|
||||||
@@ -557,6 +567,7 @@ from .lecture2go import Lecture2GoIE
|
|||||||
from .lecturio import (
|
from .lecturio import (
|
||||||
LecturioIE,
|
LecturioIE,
|
||||||
LecturioCourseIE,
|
LecturioCourseIE,
|
||||||
|
LecturioDeCourseIE,
|
||||||
)
|
)
|
||||||
from .leeco import (
|
from .leeco import (
|
||||||
LeIE,
|
LeIE,
|
||||||
@@ -582,6 +593,7 @@ from .linkedin import (
|
|||||||
LinkedInLearningIE,
|
LinkedInLearningIE,
|
||||||
LinkedInLearningCourseIE,
|
LinkedInLearningCourseIE,
|
||||||
)
|
)
|
||||||
|
from .linuxacademy import LinuxAcademyIE
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .liveleak import (
|
from .liveleak import (
|
||||||
LiveLeakIE,
|
LiveLeakIE,
|
||||||
@@ -608,6 +620,7 @@ from .mailru import (
|
|||||||
MailRuMusicSearchIE,
|
MailRuMusicSearchIE,
|
||||||
)
|
)
|
||||||
from .makertv import MakerTVIE
|
from .makertv import MakerTVIE
|
||||||
|
from .malltv import MallTVIE
|
||||||
from .mangomolo import (
|
from .mangomolo import (
|
||||||
MangomoloVideoIE,
|
MangomoloVideoIE,
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
@@ -683,8 +696,7 @@ from .myvi import (
|
|||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
from .nationalgeographic import (
|
from .nationalgeographic import (
|
||||||
NationalGeographicVideoIE,
|
NationalGeographicVideoIE,
|
||||||
NationalGeographicIE,
|
NationalGeographicTVIE,
|
||||||
NationalGeographicEpisodeGuideIE,
|
|
||||||
)
|
)
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
@@ -827,6 +839,7 @@ from .orf import (
|
|||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
|
from .outsidetv import OutsideTVIE
|
||||||
from .packtpub import (
|
from .packtpub import (
|
||||||
PacktPubIE,
|
PacktPubIE,
|
||||||
PacktPubCourseIE,
|
PacktPubCourseIE,
|
||||||
@@ -855,6 +868,7 @@ from .piksel import PikselIE
|
|||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
|
from .playplustv import PlayPlusTVIE
|
||||||
from .plays import PlaysTVIE
|
from .plays import PlaysTVIE
|
||||||
from .playtvak import PlaytvakIE
|
from .playtvak import PlaytvakIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
@@ -1046,7 +1060,10 @@ from .southpark import (
|
|||||||
SouthParkEsIE,
|
SouthParkEsIE,
|
||||||
SouthParkNlIE
|
SouthParkNlIE
|
||||||
)
|
)
|
||||||
from .spankbang import SpankBangIE
|
from .spankbang import (
|
||||||
|
SpankBangIE,
|
||||||
|
SpankBangPlaylistIE,
|
||||||
|
)
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
@@ -1155,6 +1172,7 @@ from .toutv import TouTvIE
|
|||||||
from .toypics import ToypicsUserIE, ToypicsIE
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .trunews import TruNewsIE
|
||||||
from .trutv import TruTVIE
|
from .trutv import TruTVIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tubitv import TubiTvIE
|
from .tubitv import TubiTvIE
|
||||||
@@ -1192,13 +1210,15 @@ from .tvnet import TVNetIE
|
|||||||
from .tvnoe import TVNoeIE
|
from .tvnoe import TVNoeIE
|
||||||
from .tvnow import (
|
from .tvnow import (
|
||||||
TVNowIE,
|
TVNowIE,
|
||||||
TVNowListIE,
|
TVNowNewIE,
|
||||||
|
TVNowSeasonIE,
|
||||||
|
TVNowAnnualIE,
|
||||||
TVNowShowIE,
|
TVNowShowIE,
|
||||||
)
|
)
|
||||||
from .tvp import (
|
from .tvp import (
|
||||||
TVPEmbedIE,
|
TVPEmbedIE,
|
||||||
TVPIE,
|
TVPIE,
|
||||||
TVPSeriesIE,
|
TVPWebsiteIE,
|
||||||
)
|
)
|
||||||
from .tvplay import (
|
from .tvplay import (
|
||||||
TVPlayIE,
|
TVPlayIE,
|
||||||
@@ -1348,7 +1368,6 @@ from .voxmedia import (
|
|||||||
VoxMediaVolumeIE,
|
VoxMediaVolumeIE,
|
||||||
VoxMediaIE,
|
VoxMediaIE,
|
||||||
)
|
)
|
||||||
from .vporn import VpornIE
|
|
||||||
from .vrt import VRTIE
|
from .vrt import VRTIE
|
||||||
from .vrak import VrakIE
|
from .vrak import VrakIE
|
||||||
from .vrv import (
|
from .vrv import (
|
||||||
@@ -1362,6 +1381,7 @@ from .vuclip import VuClipIE
|
|||||||
from .vvvvid import VVVVIDIE
|
from .vvvvid import VVVVIDIE
|
||||||
from .vyborymos import VyboryMosIE
|
from .vyborymos import VyboryMosIE
|
||||||
from .vzaar import VzaarIE
|
from .vzaar import VzaarIE
|
||||||
|
from .wakanim import WakanimIE
|
||||||
from .walla import WallaIE
|
from .walla import WallaIE
|
||||||
from .washingtonpost import (
|
from .washingtonpost import (
|
||||||
WashingtonPostIE,
|
WashingtonPostIE,
|
||||||
@@ -1485,6 +1505,7 @@ from .zattoo import (
|
|||||||
QuantumTVIE,
|
QuantumTVIE,
|
||||||
QuicklineIE,
|
QuicklineIE,
|
||||||
QuicklineLiveIE,
|
QuicklineLiveIE,
|
||||||
|
SaltTVIE,
|
||||||
SAKTVIE,
|
SAKTVIE,
|
||||||
VTXTVIE,
|
VTXTVIE,
|
||||||
WalyTVIE,
|
WalyTVIE,
|
||||||
|
@@ -424,7 +424,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
uploader = clean_html(get_element_by_id(
|
uploader = clean_html(get_element_by_id(
|
||||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
||||||
fatal=False) or self._og_search_title(webpage, fatal=False)
|
default=None) or self._og_search_title(webpage, fatal=False)
|
||||||
timestamp = int_or_none(self._search_regex(
|
timestamp = int_or_none(self._search_regex(
|
||||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||||
'timestamp', default=None))
|
'timestamp', default=None))
|
||||||
|
@@ -1,17 +1,20 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from .uplynk import UplynkPreplayIE
|
from ..compat import (
|
||||||
from ..compat import compat_str
|
compat_str,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,6 +34,7 @@ class FOXIE(AdobePassIE):
|
|||||||
'upload_date': '20170901',
|
'upload_date': '20170901',
|
||||||
'creator': 'FOX',
|
'creator': 'FOX',
|
||||||
'series': 'Gotham',
|
'series': 'Gotham',
|
||||||
|
'age_limit': 14,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -44,48 +48,54 @@ class FOXIE(AdobePassIE):
|
|||||||
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_HOME_PAGE_URL = 'https://www.fox.com/'
|
||||||
|
_API_KEY = 'abdcbed02c124d393b39e818a4312055'
|
||||||
|
_access_token = None
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, data=None):
|
||||||
|
headers = {
|
||||||
|
'X-Api-Key': self._API_KEY,
|
||||||
|
}
|
||||||
|
if self._access_token:
|
||||||
|
headers['Authorization'] = 'Bearer ' + self._access_token
|
||||||
|
return self._download_json(
|
||||||
|
'https://api2.fox.com/v2.0/' + path,
|
||||||
|
video_id, data=data, headers=headers)
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
if not self._access_token:
|
||||||
|
mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
|
||||||
|
if mvpd_auth:
|
||||||
|
self._access_token = (self._parse_json(compat_urllib_parse_unquote(
|
||||||
|
mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
|
||||||
|
if not self._access_token:
|
||||||
|
self._access_token = self._call_api(
|
||||||
|
'login', None, json.dumps({
|
||||||
|
'deviceId': compat_str(uuid.uuid4()),
|
||||||
|
}).encode())['accessToken']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._call_api('vodplayer/' + video_id, video_id)
|
||||||
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
|
||||||
video_id, headers={
|
|
||||||
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Referer': url,
|
|
||||||
})
|
|
||||||
|
|
||||||
title = video['name']
|
title = video['name']
|
||||||
release_url = video['videoRelease']['url']
|
release_url = video['url']
|
||||||
|
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||||
description = video.get('description')
|
formats = self._extract_m3u8_formats(
|
||||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
m3u8_url, video_id, 'mp4',
|
||||||
video.get('duration')) or parse_duration(video.get('duration'))
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
timestamp = unified_timestamp(video.get('datePublished'))
|
self._sort_formats(formats)
|
||||||
rating = video.get('contentRating')
|
|
||||||
age_limit = parse_age_limit(rating)
|
|
||||||
|
|
||||||
data = try_get(
|
data = try_get(
|
||||||
video, lambda x: x['trackingData']['properties'], dict) or {}
|
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||||
|
|
||||||
|
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||||
|
video.get('duration')) or parse_duration(video.get('duration'))
|
||||||
|
timestamp = unified_timestamp(video.get('datePublished'))
|
||||||
creator = data.get('brand') or data.get('network') or video.get('network')
|
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||||
|
|
||||||
series = video.get('seriesName') or data.get(
|
series = video.get('seriesName') or data.get(
|
||||||
'seriesName') or data.get('show')
|
'seriesName') or data.get('show')
|
||||||
season_number = int_or_none(video.get('seasonNumber'))
|
|
||||||
episode = video.get('name')
|
|
||||||
episode_number = int_or_none(video.get('episodeNumber'))
|
|
||||||
release_year = int_or_none(video.get('releaseYear'))
|
|
||||||
|
|
||||||
if data.get('authRequired'):
|
|
||||||
resource = self._get_mvpd_resource(
|
|
||||||
'fbc-fox', title, video.get('guid'), rating)
|
|
||||||
release_url = update_url_query(
|
|
||||||
release_url, {
|
|
||||||
'auth': self._extract_mvpd_auth(
|
|
||||||
url, video_id, 'fbc-fox', resource)
|
|
||||||
})
|
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for doc_rel in video.get('documentReleases', []):
|
for doc_rel in video.get('documentReleases', []):
|
||||||
@@ -98,36 +108,19 @@ class FOXIE(AdobePassIE):
|
|||||||
}]
|
}]
|
||||||
break
|
break
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'formats': formats,
|
||||||
|
'description': video.get('description'),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'age_limit': age_limit,
|
'age_limit': parse_age_limit(video.get('contentRating')),
|
||||||
'creator': creator,
|
'creator': creator,
|
||||||
'series': series,
|
'series': series,
|
||||||
'season_number': season_number,
|
'season_number': int_or_none(video.get('seasonNumber')),
|
||||||
'episode': episode,
|
'episode': video.get('name'),
|
||||||
'episode_number': episode_number,
|
'episode_number': int_or_none(video.get('episodeNumber')),
|
||||||
'release_year': release_year,
|
'release_year': int_or_none(video.get('releaseYear')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
urlh = self._request_webpage(HEADRequest(release_url), video_id)
|
|
||||||
video_url = compat_str(urlh.geturl())
|
|
||||||
|
|
||||||
if UplynkPreplayIE.suitable(video_url):
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': video_url,
|
|
||||||
'ie_key': UplynkPreplayIE.ie_key(),
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
m3u8_url, video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
info['formats'] = formats
|
|
||||||
return info
|
|
||||||
|
@@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '162311093',
|
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||||
@@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
catalogue = None
|
catalogue = None
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
webpage, 'video id', default=None, group='id')
|
webpage, 'video id', default=None, group='id')
|
||||||
|
|
||||||
if not video_id:
|
if not video_id:
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class FreespeechIE(InfoExtractor):
|
class FreespeechIE(InfoExtractor):
|
||||||
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
|
|||||||
r'data-video-url="([^"]+)"',
|
r'data-video-url="([^"]+)"',
|
||||||
webpage, 'youtube url')
|
webpage, 'youtube url')
|
||||||
|
|
||||||
return {
|
return self.url_result(youtube_url, YoutubeIE.ie_key())
|
||||||
'_type': 'url',
|
|
||||||
'url': youtube_url,
|
|
||||||
'ie_key': 'Youtube',
|
|
||||||
}
|
|
||||||
|
@@ -1,6 +1,9 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = title_data.get('id') or self._search_regex([
|
video_id = title_data.get('id') or self._search_regex([
|
||||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||||
r'<iframe[^>]+src="/player/(\d+)"',
|
r'<iframe[^>]+src="/player/(\d+)',
|
||||||
], webpage, 'video_id', default=None)
|
], webpage, 'video_id', default=None)
|
||||||
if not video_id:
|
if not video_id:
|
||||||
player_url = self._html_search_meta([
|
player_url = self._html_search_meta([
|
||||||
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
|
|||||||
if self._TOKEN:
|
if self._TOKEN:
|
||||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||||
sources = self._download_json(
|
sources = self._download_json(
|
||||||
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
'https://www.funimation.com/api/showexperience/%s/' % video_id,
|
||||||
video_id, headers=headers)['items']
|
video_id, headers=headers, query={
|
||||||
|
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
|
||||||
|
})['items']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||||
|
98
youtube_dl/extractor/gaia.py
Normal file
98
youtube_dl/extractor/gaia.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GaiaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '89356',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Connecting with Universal Consciousness',
|
||||||
|
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
|
||||||
|
'upload_date': '20151116',
|
||||||
|
'timestamp': 1447707266,
|
||||||
|
'duration': 936,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '89351',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Connecting with Universal Consciousness',
|
||||||
|
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
|
||||||
|
'upload_date': '20151116',
|
||||||
|
'timestamp': 1447707266,
|
||||||
|
'duration': 53,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id, vtype = re.search(self._VALID_URL, url).groups()
|
||||||
|
node_id = self._download_json(
|
||||||
|
'https://brooklyn.gaia.com/pathinfo', display_id, query={
|
||||||
|
'path': 'video/' + display_id,
|
||||||
|
})['id']
|
||||||
|
node = self._download_json(
|
||||||
|
'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
|
||||||
|
vdata = node[vtype]
|
||||||
|
media_id = compat_str(vdata['nid'])
|
||||||
|
title = node['title']
|
||||||
|
|
||||||
|
media = self._download_json(
|
||||||
|
'https://brooklyn.gaia.com/media/' + media_id, media_id)
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
media['mediaUrls']['bcHLS'], media_id, 'mp4')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
text_tracks = media.get('textTracks', {})
|
||||||
|
for key in ('captions', 'subtitles'):
|
||||||
|
for lang, sub_url in text_tracks.get(key, {}).items():
|
||||||
|
subtitles.setdefault(lang, []).append({
|
||||||
|
'url': sub_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
fivestar = node.get('fivestar', {})
|
||||||
|
fields = node.get('fields', {})
|
||||||
|
|
||||||
|
def get_field_value(key, value_key='value'):
|
||||||
|
return try_get(fields, lambda x: x[key][0][value_key])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': media_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
|
||||||
|
'timestamp': int_or_none(node.get('created')),
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': int_or_none(vdata.get('duration')),
|
||||||
|
'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
|
||||||
|
'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
|
||||||
|
'comment_count': int_or_none(node.get('comment_count')),
|
||||||
|
'series': try_get(node, lambda x: x['series']['title'], compat_str),
|
||||||
|
'season_number': int_or_none(get_field_value('season')),
|
||||||
|
'season_id': str_or_none(get_field_value('series_nid', 'nid')),
|
||||||
|
'episode_number': int_or_none(get_field_value('episode')),
|
||||||
|
}
|
@@ -2197,10 +2197,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
return {
|
return self.url_result(self.http_scheme() + url)
|
||||||
'_type': 'url',
|
|
||||||
'url': self.http_scheme() + url,
|
|
||||||
}
|
|
||||||
|
|
||||||
parsed_url = compat_urlparse.urlparse(url)
|
parsed_url = compat_urlparse.urlparse(url)
|
||||||
if not parsed_url.scheme:
|
if not parsed_url.scheme:
|
||||||
|
@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._download_json(
|
glb_id = (self._download_json(
|
||||||
'https://login.globo.com/api/authentication', None, data=json.dumps({
|
'https://login.globo.com/api/authentication', None, data=json.dumps({
|
||||||
'payload': {
|
'payload': {
|
||||||
'email': email,
|
'email': email,
|
||||||
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}).encode(), headers={
|
}).encode(), headers={
|
||||||
'Content-Type': 'application/json; charset=utf-8',
|
'Content-Type': 'application/json; charset=utf-8',
|
||||||
})
|
}) or {}).get('glbId')
|
||||||
|
if glb_id:
|
||||||
|
self._set_cookie('.globo.com', 'GLBID', glb_id)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
resp = self._parse_json(e.cause.read(), None)
|
resp = self._parse_json(e.cause.read(), None)
|
||||||
|
@@ -25,15 +25,15 @@ class GoIE(AdobePassIE):
|
|||||||
},
|
},
|
||||||
'watchdisneychannel': {
|
'watchdisneychannel': {
|
||||||
'brand': '004',
|
'brand': '004',
|
||||||
'requestor_id': 'Disney',
|
'resource_id': 'Disney',
|
||||||
},
|
},
|
||||||
'watchdisneyjunior': {
|
'watchdisneyjunior': {
|
||||||
'brand': '008',
|
'brand': '008',
|
||||||
'requestor_id': 'DisneyJunior',
|
'resource_id': 'DisneyJunior',
|
||||||
},
|
},
|
||||||
'watchdisneyxd': {
|
'watchdisneyxd': {
|
||||||
'brand': '009',
|
'brand': '009',
|
||||||
'requestor_id': 'DisneyXD',
|
'resource_id': 'DisneyXD',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||||
@@ -130,8 +130,8 @@ class GoIE(AdobePassIE):
|
|||||||
'device': '001',
|
'device': '001',
|
||||||
}
|
}
|
||||||
if video_data.get('accesslevel') == '1':
|
if video_data.get('accesslevel') == '1':
|
||||||
requestor_id = site_info['requestor_id']
|
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
|
||||||
resource = self._get_mvpd_resource(
|
resource = site_info.get('resource_id') or self._get_mvpd_resource(
|
||||||
requestor_id, title, video_id, None)
|
requestor_id, title, video_id, None)
|
||||||
auth = self._extract_mvpd_auth(
|
auth = self._extract_mvpd_auth(
|
||||||
url, video_id, requestor_id, resource)
|
url, video_id, requestor_id, resource)
|
||||||
|
191
youtube_dl/extractor/hketv.py
Normal file
191
youtube_dl/extractor/hketv.py
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_count,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HKETVIE(InfoExtractor):
|
||||||
|
IE_NAME = 'hketv'
|
||||||
|
IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
_GEO_COUNTRIES = ['HK']
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.hkedcity.net/etv/resource/2932360618',
|
||||||
|
'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2932360618',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
|
||||||
|
'description': 'md5:d5286d05219ef50e0613311cbe96e560',
|
||||||
|
'upload_date': '20181024',
|
||||||
|
'duration': 900,
|
||||||
|
'subtitles': 'count:2',
|
||||||
|
},
|
||||||
|
'skip': 'Geo restricted to HK',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.hkedcity.net/etv/resource/972641418',
|
||||||
|
'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '972641418',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '衣冠楚楚 (天使系列之一)',
|
||||||
|
'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
|
||||||
|
'upload_date': '20070109',
|
||||||
|
'duration': 907,
|
||||||
|
'subtitles': {},
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'geo_verification_proxy': '<HK proxy here>',
|
||||||
|
},
|
||||||
|
'skip': 'Geo restricted to HK',
|
||||||
|
}]
|
||||||
|
|
||||||
|
_CC_LANGS = {
|
||||||
|
'中文(繁體中文)': 'zh-Hant',
|
||||||
|
'中文(简体中文)': 'zh-Hans',
|
||||||
|
'English': 'en',
|
||||||
|
'Bahasa Indonesia': 'id',
|
||||||
|
'\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
|
||||||
|
'\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
|
||||||
|
'Tagalog': 'tl',
|
||||||
|
'\u0e44\u0e17\u0e22': 'th',
|
||||||
|
'\u0627\u0631\u062f\u0648': 'ur',
|
||||||
|
}
|
||||||
|
_FORMAT_HEIGHTS = {
|
||||||
|
'SD': 360,
|
||||||
|
'HD': 720,
|
||||||
|
}
|
||||||
|
_APPS_BASE_URL = 'https://apps.hkedcity.net'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = (
|
||||||
|
self._html_search_meta(
|
||||||
|
('ed_title', 'search.ed_title'), webpage, default=None) or
|
||||||
|
self._search_regex(
|
||||||
|
r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'title', default=None, group='url') or
|
||||||
|
self._html_search_regex(
|
||||||
|
r'<h1>([^<]+)</h1>', webpage, 'title', default=None) or
|
||||||
|
self._og_search_title(webpage)
|
||||||
|
)
|
||||||
|
|
||||||
|
file_id = self._search_regex(
|
||||||
|
r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
|
||||||
|
webpage, 'file ID')
|
||||||
|
curr_url = self._search_regex(
|
||||||
|
r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
|
||||||
|
webpage, 'curr URL')
|
||||||
|
data = {
|
||||||
|
'action': 'get_info',
|
||||||
|
'curr_url': curr_url,
|
||||||
|
'file_id': file_id,
|
||||||
|
'video_url': file_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
response = self._download_json(
|
||||||
|
self._APPS_BASE_URL + '/media/play/handler.php', video_id,
|
||||||
|
data=urlencode_postdata(data),
|
||||||
|
headers=merge_dicts({
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded'},
|
||||||
|
self.geo_verification_headers()))
|
||||||
|
|
||||||
|
result = response['result']
|
||||||
|
|
||||||
|
if not response.get('success') or not response.get('access'):
|
||||||
|
error = clean_html(response.get('access_err_msg'))
|
||||||
|
if 'Video streaming is not available in your country' in error:
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
msg=error, countries=self._GEO_COUNTRIES)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
width = int_or_none(result.get('width'))
|
||||||
|
height = int_or_none(result.get('height'))
|
||||||
|
|
||||||
|
playlist0 = result['playlist'][0]
|
||||||
|
for fmt in playlist0['sources']:
|
||||||
|
file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
|
||||||
|
if not file_url:
|
||||||
|
continue
|
||||||
|
# If we ever wanted to provide the final resolved URL that
|
||||||
|
# does not require cookies, albeit with a shorter lifespan:
|
||||||
|
# urlh = self._downloader.urlopen(file_url)
|
||||||
|
# resolved_url = urlh.geturl()
|
||||||
|
label = fmt.get('label')
|
||||||
|
h = self._FORMAT_HEIGHTS.get(label)
|
||||||
|
w = h * width // height if h and width and height else None
|
||||||
|
formats.append({
|
||||||
|
'format_id': label,
|
||||||
|
'ext': fmt.get('type'),
|
||||||
|
'url': file_url,
|
||||||
|
'width': w,
|
||||||
|
'height': h,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
|
||||||
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
|
track_kind = str_or_none(track.get('kind'))
|
||||||
|
if not track_kind or not isinstance(track_kind, compat_str):
|
||||||
|
continue
|
||||||
|
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||||
|
continue
|
||||||
|
track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
track_label = track.get('label')
|
||||||
|
subtitles.setdefault(self._CC_LANGS.get(
|
||||||
|
track_label, track_label), []).append({
|
||||||
|
'url': self._proto_relative_url(track_url),
|
||||||
|
'ext': 'srt',
|
||||||
|
})
|
||||||
|
|
||||||
|
# Likes
|
||||||
|
emotion = self._download_json(
|
||||||
|
'https://emocounter.hkedcity.net/handler.php', video_id,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'action': 'get_emotion',
|
||||||
|
'data[bucket_id]': 'etv',
|
||||||
|
'data[identifier]': video_id,
|
||||||
|
}),
|
||||||
|
headers={'Content-Type': 'application/x-www-form-urlencoded'},
|
||||||
|
fatal=False) or {}
|
||||||
|
like_count = int_or_none(try_get(
|
||||||
|
emotion, lambda x: x['data']['emotion_data'][0]['count']))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._html_search_meta(
|
||||||
|
'description', webpage, fatal=False),
|
||||||
|
'upload_date': unified_strdate(self._html_search_meta(
|
||||||
|
'ed_date', webpage, fatal=False), day_first=False),
|
||||||
|
'duration': int_or_none(result.get('length')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
|
||||||
|
'view_count': parse_count(result.get('view_count')),
|
||||||
|
'like_count': like_count,
|
||||||
|
}
|
117
youtube_dl/extractor/hungama.py
Normal file
117
youtube_dl/extractor/hungama.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HungamaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?hungama\.com/
|
||||||
|
(?:
|
||||||
|
(?:video|movie)/[^/]+/|
|
||||||
|
tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
|
||||||
|
'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2931166',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Lucky Ali - Kitni Haseen Zindagi',
|
||||||
|
'track': 'Kitni Haseen Zindagi',
|
||||||
|
'artist': 'Lucky Ali',
|
||||||
|
'album': 'Aks',
|
||||||
|
'release_year': 2000,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
info = self._search_json_ld(webpage, video_id)
|
||||||
|
|
||||||
|
m3u8_url = self._download_json(
|
||||||
|
'https://www.hungama.com/index.php', video_id,
|
||||||
|
data=urlencode_postdata({'content_id': video_id}), headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
}, query={
|
||||||
|
'c': 'common',
|
||||||
|
'm': 'get_video_mdn_url',
|
||||||
|
})['stream_url']
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class HungamaSongIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
|
||||||
|
'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2931166',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Lucky Ali - Kitni Haseen Zindagi',
|
||||||
|
'track': 'Kitni Haseen Zindagi',
|
||||||
|
'artist': 'Lucky Ali',
|
||||||
|
'album': 'Aks',
|
||||||
|
'release_year': 2000,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
audio_id = self._match_id(url)
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
|
||||||
|
audio_id, query={'_country': 'IN'})[0]
|
||||||
|
|
||||||
|
track = data['song_name']
|
||||||
|
artist = data.get('singer_name')
|
||||||
|
|
||||||
|
m3u8_url = self._download_json(
|
||||||
|
data.get('file') or data['preview_link'],
|
||||||
|
audio_id)['response']['media_url']
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = '%s - %s' % (artist, track) if artist else track
|
||||||
|
thumbnail = data.get('img_src') or data.get('album_image')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': audio_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'track': track,
|
||||||
|
'artist': artist,
|
||||||
|
'album': data.get('album_name'),
|
||||||
|
'release_year': int_or_none(data.get('date')),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -27,6 +27,10 @@ class ImgurIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# no title
|
||||||
|
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -87,7 +91,7 @@ class ImgurIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage, default=video_id),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -227,44 +227,37 @@ class InstagramIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class InstagramUserIE(InfoExtractor):
|
class InstagramPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
|
# A superclass for handling any kind of query based on GraphQL which
|
||||||
IE_DESC = 'Instagram user profile'
|
# results in a playlist.
|
||||||
IE_NAME = 'instagram:user'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://instagram.com/porsche',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'porsche',
|
|
||||||
'title': 'porsche',
|
|
||||||
},
|
|
||||||
'playlist_count': 5,
|
|
||||||
'params': {
|
|
||||||
'extract_flat': True,
|
|
||||||
'skip_download': True,
|
|
||||||
'playlistend': 5,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
_gis_tmpl = None
|
_gis_tmpl = None # used to cache GIS request type
|
||||||
|
|
||||||
def _entries(self, data):
|
def _parse_graphql(self, webpage, item_id):
|
||||||
|
# Reads a webpage and returns its GraphQL data.
|
||||||
|
return self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
||||||
|
item_id)
|
||||||
|
|
||||||
|
def _extract_graphql(self, data, url):
|
||||||
|
# Parses GraphQL queries containing videos and generates a playlist.
|
||||||
def get_count(suffix):
|
def get_count(suffix):
|
||||||
return int_or_none(try_get(
|
return int_or_none(try_get(
|
||||||
node, lambda x: x['edge_media_' + suffix]['count']))
|
node, lambda x: x['edge_media_' + suffix]['count']))
|
||||||
|
|
||||||
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
uploader_id = self._match_id(url)
|
||||||
csrf_token = data['config']['csrf_token']
|
csrf_token = data['config']['csrf_token']
|
||||||
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
|
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
|
||||||
|
|
||||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
|
||||||
|
|
||||||
cursor = ''
|
cursor = ''
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
variables = json.dumps({
|
variables = {
|
||||||
'id': uploader_id,
|
|
||||||
'first': 12,
|
'first': 12,
|
||||||
'after': cursor,
|
'after': cursor,
|
||||||
})
|
}
|
||||||
|
variables.update(self._query_vars_for(data))
|
||||||
|
variables = json.dumps(variables)
|
||||||
|
|
||||||
if self._gis_tmpl:
|
if self._gis_tmpl:
|
||||||
gis_tmpls = [self._gis_tmpl]
|
gis_tmpls = [self._gis_tmpl]
|
||||||
@@ -276,21 +269,26 @@ class InstagramUserIE(InfoExtractor):
|
|||||||
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
|
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# try all of the ways to generate a GIS query, and not only use the
|
||||||
|
# first one that works, but cache it for future requests
|
||||||
for gis_tmpl in gis_tmpls:
|
for gis_tmpl in gis_tmpls:
|
||||||
try:
|
try:
|
||||||
media = self._download_json(
|
json_data = self._download_json(
|
||||||
'https://www.instagram.com/graphql/query/', uploader_id,
|
'https://www.instagram.com/graphql/query/', uploader_id,
|
||||||
'Downloading JSON page %d' % page_num, headers={
|
'Downloading JSON page %d' % page_num, headers={
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
'X-Instagram-GIS': hashlib.md5(
|
'X-Instagram-GIS': hashlib.md5(
|
||||||
('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
|
('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
|
||||||
}, query={
|
}, query={
|
||||||
'query_hash': '42323d64886122307be10013ad2dcc44',
|
'query_hash': self._QUERY_HASH,
|
||||||
'variables': variables,
|
'variables': variables,
|
||||||
})['data']['user']['edge_owner_to_timeline_media']
|
})
|
||||||
|
media = self._parse_timeline_from(json_data)
|
||||||
self._gis_tmpl = gis_tmpl
|
self._gis_tmpl = gis_tmpl
|
||||||
break
|
break
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
# if it's an error caused by a bad query, and there are
|
||||||
|
# more GIS templates to try, ignore it and keep trying
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
if gis_tmpl != gis_tmpls[-1]:
|
if gis_tmpl != gis_tmpls[-1]:
|
||||||
continue
|
continue
|
||||||
@@ -348,14 +346,80 @@ class InstagramUserIE(InfoExtractor):
|
|||||||
break
|
break
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
username = self._match_id(url)
|
user_or_tag = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, user_or_tag)
|
||||||
|
data = self._parse_graphql(webpage, user_or_tag)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, username)
|
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||||
|
|
||||||
data = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
|
||||||
username)
|
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(data), username, username)
|
self._extract_graphql(data, url), user_or_tag, user_or_tag)
|
||||||
|
|
||||||
|
|
||||||
|
class InstagramUserIE(InstagramPlaylistIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
|
||||||
|
IE_DESC = 'Instagram user profile'
|
||||||
|
IE_NAME = 'instagram:user'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://instagram.com/porsche',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'porsche',
|
||||||
|
'title': 'porsche',
|
||||||
|
},
|
||||||
|
'playlist_count': 5,
|
||||||
|
'params': {
|
||||||
|
'extract_flat': True,
|
||||||
|
'skip_download': True,
|
||||||
|
'playlistend': 5,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_QUERY_HASH = '42323d64886122307be10013ad2dcc44',
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_timeline_from(data):
|
||||||
|
# extracts the media timeline data from a GraphQL result
|
||||||
|
return data['data']['user']['edge_owner_to_timeline_media']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _query_vars_for(data):
|
||||||
|
# returns a dictionary of variables to add to the timeline query based
|
||||||
|
# on the GraphQL of the original page
|
||||||
|
return {
|
||||||
|
'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class InstagramTagIE(InstagramPlaylistIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
|
||||||
|
IE_DESC = 'Instagram hashtag search'
|
||||||
|
IE_NAME = 'instagram:tag'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://instagram.com/explore/tags/lolcats',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lolcats',
|
||||||
|
'title': 'lolcats',
|
||||||
|
},
|
||||||
|
'playlist_count': 50,
|
||||||
|
'params': {
|
||||||
|
'extract_flat': True,
|
||||||
|
'skip_download': True,
|
||||||
|
'playlistend': 50,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314',
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_timeline_from(data):
|
||||||
|
# extracts the media timeline data from a GraphQL result
|
||||||
|
return data['data']['hashtag']['edge_hashtag_to_media']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _query_vars_for(data):
|
||||||
|
# returns a dictionary of variables to add to the timeline query based
|
||||||
|
# on the GraphQL of the original page
|
||||||
|
return {
|
||||||
|
'tag_name':
|
||||||
|
data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
|
||||||
|
}
|
||||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class IPrimaIE(InfoExtractor):
|
class IPrimaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:play|prima|www)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -44,6 +44,21 @@ class IPrimaIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.iprima.cz/filmy/desne-rande',
|
'url': 'http://www.iprima.cz/filmy/desne-rande',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class JWPlatformIE(InfoExtractor):
|
class JWPlatformIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||||
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor):
|
|||||||
'upload_date': '20081127',
|
'upload_date': '20081127',
|
||||||
'timestamp': 1227796140,
|
'timestamp': 1227796140,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
@@ -34,5 +37,5 @@ class JWPlatformIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
|
json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
|
||||||
return self._parse_jwplayer_data(json_data, video_id)
|
return self._parse_jwplayer_data(json_data, video_id)
|
||||||
|
@@ -64,8 +64,14 @@ class LecturioBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class LecturioIE(LecturioBaseIE):
|
class LecturioIE(LecturioBaseIE):
|
||||||
_VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture'
|
_VALID_URL = r'''(?x)
|
||||||
_TEST = {
|
https://
|
||||||
|
(?:
|
||||||
|
app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
|
||||||
|
(?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
|
'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
|
||||||
'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
|
'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -74,7 +80,10 @@ class LecturioIE(LecturioBaseIE):
|
|||||||
'title': 'Important Concepts and Terms – Introduction to Microbiology',
|
'title': 'Important Concepts and Terms – Introduction to Microbiology',
|
||||||
},
|
},
|
||||||
'skip': 'Requires lecturio account credentials',
|
'skip': 'Requires lecturio account credentials',
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_CC_LANGS = {
|
_CC_LANGS = {
|
||||||
'German': 'de',
|
'German': 'de',
|
||||||
@@ -86,7 +95,8 @@ class LecturioIE(LecturioBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id') or mobj.group('id_de')
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
|
'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
|
||||||
@@ -136,9 +146,15 @@ class LecturioIE(LecturioBaseIE):
|
|||||||
cc_url = url_or_none(cc_url)
|
cc_url = url_or_none(cc_url)
|
||||||
if not cc_url:
|
if not cc_url:
|
||||||
continue
|
continue
|
||||||
sub_dict = automatic_captions if 'auto-translated' in cc_label else subtitles
|
|
||||||
lang = self._search_regex(
|
lang = self._search_regex(
|
||||||
r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0])
|
r'/([a-z]{2})_', cc_url, 'lang',
|
||||||
|
default=cc_label.split()[0] if cc_label else 'en')
|
||||||
|
original_lang = self._search_regex(
|
||||||
|
r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
|
||||||
|
default=None)
|
||||||
|
sub_dict = (automatic_captions
|
||||||
|
if 'auto-translated' in cc_label or original_lang
|
||||||
|
else subtitles)
|
||||||
sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
|
sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
|
||||||
'url': cc_url,
|
'url': cc_url,
|
||||||
})
|
})
|
||||||
@@ -184,3 +200,30 @@ class LecturioCourseIE(LecturioBaseIE):
|
|||||||
'title', default=None)
|
'title', default=None)
|
||||||
|
|
||||||
return self.playlist_result(entries, display_id, title)
|
return self.playlist_result(entries, display_id, title)
|
||||||
|
|
||||||
|
|
||||||
|
class LecturioDeCourseIE(LecturioBaseIE):
|
||||||
|
_VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
|
||||||
|
webpage):
|
||||||
|
lecture_url = urljoin(url, mobj.group('url'))
|
||||||
|
lecture_id = mobj.group('id')
|
||||||
|
entries.append(self.url_result(
|
||||||
|
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, display_id, title)
|
||||||
|
@@ -1,12 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
get_element_by_class,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
strip_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor):
|
|||||||
'id': '6385796',
|
'id': '6385796',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': "Champion Minded - Developing a Growth Mindset",
|
'title': "Champion Minded - Developing a Growth Mindset",
|
||||||
'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
# description fetched using another request:
|
||||||
|
# http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
|
||||||
|
# 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
||||||
'upload_date': '20180320',
|
'upload_date': '20180320',
|
||||||
'thumbnail': 're:^https?://.*',
|
'thumbnail': 're:^https?://.*',
|
||||||
},
|
},
|
||||||
@@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
url, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
video_id = m.group('id')
|
|
||||||
url = m.group('mainurl')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
podcast_title = self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
|
r'var\s+playlistItem\s*=\s*({.+?});',
|
||||||
if podcast_title:
|
webpage, 'JSON data block'), video_id)
|
||||||
podcast_title = podcast_title.strip()
|
|
||||||
episode_title = self._search_regex(
|
episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
|
||||||
r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
|
if not episode_title:
|
||||||
if episode_title:
|
self._search_regex(
|
||||||
episode_title = episode_title.strip()
|
[r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
|
||||||
|
webpage, 'episode title')
|
||||||
|
episode_title = episode_title.strip()
|
||||||
|
|
||||||
|
podcast_title = strip_or_none(clean_html(self._search_regex(
|
||||||
|
r'<h3>([^<]+)</h3>', webpage, 'podcast title',
|
||||||
|
default=None) or get_element_by_class('podcast-title', webpage)))
|
||||||
|
|
||||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
|
||||||
|
f_url = data.get(k)
|
||||||
|
if not f_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': f_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
||||||
'description', default=None)
|
'description', default=None)
|
||||||
@@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor):
|
|||||||
# Strip non-breaking and normal spaces
|
# Strip non-breaking and normal spaces
|
||||||
description = description.replace('\u00A0', ' ').strip()
|
description = description.replace('\u00A0', ' ').strip()
|
||||||
release_date = unified_strdate(self._search_regex(
|
release_date = unified_strdate(self._search_regex(
|
||||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
r'<div class="release_date">Released: ([^<]+)<',
|
||||||
|
webpage, 'release date', default=None) or data.get('release_date'))
|
||||||
data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
|
|
||||||
data = json.loads(data_json)
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': data['media_url'],
|
|
||||||
'format_id': 'main',
|
|
||||||
}, {
|
|
||||||
'url': data['media_url_libsyn'],
|
|
||||||
'format_id': 'libsyn',
|
|
||||||
}]
|
|
||||||
thumbnail = data.get('thumbnail_url')
|
|
||||||
duration = parse_duration(data.get('duration'))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': data.get('thumbnail_url'),
|
||||||
'upload_date': release_date,
|
'upload_date': release_date,
|
||||||
'duration': duration,
|
'duration': parse_duration(data.get('duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@@ -34,12 +34,15 @@ class LinkedInLearningBaseIE(InfoExtractor):
|
|||||||
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
||||||
}, query=query)['elements'][0]
|
}, query=query)['elements'][0]
|
||||||
|
|
||||||
def _get_video_id(self, urn, course_slug, video_slug):
|
def _get_urn_id(self, video_data):
|
||||||
|
urn = video_data.get('urn')
|
||||||
if urn:
|
if urn:
|
||||||
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
|
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group(1)
|
return mobj.group(1)
|
||||||
return '%s/%s' % (course_slug, video_slug)
|
|
||||||
|
def _get_video_id(self, video_data, course_slug, video_slug):
|
||||||
|
return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
email, password = self._get_login_info()
|
email, password = self._get_login_info()
|
||||||
@@ -123,7 +126,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
|||||||
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
|
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': video_data.get('defaultThumbnail'),
|
'thumbnail': video_data.get('defaultThumbnail'),
|
||||||
@@ -154,18 +157,21 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
|||||||
course_data = self._call_api(course_slug, 'chapters,description,title')
|
course_data = self._call_api(course_slug, 'chapters,description,title')
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for chapter in course_data.get('chapters', []):
|
for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
|
||||||
chapter_title = chapter.get('title')
|
chapter_title = chapter.get('title')
|
||||||
|
chapter_id = self._get_urn_id(chapter)
|
||||||
for video in chapter.get('videos', []):
|
for video in chapter.get('videos', []):
|
||||||
video_slug = video.get('slug')
|
video_slug = video.get('slug')
|
||||||
if not video_slug:
|
if not video_slug:
|
||||||
continue
|
continue
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
|
'id': self._get_video_id(video, course_slug, video_slug),
|
||||||
'title': video.get('title'),
|
'title': video.get('title'),
|
||||||
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
|
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
|
||||||
'chapter': chapter_title,
|
'chapter': chapter_title,
|
||||||
|
'chapter_number': chapter_number,
|
||||||
|
'chapter_id': chapter_id,
|
||||||
'ie_key': LinkedInLearningIE.ie_key(),
|
'ie_key': LinkedInLearningIE.ie_key(),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
174
youtube_dl/extractor/linuxacademy.py
Normal file
174
youtube_dl/extractor/linuxacademy.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_b64decode,
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
orderedSet,
|
||||||
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LinuxAcademyIE(InfoExtractor):
    """Extractor for linuxacademy.com single lessons and course modules.

    Requires account credentials (``--username``/``--password`` or a netrc
    entry for machine 'linuxacademy'); authentication is performed against
    the site's Auth0 login service.
    """
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?linuxacademy\.com/cp/
                        (?:
                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
                            modules/view/id/(?P<course_id>\d+)
                        )
                    '''
    _TESTS = [{
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
        'info_dict': {
            'id': '1498-2',
            'ext': 'mp4',
            'title': "Introduction to the Practitioner's Brief",
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Linux Academy account credentials',
    }, {
        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
        'only_matching': True,
    }, {
        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
            'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
    }]

    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
    _ORIGIN_URL = 'https://linuxacademy.com'
    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
    _NETRC_MACHINE = 'linuxacademy'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Perform the Auth0 login dance and validate the resulting token.

        No-op when no credentials were supplied; raises ExtractorError with
        the server-provided message on a 401 from the login endpoint.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        def random_string():
            # 32 characters drawn from the RFC 3986 "unreserved" set
            # (A-Za-z0-9-._~), used for the OAuth state/nonce values.
            # Note: the charset previously omitted 'W' by mistake.
            return ''.join([
                random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-._~')
                for _ in range(32)])

        webpage, urlh = self._download_webpage_handle(
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
                'state': random_string(),
                'nonce': random_string(),
            })

        # The authorize page embeds base64-encoded login parameters inside
        # an atob(...) call; decode and pull out the extraParams object.
        login_data = self._parse_json(
            self._search_regex(
                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
                'login info', group='value'), None,
            transform_source=lambda x: compat_b64decode(x).decode('utf-8')
            )['extraParams']

        login_data.update({
            'client_id': self._CLIENT_ID,
            'redirect_uri': self._ORIGIN_URL,
            'tenant': 'lacausers',
            'connection': 'Username-Password-Authentication',
            'username': username,
            'password': password,
            'sso': 'true',
        })

        login_state_url = compat_str(urlh.geturl())

        try:
            login_page = self._download_webpage(
                'https://login.linuxacademy.com/usernamepassword/login', None,
                'Downloading login page', data=json.dumps(login_data).encode(),
                headers={
                    'Content-Type': 'application/json',
                    'Origin': 'https://login.linuxacademy.com',
                    'Referer': login_state_url,
                })
        except ExtractorError as e:
            # 401 carries a JSON body describing why the login failed.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                error = self._parse_json(e.cause.read(), None)
                message = error.get('description') or error['code']
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, message), expected=True)
            raise

        # Submit the hidden form from the login page to complete the
        # callback leg; the redirect URL fragment carries the access token.
        callback_page, urlh = self._download_webpage_handle(
            'https://login.linuxacademy.com/login/callback', None,
            'Downloading callback page',
            data=urlencode_postdata(self._hidden_inputs(login_page)),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Origin': 'https://login.linuxacademy.com',
                'Referer': login_state_url,
            })

        access_token = self._search_regex(
            r'access_token=([^=&]+)', compat_str(urlh.geturl()),
            'access token')

        # Exchanges the Auth0 token for a site session (sets cookies).
        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
            % access_token, None, 'Downloading token validation page')

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
        item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)

        webpage = self._download_webpage(url, item_id)

        # course path: build a playlist of the individual lesson pages
        if course_id:
            entries = [
                self.url_result(
                    urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
                for lesson_url in orderedSet(re.findall(
                    r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
                    webpage))]
            title = unescapeHTML(self._html_search_regex(
                (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
                 r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
                webpage, 'title', default=None, group='value'))
            description = unescapeHTML(self._html_search_regex(
                r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                webpage, 'description', default=None, group='value'))
            return self.playlist_result(entries, course_id, title, description)

        # single video path
        info = self._extract_jwplayer_data(
            webpage, item_id, require_title=False, m3u8_id='hls')
        title = self._search_regex(
            (r'>Lecture\s*:\s*(?P<value>[^<]+)',
             r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
            'title', group='value')
        info.update({
            'id': item_id,
            'title': title,
        })
        return info
|
@@ -87,7 +87,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
|
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -120,13 +120,27 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for idx, info_dict in enumerate(entries):
|
for idx, info_dict in enumerate(entries):
|
||||||
|
formats = []
|
||||||
for a_format in info_dict['formats']:
|
for a_format in info_dict['formats']:
|
||||||
if not a_format.get('height'):
|
if not a_format.get('height'):
|
||||||
a_format['height'] = int_or_none(self._search_regex(
|
a_format['height'] = int_or_none(self._search_regex(
|
||||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||||
default=None))
|
default=None))
|
||||||
|
formats.append(a_format)
|
||||||
|
|
||||||
self._sort_formats(info_dict['formats'])
|
# Removing '.*.mp4' gives the raw video, which is essentially
|
||||||
|
# the same video without the LiveLeak logo at the top (see
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/4768)
|
||||||
|
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||||
|
if a_format['url'] != orig_url:
|
||||||
|
format_id = a_format.get('format_id')
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'original' + ('-' + format_id if format_id else ''),
|
||||||
|
'url': orig_url,
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info_dict['formats'] = formats
|
||||||
|
|
||||||
# Don't append entry ID for one-video pages to keep backward compatibility
|
# Don't append entry ID for one-video pages to keep backward compatibility
|
||||||
if len(entries) > 1:
|
if len(entries) > 1:
|
||||||
@@ -146,7 +160,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class LiveLeakEmbedIE(InfoExtractor):
|
class LiveLeakEmbedIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
|
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
|
||||||
|
|
||||||
# See generic.py for actual test cases
|
# See generic.py for actual test cases
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -158,15 +172,14 @@ class LiveLeakEmbedIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
kind, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
kind, video_id = mobj.group('kind', 'id')
|
|
||||||
|
|
||||||
if kind == 'f':
|
if kind == 'f':
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
liveleak_url = self._search_regex(
|
liveleak_url = self._search_regex(
|
||||||
r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||||
webpage, 'LiveLeak URL', group='url')
|
webpage, 'LiveLeak URL', group='url')
|
||||||
elif kind == 'i':
|
else:
|
||||||
liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
|
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
|
||||||
|
|
||||||
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
||||||
|
@@ -363,7 +363,4 @@ class LivestreamShortenerIE(InfoExtractor):
|
|||||||
id = mobj.group('id')
|
id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, id)
|
webpage = self._download_webpage(url, id)
|
||||||
|
|
||||||
return {
|
return self.url_result(self._og_search_url(webpage))
|
||||||
'_type': 'url',
|
|
||||||
'url': self._og_search_url(webpage),
|
|
||||||
}
|
|
||||||
|
53
youtube_dl/extractor/malltv.py
Normal file
53
youtube_dl/extractor/malltv.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import merge_dicts
|
||||||
|
|
||||||
|
|
||||||
|
class MallTVIE(InfoExtractor):
    """Extractor for videos hosted on mall.tv."""
    # Any path depth is accepted; the final path component is the display id.
    _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'md5': '1c4a37f080e1f3023103a7b43458e518',
        'info_dict': {
            'id': 't0zzt0',
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
            'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
            'view_count': int,
        }
    }, {
        'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        # The <source> src attribute ends with .../<video id>/index; capture
        # the id, then reuse the same pattern to rewrite the tag so that it
        # points at the HLS manifest (".../index.m3u8").
        SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
        video_id = self._search_regex(
            SOURCE_RE, webpage, 'video id', group='id')

        hls_webpage = re.sub(SOURCE_RE, r'\1.m3u8', webpage)
        media = self._parse_html5_media_entries(
            url, hls_webpage, video_id,
            m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]

        # JSON-LD supplies timestamp/duration/view_count when present.
        json_ld = self._search_json_ld(webpage, video_id, default={})

        return merge_dicts(media, json_ld, {
            'id': video_id,
            'display_id': display_id,
            'title': self._og_search_title(webpage, default=None) or display_id,
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
        })
|
@@ -2,12 +2,18 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
str_to_int,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ManyVidsIE(InfoExtractor):
|
class ManyVidsIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# preview video
|
||||||
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||||
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -17,7 +23,18 @@ class ManyVidsIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# full video
|
||||||
|
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||||
|
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '935718',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'MY FACE REVEAL',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -28,12 +45,41 @@ class ManyVidsIE(InfoExtractor):
|
|||||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
webpage, 'video URL', group='url')
|
webpage, 'video URL', group='url')
|
||||||
|
|
||||||
title = '%s (Preview)' % self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
|
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||||
|
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||||
|
webpage, 'title', default=None) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||||
|
title += ' (Preview)'
|
||||||
|
|
||||||
|
mv_token = self._search_regex(
|
||||||
|
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'mv token', default=None, group='value')
|
||||||
|
|
||||||
|
if mv_token:
|
||||||
|
# Sets some cookies
|
||||||
|
self._download_webpage(
|
||||||
|
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||||
|
video_id, fatal=False, data=urlencode_postdata({
|
||||||
|
'mvtoken': mv_token,
|
||||||
|
'vid': video_id,
|
||||||
|
}), headers={
|
||||||
|
'Referer': url,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest'
|
||||||
|
})
|
||||||
|
|
||||||
|
if determine_ext(video_url) == 'm3u8':
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
|
else:
|
||||||
|
formats = [{'url': video_url}]
|
||||||
|
|
||||||
like_count = int_or_none(self._search_regex(
|
like_count = int_or_none(self._search_regex(
|
||||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||||
'view count', default=None))
|
'view count', default=None))
|
||||||
|
|
||||||
@@ -42,7 +88,5 @@ class ManyVidsIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'formats': [{
|
'formats': formats,
|
||||||
'url': video_url,
|
|
||||||
}],
|
|
||||||
}
|
}
|
||||||
|
@@ -21,7 +21,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MediasiteIE(InfoExtractor):
|
class MediasiteIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
|
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
|
||||||
@@ -84,7 +84,15 @@ class MediasiteIE(InfoExtractor):
|
|||||||
'timestamp': 1333983600,
|
'timestamp': 1333983600,
|
||||||
'duration': 7794,
|
'duration': 7794,
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
|
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
|
||||||
|
@@ -1,12 +1,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor):
|
|||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
# Disable family filter
|
# Disable family filter
|
||||||
'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
|
'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
|
||||||
}
|
}
|
||||||
|
|
||||||
# AnyClip videos require the flashversion cookie so that we get the link
|
# AnyClip videos require the flashversion cookie so that we get the link
|
||||||
|
@@ -1,15 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .adobepass import AdobePassIE
|
from .fox import FOXIE
|
||||||
from .theplatform import ThePlatformIE
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
url_basename,
|
url_basename,
|
||||||
update_url_query,
|
|
||||||
get_element_by_class,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -66,130 +61,22 @@ class NationalGeographicVideoIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
class NationalGeographicTVIE(FOXIE):
|
||||||
IE_NAME = 'natgeo'
|
_VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
|
||||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
|
_TESTS = [{
|
||||||
|
'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
|
||||||
_TESTS = [
|
'info_dict': {
|
||||||
{
|
'id': '6a875e6e734b479beda26438c9f21138',
|
||||||
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
|
'ext': 'mp4',
|
||||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
'title': 'Why Nat Geo? Valley of the Boom',
|
||||||
'info_dict': {
|
'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.',
|
||||||
'id': 'vKInpacll2pC',
|
'timestamp': 1542662458,
|
||||||
'ext': 'mp4',
|
'upload_date': '20181119',
|
||||||
'title': 'Uncovering a Universal Knowledge',
|
'age_limit': 14,
|
||||||
'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
|
|
||||||
'timestamp': 1458680907,
|
|
||||||
'upload_date': '20160322',
|
|
||||||
'uploader': 'NEWA-FNG-NGTV',
|
|
||||||
},
|
|
||||||
'add_ie': ['ThePlatform'],
|
|
||||||
},
|
},
|
||||||
{
|
'params': {
|
||||||
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
|
'skip_download': True,
|
||||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'Pok5lWCkiEFA',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'The Stunning Red Bird of Paradise',
|
|
||||||
'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
|
|
||||||
'timestamp': 1459362152,
|
|
||||||
'upload_date': '20160330',
|
|
||||||
'uploader': 'NEWA-FNG-NGTV',
|
|
||||||
},
|
|
||||||
'add_ie': ['ThePlatform'],
|
|
||||||
},
|
},
|
||||||
{
|
}]
|
||||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
|
_HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
|
||||||
'only_matching': True,
|
_API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
|
||||||
'only_matching': True,
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
release_url = self._search_regex(
|
|
||||||
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
|
|
||||||
webpage, 'release url')
|
|
||||||
theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
|
|
||||||
video_id = theplatform_path.split('/')[-1]
|
|
||||||
query = {
|
|
||||||
'mbr': 'true',
|
|
||||||
}
|
|
||||||
is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
|
|
||||||
if is_auth == 'auth':
|
|
||||||
auth_resource_id = self._search_regex(
|
|
||||||
r"video_auth_resourceId\s*=\s*'([^']+)'",
|
|
||||||
webpage, 'auth resource id')
|
|
||||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
subtitles = {}
|
|
||||||
for key, value in (('switch', 'http'), ('manifest', 'm3u')):
|
|
||||||
tp_query = query.copy()
|
|
||||||
tp_query.update({
|
|
||||||
key: value,
|
|
||||||
})
|
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
|
||||||
update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
|
|
||||||
formats.extend(tp_formats)
|
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = self._extract_theplatform_metadata(theplatform_path, display_id)
|
|
||||||
info.update({
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'display_id': display_id,
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class NationalGeographicEpisodeGuideIE(InfoExtractor):
|
|
||||||
IE_NAME = 'natgeo:episodeguide'
|
|
||||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'the-story-of-god-with-morgan-freeman-season-1',
|
|
||||||
'title': 'The Story of God with Morgan Freeman - Season 1',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 6,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'underworld-inc-season-2',
|
|
||||||
'title': 'Underworld, Inc. - Season 2',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 7,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
show = get_element_by_class('show', webpage)
|
|
||||||
selected_season = self._search_regex(
|
|
||||||
r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
|
|
||||||
webpage, 'selected season')
|
|
||||||
entries = [
|
|
||||||
self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic')
|
|
||||||
for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)]
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')),
|
|
||||||
'%s - %s' % (show, selected_season))
|
|
||||||
|
@@ -5,8 +5,8 @@ from ..utils import ExtractorError
|
|||||||
|
|
||||||
|
|
||||||
class NhkVodIE(InfoExtractor):
|
class NhkVodIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)'
|
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/(?:vod|ondemand)/(?P<id>[^/]+/[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
# Videos available only for a limited period of time. Visit
|
# Videos available only for a limited period of time. Visit
|
||||||
# http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
|
# http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
|
||||||
'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
|
'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
|
||||||
@@ -19,7 +19,10 @@ class NhkVodIE(InfoExtractor):
|
|||||||
'episode': 'The Kimono as Global Fashion',
|
'episode': 'The Kimono as Global Fashion',
|
||||||
},
|
},
|
||||||
'skip': 'Videos available only for a limited period of time',
|
'skip': 'Videos available only for a limited period of time',
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
_API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
|
_API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
brightcove_id = self._search_regex(
|
||||||
|
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||||
|
|
||||||
data = self._parse_json(
|
data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
@@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': BrightcoveNewIE.ie_key(),
|
'ie_key': BrightcoveNewIE.ie_key(),
|
||||||
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
'url': smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||||
|
{'geo_countries': ['CA']}),
|
||||||
|
'id': brightcove_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'series': series,
|
'series': series,
|
||||||
|
@@ -12,11 +12,16 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self._get_info(video_id)
|
try:
|
||||||
|
return self._get_info(url, video_id)
|
||||||
|
except ExtractorError:
|
||||||
|
return self._get_old_info(video_id)
|
||||||
|
|
||||||
def _get_info(self, url, video_id):
    """Fetch formats and metadata through the npostart.nl player API."""
    # A short-lived token must be obtained first and passed along with the
    # player request.
    token = self._download_json(
        'https://www.npostart.nl/api/token', video_id,
        'Downloading token', headers={
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        })['token']

    player = self._download_json(
        'https://www.npostart.nl/player/%s' % video_id, video_id,
        'Downloading player JSON', data=urlencode_postdata({
            'autoplay': 0,
            'share': 1,
            'pageUrl': url,
            'hasAdConsent': 0,
            '_token': token,
        }))

    player_token = player['token']

    seen_stream_urls = set()
    formats = []
    # Query each delivery profile; DRM-protected streams are skipped below.
    for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
        streams = self._download_json(
            'https://start-player.npo.nl/video/%s/streams' % video_id,
            video_id, 'Downloading %s profile JSON' % profile, fatal=False,
            query={
                'profile': profile,
                'quality': 'npo',
                'tokenId': player_token,
                'streamType': 'broadcast',
            })
        if not streams:
            continue
        stream = streams.get('stream')
        if not isinstance(stream, dict):
            continue
        stream_url = url_or_none(stream.get('src'))
        if not stream_url or stream_url in seen_stream_urls:
            continue
        seen_stream_urls.add(stream_url)
        # A non-None 'protection' entry marks a DRM'd stream we cannot use.
        if stream.get('protection') is not None:
            continue
        stream_type = stream.get('type')
        stream_ext = determine_ext(stream_url)
        if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                stream_url, video_id, mpd_id='dash', fatal=False))
        elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
        elif re.search(r'\.isml?/Manifest', stream_url):
            formats.extend(self._extract_ism_formats(
                stream_url, video_id, ism_id='mss', fatal=False))
        else:
            formats.append({
                'url': stream_url,
            })

    self._sort_formats(formats)

    info = {
        'id': video_id,
        'title': video_id,
        'formats': formats,
    }

    # Richer metadata (episode title, subtitles, thumbnail, ...) lives in a
    # JS "video" object on the embed page, when one is available.
    embed_url = url_or_none(player.get('embedUrl'))
    if embed_url:
        webpage = self._download_webpage(
            embed_url, video_id, 'Downloading embed page', fatal=False)
        if webpage:
            video = self._parse_json(
                self._search_regex(
                    r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
                    default='{}'), video_id)
            if video:
                title = video.get('episodeTitle')
                subtitles = {}
                subtitles_list = video.get('subtitles')
                if isinstance(subtitles_list, list):
                    for cc in subtitles_list:
                        cc_url = url_or_none(cc.get('src'))
                        if not cc_url:
                            continue
                        lang = str_or_none(cc.get('language')) or 'nl'
                        subtitles.setdefault(lang, []).append({
                            'url': cc_url,
                        })
                return merge_dicts({
                    'title': title,
                    'description': video.get('description'),
                    'thumbnail': url_or_none(
                        video.get('still_image_url') or video.get('orig_image_url')),
                    'duration': int_or_none(video.get('duration')),
                    'timestamp': unified_timestamp(video.get('broadcastDate')),
                    'creator': video.get('channel'),
                    'series': video.get('title'),
                    'episode': title,
                    'episode_number': int_or_none(video.get('episodeNumber')),
                    'subtitles': subtitles,
                }, info)

    return info
|
||||||
|
|
||||||
|
def _get_old_info(self, video_id):
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
'http://e.omroep.nl/metadata/%s' % video_id,
|
'http://e.omroep.nl/metadata/%s' % video_id,
|
||||||
video_id,
|
video_id,
|
||||||
@@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
# JSON
|
# JSON
|
||||||
else:
|
else:
|
||||||
video_url = stream_info.get('url')
|
video_url = stream_info.get('url')
|
||||||
if not video_url or video_url in urls:
|
if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.add(video_url)
|
urls.add(video_url)
|
||||||
if determine_ext(video_url) == 'm3u8':
|
if determine_ext(video_url) == 'm3u8':
|
||||||
@@ -363,7 +477,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
|
|
||||||
class NPOLiveIE(NPOBaseIE):
|
class NPOLiveIE(NPOBaseIE):
|
||||||
IE_NAME = 'npo.nl:live'
|
IE_NAME = 'npo.nl:live'
|
||||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
_VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.npo.nl/live/npo-1',
|
'url': 'http://www.npo.nl/live/npo-1',
|
||||||
@@ -380,6 +494,9 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.npo.nl/live',
|
'url': 'http://www.npo.nl/live',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.npostart.nl/live/npo-1',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -115,6 +115,10 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
|
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Paid video
|
||||||
|
'url': 'https://ok.ru/video/954886983203',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -244,6 +248,11 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
payment_info = metadata.get('paymentInfo')
|
||||||
|
if payment_info:
|
||||||
|
raise ExtractorError('This video is paid, subscribe to download it', expected=True)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info['formats'] = formats
|
info['formats'] = formats
|
||||||
|
@@ -248,8 +248,8 @@ class OpenloadIE(InfoExtractor):
|
|||||||
(?P<host>
|
(?P<host>
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
openload\.(?:co|io|link)|
|
openload\.(?:co|io|link|pw)|
|
||||||
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
|
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space)
|
||||||
)
|
)
|
||||||
)/
|
)/
|
||||||
(?:f|embed)/
|
(?:f|embed)/
|
||||||
@@ -334,6 +334,24 @@ class OpenloadIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://oload.fun/f/gb6G1H4sHXY',
|
'url': 'https://oload.fun/f/gb6G1H4sHXY',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.info/f/5NEAbI2BDSk',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://openload.pw/f/WyKgK8s94N0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.pw/f/WyKgK8s94N0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.live/f/-Z58UZ-GR4M',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.space/f/IY4eZSst3u8/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
|
28
youtube_dl/extractor/outsidetv.py
Normal file
28
youtube_dl/extractor/outsidetv.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class OutsideTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4',
|
||||||
|
'md5': '192d968fedc10b2f70ec31865ffba0da',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Hdg0jukV',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Home - Jackson Ep 1 | Arbor Snowboards',
|
||||||
|
'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd',
|
||||||
|
'upload_date': '20181225',
|
||||||
|
'timestamp': 1545742800,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
jw_media_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id)
|
@@ -24,9 +24,9 @@ class PacktPubBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PacktPubIE(PacktPubBaseIE):
|
class PacktPubIE(PacktPubBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
|
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
|
||||||
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
|
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -37,7 +37,10 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
'timestamp': 1490918400,
|
'timestamp': 1490918400,
|
||||||
'upload_date': '20170331',
|
'upload_date': '20170331',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
_NETRC_MACHINE = 'packtpub'
|
_NETRC_MACHINE = 'packtpub'
|
||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
|
|
||||||
@@ -110,15 +113,18 @@ class PacktPubIE(PacktPubBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class PacktPubCourseIE(PacktPubBaseIE):
|
class PacktPubCourseIE(PacktPubBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
|
_VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
|
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9781787122215',
|
'id': '9781787122215',
|
||||||
'title': 'Learn Nodejs by building 12 projects [Video]',
|
'title': 'Learn Nodejs by building 12 projects [Video]',
|
||||||
},
|
},
|
||||||
'playlist_count': 90,
|
'playlist_count': 90,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
|
@@ -5,6 +5,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
@@ -75,6 +76,14 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
'url': broadcast[image],
|
'url': broadcast[image],
|
||||||
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
||||||
|
|
||||||
|
width = int_or_none(broadcast.get('width'))
|
||||||
|
height = int_or_none(broadcast.get('height'))
|
||||||
|
|
||||||
|
def add_width_and_height(f):
|
||||||
|
for key, val in (('width', width), ('height', height)):
|
||||||
|
if not f.get(key):
|
||||||
|
f[key] = val
|
||||||
|
|
||||||
video_urls = set()
|
video_urls = set()
|
||||||
formats = []
|
formats = []
|
||||||
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
|
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
|
||||||
@@ -83,16 +92,21 @@ class PeriscopeIE(PeriscopeBaseIE):
|
|||||||
continue
|
continue
|
||||||
video_urls.add(video_url)
|
video_urls.add(video_url)
|
||||||
if format_id != 'rtmp':
|
if format_id != 'rtmp':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
video_url, token, 'mp4',
|
video_url, token, 'mp4',
|
||||||
entry_protocol='m3u8_native'
|
entry_protocol='m3u8_native'
|
||||||
if state in ('ended', 'timed_out') else 'm3u8',
|
if state in ('ended', 'timed_out') else 'm3u8',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False)
|
||||||
|
if len(m3u8_formats) == 1:
|
||||||
|
add_width_and_height(m3u8_formats[0])
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
continue
|
continue
|
||||||
formats.append({
|
rtmp_format = {
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
|
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
|
||||||
})
|
}
|
||||||
|
add_width_and_height(rtmp_format)
|
||||||
|
formats.append(rtmp_format)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
109
youtube_dl/extractor/playplustv.py
Normal file
109
youtube_dl/extractor/playplustv.py
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
PUTRequest,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayPlusTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
|
||||||
|
'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'db8d274a5163424e967f35a30ddafb8e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Capítulo 179 - Final',
|
||||||
|
'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
|
||||||
|
'timestamp': 1529992740,
|
||||||
|
'upload_date': '20180626',
|
||||||
|
},
|
||||||
|
'skip': 'Requires account credential',
|
||||||
|
}
|
||||||
|
_NETRC_MACHINE = 'playplustv'
|
||||||
|
_GEO_COUNTRIES = ['BR']
|
||||||
|
_token = None
|
||||||
|
_profile_id = None
|
||||||
|
|
||||||
|
def _call_api(self, resource, video_id=None, query=None):
|
||||||
|
return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
|
||||||
|
'Authorization': 'Bearer ' + self._token,
|
||||||
|
}, query=query)
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
email, password = self._get_login_info()
|
||||||
|
if email is None:
|
||||||
|
self.raise_login_required()
|
||||||
|
|
||||||
|
req = PUTRequest(
|
||||||
|
'https://api.playplus.tv/api/web/login', json.dumps({
|
||||||
|
'email': email,
|
||||||
|
'password': password,
|
||||||
|
}).encode(), {
|
||||||
|
'Content-Type': 'application/json; charset=utf-8',
|
||||||
|
})
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._token = self._download_json(req, None)['token']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
|
raise ExtractorError(self._parse_json(
|
||||||
|
e.cause.read(), None)['errorMessage'], expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
self._profile = self._call_api('Profiles')['list'][0]['_id']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
project_id, media_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
media = self._call_api(
|
||||||
|
'Media', media_id, {
|
||||||
|
'profileId': self._profile,
|
||||||
|
'projectId': project_id,
|
||||||
|
'mediaId': media_id,
|
||||||
|
})['obj']
|
||||||
|
title = media['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for f in media.get('files', []):
|
||||||
|
f_url = f.get('url')
|
||||||
|
if not f_url:
|
||||||
|
continue
|
||||||
|
file_info = f.get('fileInfo') or {}
|
||||||
|
formats.append({
|
||||||
|
'url': f_url,
|
||||||
|
'width': int_or_none(file_info.get('width')),
|
||||||
|
'height': int_or_none(file_info.get('height')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for thumb in media.get('thumbs', []):
|
||||||
|
thumb_url = thumb.get('url')
|
||||||
|
if not thumb_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumb_url,
|
||||||
|
'width': int_or_none(thumb.get('width')),
|
||||||
|
'height': int_or_none(thumb.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': media_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'description': clean_html(media.get('description')) or media.get('shortDescription'),
|
||||||
|
'timestamp': int_or_none(media.get('publishDate'), 1000),
|
||||||
|
'view_count': int_or_none(media.get('numberOfViews')),
|
||||||
|
'comment_count': int_or_none(media.get('numberOfComments')),
|
||||||
|
'tags': media.get('tags'),
|
||||||
|
}
|
@@ -4,9 +4,11 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -14,7 +16,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
|
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
||||||
'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
|
'md5': '87f1540746c1d32ec7a2305c12b96b25',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9864',
|
'id': '9864',
|
||||||
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
|
||||||
@@ -23,6 +25,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
|
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
@@ -37,6 +40,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'skip': 'Not available anymore',
|
'skip': 'Not available anymore',
|
||||||
@@ -65,12 +69,14 @@ class PornHdIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, video_url in sources.items():
|
for format_id, video_url in sources.items():
|
||||||
|
video_url = urljoin(url, video_url)
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
height = int_or_none(self._search_regex(
|
height = int_or_none(self._search_regex(
|
||||||
r'^(\d+)[pP]', format_id, 'height', default=None))
|
r'^(\d+)[pP]', format_id, 'height', default=None))
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
'ext': determine_ext(video_url, 'mp4'),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
@@ -85,6 +91,11 @@ class PornHdIE(InfoExtractor):
|
|||||||
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||||
'thumbnail', fatal=False, group='url')
|
'thumbnail', fatal=False, group='url')
|
||||||
|
|
||||||
|
like_count = int_or_none(self._search_regex(
|
||||||
|
(r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
|
||||||
|
r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
|
||||||
|
webpage, 'like count', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
@@ -92,6 +103,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@@ -10,11 +10,12 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
|
from .openload import PhantomJSwrapper
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@@ -22,7 +23,29 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PornHubIE(InfoExtractor):
|
class PornHubBaseIE(InfoExtractor):
|
||||||
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
|
def dl(*args, **kwargs):
|
||||||
|
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||||
|
|
||||||
|
webpage, urlh = dl(*args, **kwargs)
|
||||||
|
|
||||||
|
if any(re.search(p, webpage) for p in (
|
||||||
|
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||||
|
r'document\.cookie\s*=\s*["\']RNKEY=',
|
||||||
|
r'document\.location\.reload\(true\)')):
|
||||||
|
url_or_request = args[0]
|
||||||
|
url = (url_or_request.get_full_url()
|
||||||
|
if isinstance(url_or_request, compat_urllib_request.Request)
|
||||||
|
else url_or_request)
|
||||||
|
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||||
|
phantom.get(url, html=webpage)
|
||||||
|
webpage, urlh = dl(*args, **kwargs)
|
||||||
|
|
||||||
|
return webpage, urlh
|
||||||
|
|
||||||
|
|
||||||
|
class PornHubIE(PornHubBaseIE):
|
||||||
IE_DESC = 'PornHub and Thumbzilla'
|
IE_DESC = 'PornHub and Thumbzilla'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
@@ -279,14 +302,12 @@ class PornHubIE(InfoExtractor):
|
|||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
page_params = self._parse_json(self._search_regex(
|
def extract_list(meta_key):
|
||||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
div = self._search_regex(
|
||||||
webpage, 'page parameters', group='data', default='{}'),
|
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
% meta_key, webpage, meta_key, default=None)
|
||||||
tags = categories = None
|
if div:
|
||||||
if page_params:
|
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
||||||
tags = page_params.get('tags', '').split(',')
|
|
||||||
categories = page_params.get('categories', '').split(',')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -301,13 +322,13 @@ class PornHubIE(InfoExtractor):
|
|||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': tags,
|
'tags': extract_list('tags'),
|
||||||
'categories': categories,
|
'categories': extract_list('categories'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistBaseIE(InfoExtractor):
|
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
def _extract_entries(self, webpage, host):
|
def _extract_entries(self, webpage, host):
|
||||||
# Only process container div with main playlist content skipping
|
# Only process container div with main playlist content skipping
|
||||||
# drop-down menu that uses similar pattern for videos (see
|
# drop-down menu that uses similar pattern for videos (see
|
||||||
|
@@ -4,16 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
|
||||||
find_xpath_attr,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
xpath_element,
|
|
||||||
ExtractorError,
|
|
||||||
determine_protocol,
|
|
||||||
unsmuggle_url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -49,107 +45,79 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# with protectionType but not actually DRM protected
|
||||||
|
'url': 'radiocanada:toutv:140872',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '140872',
|
||||||
|
'title': 'Épisode 1',
|
||||||
|
'series': 'District 31',
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
_GEO_COUNTRIES = ['CA']
|
||||||
|
_access_token = None
|
||||||
|
_claims = None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _call_api(self, path, video_id=None, app_code=None, query=None):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
if not query:
|
||||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
query = {}
|
||||||
|
query.update({
|
||||||
metadata = self._download_xml(
|
'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
|
||||||
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
|
'output': 'json',
|
||||||
video_id, note='Downloading metadata XML', query={
|
})
|
||||||
|
if video_id:
|
||||||
|
query.update({
|
||||||
'appCode': app_code,
|
'appCode': app_code,
|
||||||
'idMedia': video_id,
|
'idMedia': video_id,
|
||||||
})
|
})
|
||||||
|
if self._access_token:
|
||||||
|
query['access_token'] = self._access_token
|
||||||
|
try:
|
||||||
|
return self._download_json(
|
||||||
|
'https://services.radio-canada.ca/media/' + path, video_id, query=query)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
|
||||||
|
data = self._parse_json(e.cause.read().decode(), None)
|
||||||
|
error = data.get('error_description') or data['errorMessage']['text']
|
||||||
|
raise ExtractorError(error, expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _extract_info(self, app_code, video_id):
|
||||||
|
metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
|
||||||
|
|
||||||
def get_meta(name):
|
def get_meta(name):
|
||||||
el = find_xpath_attr(metadata, './/Meta', 'name', name)
|
for meta in metas:
|
||||||
return el.text if el is not None else None
|
if meta.get('name') == name:
|
||||||
|
text = meta.get('text')
|
||||||
|
if text:
|
||||||
|
return text
|
||||||
|
|
||||||
|
# protectionType does not necessarily mean the video is DRM protected (see
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/18609).
|
||||||
if get_meta('protectionType'):
|
if get_meta('protectionType'):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
self.report_warning('This video is probably DRM protected.')
|
||||||
|
|
||||||
device_types = ['ipad']
|
query = {
|
||||||
if not smuggled_data:
|
'connectionType': 'hd',
|
||||||
device_types.append('flash')
|
'deviceType': 'ipad',
|
||||||
device_types.append('android')
|
'multibitrate': 'true',
|
||||||
|
}
|
||||||
formats = []
|
if self._claims:
|
||||||
error = None
|
query['claims'] = self._claims
|
||||||
# TODO: extract f4m formats
|
v_data = self._call_api('validation/v2/', video_id, app_code, query)
|
||||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
v_url = v_data.get('url')
|
||||||
for device_type in device_types:
|
if not v_url:
|
||||||
validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
|
error = v_data['message']
|
||||||
query = {
|
if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
|
||||||
'appCode': app_code,
|
raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
|
||||||
'idMedia': video_id,
|
if error == 'Le contenu sélectionné est disponible seulement en premium':
|
||||||
'connectionType': 'broadband',
|
self.raise_login_required(error)
|
||||||
'multibitrate': 'true',
|
|
||||||
'deviceType': device_type,
|
|
||||||
}
|
|
||||||
if smuggled_data:
|
|
||||||
validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
|
|
||||||
query.update(smuggled_data)
|
|
||||||
else:
|
|
||||||
query.update({
|
|
||||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
|
||||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
|
||||||
'bypasslock': 'NZt5K62gRqfc',
|
|
||||||
})
|
|
||||||
v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
|
|
||||||
v_url = xpath_text(v_data, 'url')
|
|
||||||
if not v_url:
|
|
||||||
continue
|
|
||||||
if v_url == 'null':
|
|
||||||
error = xpath_text(v_data, 'message')
|
|
||||||
continue
|
|
||||||
ext = determine_ext(v_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
v_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
else:
|
|
||||||
ext = determine_ext(v_url)
|
|
||||||
bitrates = xpath_element(v_data, 'bitrates')
|
|
||||||
for url_e in bitrates.findall('url'):
|
|
||||||
tbr = int_or_none(url_e.get('bitrate'))
|
|
||||||
if not tbr:
|
|
||||||
continue
|
|
||||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
|
||||||
protocol = determine_protocol({'url': f_url})
|
|
||||||
f = {
|
|
||||||
'format_id': '%s-%d' % (protocol, tbr),
|
|
||||||
'url': f_url,
|
|
||||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
|
||||||
'protocol': protocol,
|
|
||||||
'width': int_or_none(url_e.get('width')),
|
|
||||||
'height': int_or_none(url_e.get('height')),
|
|
||||||
'tbr': tbr,
|
|
||||||
}
|
|
||||||
mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
|
|
||||||
if mobj:
|
|
||||||
f.update({
|
|
||||||
'url': mobj.group('url') + mobj.group('auth'),
|
|
||||||
'play_path': mobj.group('playpath'),
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
if protocol == 'rtsp':
|
|
||||||
base_url = self._search_regex(
|
|
||||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
|
||||||
if base_url:
|
|
||||||
base_url = 'http://' + base_url
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
base_url + '/playlist.m3u8', video_id, 'mp4',
|
|
||||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
base_url + '/manifest.f4m', video_id,
|
|
||||||
f4m_id='hds', fatal=False))
|
|
||||||
if not formats and error:
|
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@@ -174,11 +142,14 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self._extract_info(*re.match(self._VALID_URL, url).groups())
|
||||||
|
|
||||||
|
|
||||||
class RadioCanadaAudioVideoIE(InfoExtractor):
|
class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||||
'radiocanada:audiovideo'
|
'radiocanada:audiovideo'
|
||||||
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7527184',
|
'id': '7527184',
|
||||||
@@ -191,7 +162,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
||||||
|
@@ -74,11 +74,11 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
|
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
media_url, video_id, 'mp4', 'm3u8_native',
|
media_url, video_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m' or platform == 'flash':
|
||||||
manifest_url = update_url_query(
|
manifest_url = update_url_query(
|
||||||
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
|
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
|
||||||
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
|
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
|
||||||
@@ -288,7 +288,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# var uniquename = "ContentItem-..."
|
# var uniquename = "ContentItem-..."
|
||||||
# data-id="ContentItem-..."
|
# data-id="ContentItem-..."
|
||||||
@@ -375,6 +375,9 @@ class RaiIE(RaiBaseIE):
|
|||||||
# Direct MMS URL
|
# Direct MMS URL
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_from_content_id(self, content_id, url):
|
def _extract_from_content_id(self, content_id, url):
|
||||||
|
@@ -1,38 +1,46 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .brightcove import BrightcoveLegacyIE
|
from .brightcove import BrightcoveLegacyIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class RMCDecouverteIE(InfoExtractor):
|
class RMCDecouverteIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
|
_VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
|
'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5419055995001',
|
'id': '5983675500001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'UN DELICIEUX PROJET',
|
'title': 'CORVETTE',
|
||||||
'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
|
'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
|
||||||
'uploader_id': '1969646226001',
|
'uploader_id': '1969646226001',
|
||||||
'upload_date': '20170502',
|
'upload_date': '20181226',
|
||||||
'timestamp': 1493745308,
|
'timestamp': 1545861635,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'only available for a week',
|
'skip': 'only available for a week',
|
||||||
}
|
}, {
|
||||||
|
# live, geo restricted, bypassable
|
||||||
|
'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
display_id = mobj.group('id') or mobj.group('live_id')
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||||
if brightcove_legacy_url:
|
if brightcove_legacy_url:
|
||||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
||||||
@@ -41,5 +49,7 @@ class RMCDecouverteIE(InfoExtractor):
|
|||||||
brightcove_id = self._search_regex(
|
brightcove_id = self._search_regex(
|
||||||
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
smuggle_url(
|
||||||
brightcove_id)
|
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||||
|
{'geo_countries': ['FR']}),
|
||||||
|
'BrightcoveNew', brightcove_id)
|
||||||
|
@@ -21,7 +21,17 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class RutubeBaseIE(InfoExtractor):
|
class RutubeBaseIE(InfoExtractor):
|
||||||
def _extract_video(self, video, video_id=None, require_title=True):
|
def _download_api_info(self, video_id, query=None):
|
||||||
|
if not query:
|
||||||
|
query = {}
|
||||||
|
query['format'] = 'json'
|
||||||
|
return self._download_json(
|
||||||
|
'http://rutube.ru/api/video/%s/' % video_id,
|
||||||
|
video_id, 'Downloading video JSON',
|
||||||
|
'Unable to download video JSON', query=query)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_info(video, video_id=None, require_title=True):
|
||||||
title = video['title'] if require_title else video.get('title')
|
title = video['title'] if require_title else video.get('title')
|
||||||
|
|
||||||
age_limit = video.get('is_adult')
|
age_limit = video.get('is_adult')
|
||||||
@@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor):
|
|||||||
category = try_get(video, lambda x: x['category']['name'])
|
category = try_get(video, lambda x: x['category']['name'])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video.get('id') or video_id,
|
'id': video.get('id') or video_id if video_id else video['id'],
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
'thumbnail': video.get('thumbnail_url'),
|
'thumbnail': video.get('thumbnail_url'),
|
||||||
@@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor):
|
|||||||
'is_live': bool_or_none(video.get('is_livestream')),
|
'is_live': bool_or_none(video.get('is_livestream')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _download_and_extract_info(self, video_id, query=None):
|
||||||
|
return self._extract_info(
|
||||||
|
self._download_api_info(video_id, query=query), video_id)
|
||||||
|
|
||||||
|
def _download_api_options(self, video_id, query=None):
|
||||||
|
if not query:
|
||||||
|
query = {}
|
||||||
|
query['format'] = 'json'
|
||||||
|
return self._download_json(
|
||||||
|
'http://rutube.ru/api/play/options/%s/' % video_id,
|
||||||
|
video_id, 'Downloading options JSON',
|
||||||
|
'Unable to download options JSON',
|
||||||
|
headers=self.geo_verification_headers(), query=query)
|
||||||
|
|
||||||
|
def _extract_formats(self, options, video_id):
|
||||||
|
formats = []
|
||||||
|
for format_id, format_url in options['video_balancer'].items():
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _download_and_extract_formats(self, video_id, query=None):
|
||||||
|
return self._extract_formats(
|
||||||
|
self._download_api_options(video_id, query=query), video_id)
|
||||||
|
|
||||||
|
|
||||||
class RutubeIE(RutubeBaseIE):
|
class RutubeIE(RutubeBaseIE):
|
||||||
IE_NAME = 'rutube'
|
IE_NAME = 'rutube'
|
||||||
@@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||||
'md5': '79938ade01294ef7e27574890d0d3769',
|
'md5': '1d24f180fac7a02f3900712e5a5764d6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Раненный кенгуру забежал в аптеку',
|
'title': 'Раненный кенгуру забежал в аптеку',
|
||||||
'description': 'http://www.ntdtv.ru ',
|
'description': 'http://www.ntdtv.ru ',
|
||||||
'duration': 80,
|
'duration': 81,
|
||||||
'uploader': 'NTDRussian',
|
'uploader': 'NTDRussian',
|
||||||
'uploader_id': '29790',
|
'uploader_id': '29790',
|
||||||
'timestamp': 1381943602,
|
'timestamp': 1381943602,
|
||||||
@@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
info = self._download_and_extract_info(video_id)
|
||||||
video = self._download_json(
|
info['formats'] = self._download_and_extract_formats(video_id)
|
||||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
|
||||||
video_id, 'Downloading video JSON')
|
|
||||||
|
|
||||||
info = self._extract_video(video, video_id)
|
|
||||||
|
|
||||||
options = self._download_json(
|
|
||||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
|
||||||
video_id, 'Downloading options JSON',
|
|
||||||
headers=self.geo_verification_headers())
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_id, format_url in options['video_balancer'].items():
|
|
||||||
ext = determine_ext(format_url)
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': format_id,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info['formats'] = formats
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RutubeEmbedIE(InfoExtractor):
|
class RutubeEmbedIE(RutubeBaseIE):
|
||||||
IE_NAME = 'rutube:embed'
|
IE_NAME = 'rutube:embed'
|
||||||
IE_DESC = 'Rutube embedded videos'
|
IE_DESC = 'Rutube embedded videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||||
@@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'timestamp': 1387830582,
|
'timestamp': 1387830582,
|
||||||
'upload_date': '20131223',
|
'upload_date': '20131223',
|
||||||
'uploader_id': '297833',
|
'uploader_id': '297833',
|
||||||
@@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/play/embed/8083783',
|
'url': 'http://rutube.ru/play/embed/8083783',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# private video
|
||||||
|
'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
embed_id = self._match_id(url)
|
embed_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, embed_id)
|
# Query may contain private videos token and should be passed to API
|
||||||
|
# requests (see #19163)
|
||||||
canonical_url = self._html_search_regex(
|
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
options = self._download_api_options(embed_id, query)
|
||||||
'Canonical URL')
|
video_id = options['effective_video']
|
||||||
return self.url_result(canonical_url, RutubeIE.ie_key())
|
formats = self._extract_formats(options, video_id)
|
||||||
|
info = self._download_and_extract_info(video_id, query)
|
||||||
|
info.update({
|
||||||
|
'extractor_key': 'Rutube',
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RutubePlaylistBaseIE(RutubeBaseIE):
|
class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||||
@@ -181,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
|
|||||||
video_url = url_or_none(result.get('video_url'))
|
video_url = url_or_none(result.get('video_url'))
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
entry = self._extract_video(result, require_title=False)
|
entry = self._extract_info(result, require_title=False)
|
||||||
entry.update({
|
entry.update({
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@@ -15,10 +15,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class SafariBaseIE(InfoExtractor):
|
class SafariBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
_LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
|
||||||
_NETRC_MACHINE = 'safari'
|
_NETRC_MACHINE = 'safari'
|
||||||
|
|
||||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
|
_API_BASE = 'https://learning.oreilly.com/api/v1'
|
||||||
_API_FORMAT = 'json'
|
_API_FORMAT = 'json'
|
||||||
|
|
||||||
LOGGED_IN = False
|
LOGGED_IN = False
|
||||||
@@ -76,7 +76,7 @@ class SafariIE(SafariBaseIE):
|
|||||||
IE_DESC = 'safaribooksonline.com online video'
|
IE_DESC = 'safaribooksonline.com online video'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?safaribooksonline\.com/
|
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||||
(?:
|
(?:
|
||||||
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
|
||||||
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
|
||||||
@@ -104,6 +104,9 @@ class SafariIE(SafariBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
|
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PARTNER_ID = '1926081'
|
_PARTNER_ID = '1926081'
|
||||||
@@ -160,7 +163,7 @@ class SafariIE(SafariBaseIE):
|
|||||||
|
|
||||||
class SafariApiIE(SafariBaseIE):
|
class SafariApiIE(SafariBaseIE):
|
||||||
IE_NAME = 'safari:api'
|
IE_NAME = 'safari:api'
|
||||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||||
@@ -185,7 +188,7 @@ class SafariCourseIE(SafariBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?safaribooksonline\.com/
|
(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
|
||||||
(?:
|
(?:
|
||||||
library/view/[^/]+|
|
library/view/[^/]+|
|
||||||
api/v1/book|
|
api/v1/book|
|
||||||
@@ -213,6 +216,9 @@ class SafariCourseIE(SafariBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
|
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@@ -30,8 +30,5 @@ class SaveFromIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||||
return {
|
|
||||||
'_type': 'url',
|
return self.url_result(mobj.group('url'), video_id=video_id)
|
||||||
'id': video_id,
|
|
||||||
'url': mobj.group('url'),
|
|
||||||
}
|
|
||||||
|
@@ -19,7 +19,7 @@ class ScrippsNetworksWatchIE(AWSIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
watch\.
|
watch\.
|
||||||
(?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
|
(?P<site>geniuskitchen)\.com/
|
||||||
(?:
|
(?:
|
||||||
player\.[A-Z0-9]+\.html\#|
|
player\.[A-Z0-9]+\.html\#|
|
||||||
show/(?:[^/]+/){2}|
|
show/(?:[^/]+/){2}|
|
||||||
@@ -28,38 +28,23 @@ class ScrippsNetworksWatchIE(AWSIE):
|
|||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
|
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
||||||
'md5': '26545fd676d939954c6808274bdb905a',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4173834',
|
'id': '4194875',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Best Ever Treehouses',
|
'title': 'Ample Hills Ice Cream Bike',
|
||||||
'description': "We're searching for the most over the top treehouses.",
|
'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
|
||||||
'uploader': 'ANV',
|
'uploader': 'ANV',
|
||||||
'upload_date': '20170922',
|
'upload_date': '20171011',
|
||||||
'timestamp': 1506056400,
|
'timestamp': 1507698000,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [AnvatoIE.ie_key()],
|
'add_ie': [AnvatoIE.ie_key()],
|
||||||
}, {
|
|
||||||
'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_SNI_TABLE = {
|
_SNI_TABLE = {
|
||||||
'hgtv': 'hgtv',
|
|
||||||
'diynetwork': 'diy',
|
|
||||||
'foodnetwork': 'food',
|
|
||||||
'cookingchanneltv': 'cook',
|
|
||||||
'travelchannel': 'trav',
|
|
||||||
'geniuskitchen': 'genius',
|
'geniuskitchen': 'genius',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,31 +1,44 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class ServusIE(InfoExtractor):
|
class ServusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
|
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
|
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
|
||||||
'md5': '046dee641cda1c4cabe13baef3be2c1c',
|
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AA-1T6VBU5PW1W12',
|
'id': 'AA-1T6VBU5PW1W12',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Die Grünen aus Volkssicht',
|
'title': 'Die Grünen aus Sicht des Volkes',
|
||||||
'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
|
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
|
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url).upper()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._search_regex(
|
||||||
|
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
|
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
|
||||||
|
webpage, 'title', default=None,
|
||||||
|
group='title') or self._og_search_title(webpage)
|
||||||
|
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
@@ -61,7 +61,8 @@ class SixPlayIE(InfoExtractor):
|
|||||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for asset in clip_data['assets']:
|
assets = clip_data.get('assets') or []
|
||||||
|
for asset in assets:
|
||||||
asset_url = asset.get('full_physical_path')
|
asset_url = asset.get('full_physical_path')
|
||||||
protocol = asset.get('protocol')
|
protocol = asset.get('protocol')
|
||||||
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
||||||
|
@@ -26,7 +26,7 @@ class SkylineWebcamsIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
stream_url = self._search_regex(
|
stream_url = self._search_regex(
|
||||||
r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
|
r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
|
||||||
'stream url', group='url')
|
'stream url', group='url')
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
|
@@ -16,8 +16,10 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -34,7 +36,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||||
(?!stations/track)
|
(?!stations/track)
|
||||||
(?P<uploader>[\w\d-]+)/
|
(?P<uploader>[\w\d-]+)/
|
||||||
(?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
(?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||||
(?P<title>[\w\d-]+)/?
|
(?P<title>[\w\d-]+)/?
|
||||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||||
@@ -50,12 +52,17 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '62986583',
|
'id': '62986583',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'upload_date': '20121011',
|
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||||
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
||||||
'uploader': 'E.T. ExTerrestrial Music',
|
'uploader': 'E.T. ExTerrestrial Music',
|
||||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
'timestamp': 1349920598,
|
||||||
|
'upload_date': '20121011',
|
||||||
'duration': 143,
|
'duration': 143,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# not streamable song
|
# not streamable song
|
||||||
@@ -67,9 +74,14 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Goldrushed',
|
'title': 'Goldrushed',
|
||||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||||
'uploader': 'The Royal Concept',
|
'uploader': 'The Royal Concept',
|
||||||
|
'timestamp': 1337635207,
|
||||||
'upload_date': '20120521',
|
'upload_date': '20120521',
|
||||||
'duration': 227,
|
'duration': 30,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp
|
# rtmp
|
||||||
@@ -84,11 +96,16 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'id': '123998367',
|
'id': '123998367',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||||
'uploader': 'jaimeMF',
|
|
||||||
'description': 'test chars: \"\'/\\ä↭',
|
'description': 'test chars: \"\'/\\ä↭',
|
||||||
|
'uploader': 'jaimeMF',
|
||||||
|
'timestamp': 1386604920,
|
||||||
'upload_date': '20131209',
|
'upload_date': '20131209',
|
||||||
'duration': 9,
|
'duration': 9,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link (alt format)
|
# private link (alt format)
|
||||||
@@ -99,11 +116,16 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'id': '123998367',
|
'id': '123998367',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||||
'uploader': 'jaimeMF',
|
|
||||||
'description': 'test chars: \"\'/\\ä↭',
|
'description': 'test chars: \"\'/\\ä↭',
|
||||||
|
'uploader': 'jaimeMF',
|
||||||
|
'timestamp': 1386604920,
|
||||||
'upload_date': '20131209',
|
'upload_date': '20131209',
|
||||||
'duration': 9,
|
'duration': 9,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# downloadable song
|
# downloadable song
|
||||||
@@ -116,9 +138,14 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Bus Brakes',
|
'title': 'Bus Brakes',
|
||||||
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
|
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
|
||||||
'uploader': 'oddsamples',
|
'uploader': 'oddsamples',
|
||||||
|
'timestamp': 1389232924,
|
||||||
'upload_date': '20140109',
|
'upload_date': '20140109',
|
||||||
'duration': 17,
|
'duration': 17,
|
||||||
'license': 'cc-by-sa',
|
'license': 'cc-by-sa',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link, downloadable format
|
# private link, downloadable format
|
||||||
@@ -131,9 +158,14 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||||
'uploader': 'Ori Uplift Music',
|
'uploader': 'Ori Uplift Music',
|
||||||
|
'timestamp': 1504206263,
|
||||||
'upload_date': '20170831',
|
'upload_date': '20170831',
|
||||||
'duration': 7449,
|
'duration': 7449,
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# no album art, use avatar pic for thumbnail
|
# no album art, use avatar pic for thumbnail
|
||||||
@@ -146,10 +178,15 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'title': 'Sideways (Prod. Mad Real)',
|
'title': 'Sideways (Prod. Mad Real)',
|
||||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
'uploader': 'garyvee',
|
'uploader': 'garyvee',
|
||||||
|
'timestamp': 1488152409,
|
||||||
'upload_date': '20170226',
|
'upload_date': '20170226',
|
||||||
'duration': 207,
|
'duration': 207,
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'license': 'all-rights-reserved',
|
'license': 'all-rights-reserved',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -157,7 +194,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
|
_CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK'
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
@@ -175,22 +212,33 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
|
||||||
track_id = compat_str(info['id'])
|
track_id = compat_str(info['id'])
|
||||||
|
title = info['title']
|
||||||
name = full_title or track_id
|
name = full_title or track_id
|
||||||
if quiet:
|
if quiet:
|
||||||
self.report_extraction(name)
|
self.report_extraction(name)
|
||||||
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
|
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
|
||||||
if isinstance(thumbnail, compat_str):
|
if isinstance(thumbnail, compat_str):
|
||||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||||
|
username = try_get(info, lambda x: x['user']['username'], compat_str)
|
||||||
|
|
||||||
|
def extract_count(key):
|
||||||
|
return int_or_none(info.get('%s_count' % key))
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
'uploader': info.get('user', {}).get('username'),
|
'uploader': username,
|
||||||
'upload_date': unified_strdate(info.get('created_at')),
|
'timestamp': unified_timestamp(info.get('created_at')),
|
||||||
'title': info['title'],
|
'title': title,
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': int_or_none(info.get('duration'), 1000),
|
'duration': int_or_none(info.get('duration'), 1000),
|
||||||
'webpage_url': info.get('permalink_url'),
|
'webpage_url': info.get('permalink_url'),
|
||||||
'license': info.get('license'),
|
'license': info.get('license'),
|
||||||
|
'view_count': extract_count('playback'),
|
||||||
|
'like_count': extract_count('favoritings'),
|
||||||
|
'comment_count': extract_count('comment'),
|
||||||
|
'repost_count': extract_count('reposts'),
|
||||||
|
'genre': info.get('genre'),
|
||||||
}
|
}
|
||||||
formats = []
|
formats = []
|
||||||
query = {'client_id': self._CLIENT_ID}
|
query = {'client_id': self._CLIENT_ID}
|
||||||
@@ -368,7 +416,6 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
|
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
|
||||||
_API_BASE = 'https://api.soundcloud.com'
|
|
||||||
_API_V2_BASE = 'https://api-v2.soundcloud.com'
|
_API_V2_BASE = 'https://api-v2.soundcloud.com'
|
||||||
|
|
||||||
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
||||||
@@ -389,21 +436,30 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
|
|||||||
next_href, playlist_id, 'Downloading track page %s' % (i + 1))
|
next_href, playlist_id, 'Downloading track page %s' % (i + 1))
|
||||||
|
|
||||||
collection = response['collection']
|
collection = response['collection']
|
||||||
if not collection:
|
|
||||||
break
|
|
||||||
|
|
||||||
def resolve_permalink_url(candidates):
|
if not isinstance(collection, list):
|
||||||
|
collection = []
|
||||||
|
|
||||||
|
# Empty collection may be returned, in this case we proceed
|
||||||
|
# straight to next_href
|
||||||
|
|
||||||
|
def resolve_entry(candidates):
|
||||||
for cand in candidates:
|
for cand in candidates:
|
||||||
if isinstance(cand, dict):
|
if not isinstance(cand, dict):
|
||||||
permalink_url = cand.get('permalink_url')
|
continue
|
||||||
entry_id = self._extract_id(cand)
|
permalink_url = url_or_none(cand.get('permalink_url'))
|
||||||
if permalink_url and permalink_url.startswith('http'):
|
if not permalink_url:
|
||||||
return permalink_url, entry_id
|
continue
|
||||||
|
return self.url_result(
|
||||||
|
permalink_url,
|
||||||
|
ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
|
||||||
|
video_id=self._extract_id(cand),
|
||||||
|
video_title=cand.get('title'))
|
||||||
|
|
||||||
for e in collection:
|
for e in collection:
|
||||||
permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
|
entry = resolve_entry((e, e.get('track'), e.get('playlist')))
|
||||||
if permalink_url:
|
if entry:
|
||||||
entries.append(self.url_result(permalink_url, video_id=entry_id))
|
entries.append(entry)
|
||||||
|
|
||||||
next_href = response.get('next_href')
|
next_href = response.get('next_href')
|
||||||
if not next_href:
|
if not next_href:
|
||||||
@@ -429,46 +485,53 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
|||||||
(?:(?:www|m)\.)?soundcloud\.com/
|
(?:(?:www|m)\.)?soundcloud\.com/
|
||||||
(?P<user>[^/]+)
|
(?P<user>[^/]+)
|
||||||
(?:/
|
(?:/
|
||||||
(?P<rsrc>tracks|sets|reposts|likes|spotlight)
|
(?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
|
||||||
)?
|
)?
|
||||||
/?(?:[?#].*)?$
|
/?(?:[?#].*)?$
|
||||||
'''
|
'''
|
||||||
IE_NAME = 'soundcloud:user'
|
IE_NAME = 'soundcloud:user'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler',
|
'url': 'https://soundcloud.com/soft-cell-official',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '207965082',
|
||||||
'title': 'The Akashic Chronicler (All)',
|
'title': 'Soft Cell (All)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 74,
|
'playlist_mincount': 28,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
'url': 'https://soundcloud.com/soft-cell-official/tracks',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '207965082',
|
||||||
'title': 'The Akashic Chronicler (Tracks)',
|
'title': 'Soft Cell (Tracks)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 37,
|
'playlist_mincount': 27,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
|
'url': 'https://soundcloud.com/soft-cell-official/albums',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '207965082',
|
||||||
'title': 'The Akashic Chronicler (Playlists)',
|
'title': 'Soft Cell (Albums)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
}, {
|
||||||
|
'url': 'https://soundcloud.com/jcv246/sets',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12982173',
|
||||||
|
'title': 'Jordi / cv (Playlists)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
|
'url': 'https://soundcloud.com/jcv246/reposts',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '12982173',
|
||||||
'title': 'The Akashic Chronicler (Reposts)',
|
'title': 'Jordi / cv (Reposts)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 6,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
|
'url': 'https://soundcloud.com/clalberg/likes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '114582580',
|
'id': '11817582',
|
||||||
'title': 'The Akashic Chronicler (Likes)',
|
'title': 'clalberg (Likes)',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 321,
|
'playlist_mincount': 5,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -479,10 +542,11 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_BASE_URL_MAP = {
|
_BASE_URL_MAP = {
|
||||||
'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
|
'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
|
'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
|
||||||
}
|
}
|
||||||
@@ -490,6 +554,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
|
|||||||
_TITLE_MAP = {
|
_TITLE_MAP = {
|
||||||
'all': 'All',
|
'all': 'All',
|
||||||
'tracks': 'Tracks',
|
'tracks': 'Tracks',
|
||||||
|
'albums': 'Albums',
|
||||||
'sets': 'Playlists',
|
'sets': 'Playlists',
|
||||||
'reposts': 'Reposts',
|
'reposts': 'Reposts',
|
||||||
'likes': 'Likes',
|
'likes': 'Likes',
|
||||||
|
@@ -5,14 +5,17 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SpankBangIE(InfoExtractor):
|
class SpankBangIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||||
@@ -41,29 +44,71 @@ class SpankBangIE(InfoExtractor):
|
|||||||
# 4k
|
# 4k
|
||||||
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.spankbang.com/3vvn/play',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://spankbang.com/2y3td/embed/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id, headers={
|
webpage = self._download_webpage(
|
||||||
'Cookie': 'country=US'
|
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||||
})
|
video_id, headers={'Cookie': 'country=US'})
|
||||||
|
|
||||||
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s is not available' % video_id, expected=True)
|
'Video %s is not available' % video_id, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for mobj in re.finditer(
|
|
||||||
r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
def extract_format(format_id, format_url):
|
||||||
webpage):
|
f_url = url_or_none(format_url)
|
||||||
format_id, format_url = mobj.group('id', 'url')
|
if not f_url:
|
||||||
|
return
|
||||||
f = parse_resolution(format_id)
|
f = parse_resolution(format_id)
|
||||||
f.update({
|
f.update({
|
||||||
'url': format_url,
|
'url': f_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
STREAM_URL_PREFIX = 'stream_url_'
|
||||||
|
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
|
||||||
|
% STREAM_URL_PREFIX, webpage):
|
||||||
|
extract_format(mobj.group('id', 'url'))
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
stream_key = self._search_regex(
|
||||||
|
r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'stream key', group='value')
|
||||||
|
|
||||||
|
sb_csrf_session = self._get_cookies(
|
||||||
|
'https://spankbang.com')['sb_csrf_session'].value
|
||||||
|
|
||||||
|
stream = self._download_json(
|
||||||
|
'https://spankbang.com/api/videos/stream', video_id,
|
||||||
|
'Downloading stream JSON', data=urlencode_postdata({
|
||||||
|
'id': stream_key,
|
||||||
|
'data': 0,
|
||||||
|
'sb_csrf_session': sb_csrf_session,
|
||||||
|
}), headers={
|
||||||
|
'Referer': url,
|
||||||
|
'X-CSRFToken': sb_csrf_session,
|
||||||
|
})
|
||||||
|
|
||||||
|
for format_id, format_url in stream.items():
|
||||||
|
if format_id.startswith(STREAM_URL_PREFIX):
|
||||||
|
extract_format(
|
||||||
|
format_id[len(STREAM_URL_PREFIX):], format_url)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
@@ -94,3 +139,33 @@ class SpankBangIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SpankBangPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ug0k',
|
||||||
|
'title': 'Big Ass Titties',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||||
|
|
||||||
|
entries = [self.url_result(
|
||||||
|
'https://spankbang.com/%s/video' % video_id,
|
||||||
|
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||||
|
for video_id in orderedSet(re.findall(
|
||||||
|
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, title)
|
||||||
|
@@ -46,8 +46,12 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
|||||||
_GEO_COUNTRIES = ['US']
|
_GEO_COUNTRIES = ['US']
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
cs = self._parse_json(self._search_regex(
|
root_data = self._parse_json(self._search_regex(
|
||||||
r'window\.__DATA__\s*=\s*({.+})',
|
r'window\.__DATA__\s*=\s*({.+})',
|
||||||
webpage, 'data'), None)['children']
|
webpage, 'data'), None)
|
||||||
c = next(c for c in cs if c.get('type') == 'VideoPlayer')
|
|
||||||
|
def find_sub_data(data, data_type):
|
||||||
|
return next(c for c in data['children'] if c.get('type') == data_type)
|
||||||
|
|
||||||
|
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
|
||||||
return c['props']['media']['video']['config']['uri']
|
return c['props']['media']['video']['config']['uri']
|
||||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class StreamangoIE(InfoExtractor):
|
class StreamangoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net)/(?:f|embed)/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
|
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
|
||||||
'md5': 'e992787515a182f55e38fc97588d802a',
|
'md5': 'e992787515a182f55e38fc97588d802a',
|
||||||
@@ -38,6 +38,9 @@ class StreamangoIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -27,6 +27,7 @@ class TeachableBaseIE(InfoExtractor):
|
|||||||
'market.saleshacker.com': 'saleshacker',
|
'market.saleshacker.com': 'saleshacker',
|
||||||
'learnability.org': 'learnability',
|
'learnability.org': 'learnability',
|
||||||
'edurila.com': 'edurila',
|
'edurila.com': 'edurila',
|
||||||
|
'courses.workitdaily.com': 'workitdaily',
|
||||||
}
|
}
|
||||||
|
|
||||||
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
|
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
|
||||||
@@ -135,7 +136,6 @@ class TeachableIE(TeachableBaseIE):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage, source_url):
|
def _extract_url(webpage, source_url):
|
||||||
if not TeachableIE._is_teachable(webpage):
|
if not TeachableIE._is_teachable(webpage):
|
||||||
print('NOT TEACHABLE')
|
|
||||||
return
|
return
|
||||||
if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
|
if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
|
||||||
return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
|
return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
|
||||||
|
@@ -203,10 +203,8 @@ class TEDIE(InfoExtractor):
|
|||||||
ext_url = None
|
ext_url = None
|
||||||
if service.lower() == 'youtube':
|
if service.lower() == 'youtube':
|
||||||
ext_url = external.get('code')
|
ext_url = external.get('code')
|
||||||
return {
|
|
||||||
'_type': 'url',
|
return self.url_result(ext_url or external['uri'])
|
||||||
'url': ext_url or external['uri'],
|
|
||||||
}
|
|
||||||
|
|
||||||
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
||||||
|
|
||||||
@@ -267,6 +265,8 @@ class TEDIE(InfoExtractor):
|
|||||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||||
'protocol': 'http',
|
'protocol': 'http',
|
||||||
})
|
})
|
||||||
|
if f.get('acodec') == 'none':
|
||||||
|
del f['acodec']
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
audio_download = talk_info.get('audioDownload')
|
audio_download = talk_info.get('audioDownload')
|
||||||
|
@@ -61,8 +61,4 @@ class TestURLIE(InfoExtractor):
|
|||||||
|
|
||||||
self.to_screen('Test URL: %s' % tc['url'])
|
self.to_screen('Test URL: %s' % tc['url'])
|
||||||
|
|
||||||
return {
|
return self.url_result(tc['url'], video_id=video_id)
|
||||||
'_type': 'url',
|
|
||||||
'url': tc['url'],
|
|
||||||
'id': video_id,
|
|
||||||
}
|
|
||||||
|
@@ -96,7 +96,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
cfg_xml = self._download_xml(
|
cfg_xml = self._download_xml(
|
||||||
cfg_url, display_id, 'Downloading metadata',
|
cfg_url, display_id, 'Downloading metadata',
|
||||||
transform_source=fix_xml_ampersands)
|
transform_source=fix_xml_ampersands, headers={'Referer': url})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
@@ -1,24 +1,21 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .radiocanada import RadioCanadaIE
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
merge_dicts,
|
||||||
urlencode_postdata,
|
|
||||||
extract_attributes,
|
|
||||||
smuggle_url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TouTvIE(InfoExtractor):
|
class TouTvIE(RadioCanadaIE):
|
||||||
_NETRC_MACHINE = 'toutv'
|
_NETRC_MACHINE = 'toutv'
|
||||||
IE_NAME = 'tou.tv'
|
IE_NAME = 'tou.tv'
|
||||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
|
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
|
||||||
_access_token = None
|
|
||||||
_claims = None
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
||||||
@@ -41,57 +38,31 @@ class TouTvIE(InfoExtractor):
|
|||||||
'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
|
'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
email, password = self._get_login_info()
|
email, password = self._get_login_info()
|
||||||
if email is None:
|
if email is None:
|
||||||
return
|
return
|
||||||
state = 'http://ici.tou.tv/'
|
try:
|
||||||
webpage = self._download_webpage(state, None, 'Downloading homepage')
|
self._access_token = self._download_json(
|
||||||
toutvlogin = self._parse_json(self._search_regex(
|
'https://services.radio-canada.ca/toutv/profiling/accounts/login',
|
||||||
r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
|
None, 'Logging in', data=json.dumps({
|
||||||
authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
|
'ClientId': self._CLIENT_KEY,
|
||||||
login_webpage = self._download_webpage(
|
'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
|
||||||
authorize_url, None, 'Downloading login page', query={
|
'Email': email,
|
||||||
'client_id': toutvlogin['clientId'],
|
'Password': password,
|
||||||
'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
|
'Scope': 'id.write media-validation.read',
|
||||||
'response_type': 'token',
|
}).encode(), headers={
|
||||||
'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
|
'Authorization': 'client-key ' + self._CLIENT_KEY,
|
||||||
'state': state,
|
'Content-Type': 'application/json;charset=utf-8',
|
||||||
})
|
})['access_token']
|
||||||
|
except ExtractorError as e:
|
||||||
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
form, form_elem = re.search(
|
error = self._parse_json(e.cause.read().decode(), None)['Message']
|
||||||
r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups()
|
raise ExtractorError(error, expected=True)
|
||||||
form_data = self._hidden_inputs(form)
|
raise
|
||||||
form_url = extract_attributes(form_elem).get('action') or default_form_url
|
self._claims = self._call_api('validation/v2/getClaims')['claims']
|
||||||
return form_url, form_data
|
|
||||||
|
|
||||||
post_url, form_data = extract_form_url_and_data(
|
|
||||||
login_webpage,
|
|
||||||
'https://services.radio-canada.ca/auth/oauth/v2/authorize/login',
|
|
||||||
r'(?:id|name)="Form-login"')
|
|
||||||
form_data.update({
|
|
||||||
'login-email': email,
|
|
||||||
'login-password': password,
|
|
||||||
})
|
|
||||||
consent_webpage = self._download_webpage(
|
|
||||||
post_url, None, 'Logging in', data=urlencode_postdata(form_data))
|
|
||||||
post_url, form_data = extract_form_url_and_data(
|
|
||||||
consent_webpage,
|
|
||||||
'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent')
|
|
||||||
_, urlh = self._download_webpage_handle(
|
|
||||||
post_url, None, 'Following Redirection',
|
|
||||||
data=urlencode_postdata(form_data))
|
|
||||||
self._access_token = self._search_regex(
|
|
||||||
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
|
||||||
urlh.geturl(), 'access token')
|
|
||||||
self._claims = self._download_json(
|
|
||||||
'https://services.radio-canada.ca/media/validation/v2/getClaims',
|
|
||||||
None, 'Extracting Claims', query={
|
|
||||||
'token': self._access_token,
|
|
||||||
'access_token': self._access_token,
|
|
||||||
})['claims']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path = self._match_id(url)
|
path = self._match_id(url)
|
||||||
@@ -102,19 +73,10 @@ class TouTvIE(InfoExtractor):
|
|||||||
self.report_warning('This video is probably DRM protected.', path)
|
self.report_warning('This video is probably DRM protected.', path)
|
||||||
video_id = metadata['IdMedia']
|
video_id = metadata['IdMedia']
|
||||||
details = metadata['Details']
|
details = metadata['Details']
|
||||||
title = details['OriginalTitle']
|
|
||||||
video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
|
|
||||||
if self._access_token and self._claims:
|
|
||||||
video_url = smuggle_url(video_url, {
|
|
||||||
'access_token': self._access_token,
|
|
||||||
'claims': self._claims,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return merge_dicts({
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': video_url,
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': details.get('OriginalTitle'),
|
||||||
'thumbnail': details.get('ImageUrl'),
|
'thumbnail': details.get('ImageUrl'),
|
||||||
'duration': int_or_none(details.get('LengthInSeconds')),
|
'duration': int_or_none(details.get('LengthInSeconds')),
|
||||||
}
|
}, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
|
||||||
|
75
youtube_dl/extractor/trunews.py
Normal file
75
youtube_dl/extractor/trunews.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TruNewsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
|
||||||
|
'md5': 'a19c024c3906ff954fac9b96ce66bb08',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5c5a21e65d3c196e1c0020cc',
|
||||||
|
'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
|
||||||
|
'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
|
||||||
|
'duration': 3685,
|
||||||
|
'timestamp': 1549411440,
|
||||||
|
'upload_date': '20190206',
|
||||||
|
},
|
||||||
|
'add_ie': ['Zype'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
'https://api.zype.com/videos', display_id, query={
|
||||||
|
'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
|
||||||
|
'per_page': 1,
|
||||||
|
'active': 'true',
|
||||||
|
'friendly_title': display_id,
|
||||||
|
})['response'][0]
|
||||||
|
|
||||||
|
zype_id = video['_id']
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
thumbnails_list = video.get('thumbnails')
|
||||||
|
if isinstance(thumbnails_list, list):
|
||||||
|
for thumbnail in thumbnails_list:
|
||||||
|
if not isinstance(thumbnail, dict):
|
||||||
|
continue
|
||||||
|
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int_or_none(thumbnail.get('width')),
|
||||||
|
'height': int_or_none(thumbnail.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': update_url_query(
|
||||||
|
'https://player.zype.com/embed/%s.js' % zype_id,
|
||||||
|
{'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
|
||||||
|
'ie_key': 'Zype',
|
||||||
|
'id': zype_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'description': dict_get(video, ('description', 'ott_description', 'short_description')),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(video.get('published_at')),
|
||||||
|
'average_rating': float_or_none(video.get('rating')),
|
||||||
|
'view_count': int_or_none(video.get('request_count')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
}
|
@@ -4,44 +4,72 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .turner import TurnerBaseIE
|
from .turner import TurnerBaseIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TruTVIE(TurnerBaseIE):
|
class TruTVIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
|
_VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
|
'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
|
||||||
'md5': '2cdc844f317579fed1a7251b087ff417',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
|
'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'You Won\'t Believe These Sports Bets',
|
'title': 'Sunlight-Activated Flower',
|
||||||
'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
|
'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
|
||||||
'upload_date': '20130305',
|
},
|
||||||
}
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
auth_required = False
|
|
||||||
if path:
|
if video_id:
|
||||||
data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
|
path = 'episode'
|
||||||
|
display_id = video_id
|
||||||
else:
|
else:
|
||||||
webpage = self._download_webpage(url, video_id)
|
path = 'series/clip'
|
||||||
video_id = self._search_regex(
|
display_id = clip_slug
|
||||||
r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
|
|
||||||
webpage, 'video id', default=video_id)
|
data = self._download_json(
|
||||||
auth_required = self._search_regex(
|
'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
|
||||||
r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
|
display_id)
|
||||||
webpage, 'auth required', default='false') == 'true'
|
video_data = data['episode'] if video_id else data['info']
|
||||||
data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
|
media_id = video_data['mediaId']
|
||||||
return self._extract_cvp_info(
|
title = video_data['title'].strip()
|
||||||
data_src, path, {
|
|
||||||
'secure': {
|
info = self._extract_ngtv_info(
|
||||||
'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
|
media_id, {}, {
|
||||||
'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': url,
|
'url': url,
|
||||||
'site_name': 'truTV',
|
'site_name': 'truTV',
|
||||||
'auth_required': auth_required,
|
'auth_required': video_data.get('isAuthRequired'),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for image in video_data.get('images', []):
|
||||||
|
image_url = image.get('srcUrl')
|
||||||
|
if not image_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'id': media_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': parse_iso8601(video_data.get('publicationDate')),
|
||||||
|
'series': video_data.get('showTitle'),
|
||||||
|
'season_number': int_or_none(video_data.get('seasonNum')),
|
||||||
|
'episode_number': int_or_none(video_data.get('episodeNum')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user