mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-17 05:38:35 +09:00
Compare commits
346 Commits
2018.09.26
...
2019.03.01
Author | SHA1 | Date | |
---|---|---|---|
![]() |
04c33bdfb3 | ||
![]() |
333f617b12 | ||
![]() |
ff60ec8f02 | ||
![]() |
9d9a8676dc | ||
![]() |
db1c3a9d3f | ||
![]() |
55b8588f0e | ||
![]() |
f0228f56fb | ||
![]() |
8c80603f1a | ||
![]() |
37b239b3b6 | ||
![]() |
caf48f557a | ||
![]() |
77a842c892 | ||
![]() |
c76fc5b22a | ||
![]() |
388cfbd3d8 | ||
![]() |
d93083789b | ||
![]() |
34568dc296 | ||
![]() |
3c9647372e | ||
![]() |
659e93fcf5 | ||
![]() |
c9a0ea6e51 | ||
![]() |
d7d513891b | ||
![]() |
ae65c93a26 | ||
![]() |
ba2e3730d1 | ||
![]() |
2b2da3ba10 | ||
![]() |
794c1b6e02 | ||
![]() |
7bee705d8f | ||
![]() |
6f5c1807f4 | ||
![]() |
985637cbbf | ||
![]() |
7d8b89163c | ||
![]() |
d777f3e81c | ||
![]() |
4c0e0dc9dc | ||
![]() |
f516f44094 | ||
![]() |
e9dee7f1b2 | ||
![]() |
91effe22a0 | ||
![]() |
04eacf5453 | ||
![]() |
f1f5b47255 | ||
![]() |
1211bb6dac | ||
![]() |
4de3cb883c | ||
![]() |
22f5f5c6fc | ||
![]() |
49bd993fd9 | ||
![]() |
f06a1cabe8 | ||
![]() |
241c5d7d38 | ||
![]() |
8fecc7353d | ||
![]() |
5dda1edef9 | ||
![]() |
d2d970d07e | ||
![]() |
48fb963b2f | ||
![]() |
70c3ee1367 | ||
![]() |
07fbfef1c7 | ||
![]() |
eecf788b90 | ||
![]() |
0efcb5a2fe | ||
![]() |
7c5307f4c4 | ||
![]() |
6cc6e0c34d | ||
![]() |
b9bc1cff72 | ||
![]() |
e9fef7ee4e | ||
![]() |
b6423e6ca2 | ||
![]() |
3ef2da2d21 | ||
![]() |
49fe4175ae | ||
![]() |
9613e14a92 | ||
![]() |
15e832ff2a | ||
![]() |
645c4885cf | ||
![]() |
7b0f9df23d | ||
![]() |
c2a0fe2ea7 | ||
![]() |
ce52c7c111 | ||
![]() |
1063b4c707 | ||
![]() |
ca01e5f903 | ||
![]() |
5496754ae4 | ||
![]() |
9868f1ab18 | ||
![]() |
41cff90c41 | ||
![]() |
a2d821d711 | ||
![]() |
6df196f32e | ||
![]() |
41c2c254d3 | ||
![]() |
a81daba231 | ||
![]() |
61ff92e11e | ||
![]() |
1397a790ff | ||
![]() |
7f903dd8bf | ||
![]() |
2b3afe6b0f | ||
![]() |
e71be6ee9f | ||
![]() |
bf8ebc9cfe | ||
![]() |
1fcc91663b | ||
![]() |
30cd1a5f39 | ||
![]() |
458fd30f56 | ||
![]() |
845333acf6 | ||
![]() |
252abb1e8b | ||
![]() |
ae18d58297 | ||
![]() |
1602a240a7 | ||
![]() |
0eba178fce | ||
![]() |
eb35b163ad | ||
![]() |
118afcf52f | ||
![]() |
9713d1d1e0 | ||
![]() |
a1e171233d | ||
![]() |
7d311586ed | ||
![]() |
e118a8794f | ||
![]() |
435e382423 | ||
![]() |
0670bdd8f2 | ||
![]() |
71a1f61700 | ||
![]() |
6510a3aa97 | ||
![]() |
278d061a0c | ||
![]() |
503b604a31 | ||
![]() |
4b85f0f9db | ||
![]() |
19d6991312 | ||
![]() |
07f9febc4b | ||
![]() |
fad4ceb534 | ||
![]() |
6945b9e78f | ||
![]() |
29cfcb43da | ||
![]() |
a1a4607598 | ||
![]() |
73c19aaa9f | ||
![]() |
289ef490f7 | ||
![]() |
6ca3fa898c | ||
![]() |
31fbedc06a | ||
![]() |
15870747f0 | ||
![]() |
fc746c3fdd | ||
![]() |
4e58d9fabb | ||
![]() |
2cc779f497 | ||
![]() |
379306ef55 | ||
![]() |
f28363ad1f | ||
![]() |
2bfc1d9d68 | ||
![]() |
e2dd132f05 | ||
![]() |
79fec976b0 | ||
![]() |
29639b363d | ||
![]() |
f53cecd796 | ||
![]() |
fa4ac365f6 | ||
![]() |
bfc8eeea57 | ||
![]() |
b0d73a7456 | ||
![]() |
4fe54c128a | ||
![]() |
a16c7c033a | ||
![]() |
2f483bc1c3 | ||
![]() |
561b456e2d | ||
![]() |
929ba3997b | ||
![]() |
10026329c2 | ||
![]() |
3b983ee471 | ||
![]() |
f1ab3b7de7 | ||
![]() |
d65f6e734b | ||
![]() |
ed8db0a25c | ||
![]() |
60a899bb7e | ||
![]() |
cbdc688c41 | ||
![]() |
5caa531a1a | ||
![]() |
a64646e417 | ||
![]() |
c469e8808c | ||
![]() |
b64f6e690f | ||
![]() |
a4491dd55c | ||
![]() |
c3e543893b | ||
![]() |
432aba1c5e | ||
![]() |
7c072f00d6 | ||
![]() |
96c186e1fd | ||
![]() |
4ad159c7b0 | ||
![]() |
65615be368 | ||
![]() |
3c1089dba4 | ||
![]() |
6089ff40e7 | ||
![]() |
2543938bbe | ||
![]() |
440863ade1 | ||
![]() |
391256dc0e | ||
![]() |
06b4b90c70 | ||
![]() |
8cb5c2181a | ||
![]() |
0266854f63 | ||
![]() |
bcc334a3c6 | ||
![]() |
e9a50fba86 | ||
![]() |
04fb6928da | ||
![]() |
b7acc83550 | ||
![]() |
de0359c0af | ||
![]() |
c87f65e43d | ||
![]() |
d7c3af7a72 | ||
![]() |
aeb72b3a41 | ||
![]() |
2122d7151d | ||
![]() |
751e051557 | ||
![]() |
d226c560a6 | ||
![]() |
8437f5089f | ||
![]() |
1d803085d7 | ||
![]() |
696f4e4114 | ||
![]() |
0e713dbb11 | ||
![]() |
9b5c8751ee | ||
![]() |
d9f1123c08 | ||
![]() |
3d8eb6beb9 | ||
![]() |
38d15ba7f9 | ||
![]() |
6b688b8942 | ||
![]() |
9d9daed464 | ||
![]() |
32ac3d49ae | ||
![]() |
373941c5f0 | ||
![]() |
4e1ddc8da9 | ||
![]() |
e4d51e751e | ||
![]() |
c2dd2dc086 | ||
![]() |
140a13f5de | ||
![]() |
825cd268a3 | ||
![]() |
63529e935c | ||
![]() |
4273caf5c7 | ||
![]() |
e1a0628797 | ||
![]() |
835e45abab | ||
![]() |
904bb599be | ||
![]() |
65e29cdac3 | ||
![]() |
4ee1845454 | ||
![]() |
cfd13c4c45 | ||
![]() |
386d1fea79 | ||
![]() |
7216e9bff7 | ||
![]() |
4cee62ade0 | ||
![]() |
cbb3e4b14f | ||
![]() |
752582183a | ||
![]() |
1c82122741 | ||
![]() |
50a498a68e | ||
![]() |
252e172dea | ||
![]() |
90046d7761 | ||
![]() |
c8b3751086 | ||
![]() |
21c340b83f | ||
![]() |
c984196cf1 | ||
![]() |
7f41a598b3 | ||
![]() |
8fe104947d | ||
![]() |
0a05cfabb6 | ||
![]() |
13e17cd28e | ||
![]() |
102a4e54c5 | ||
![]() |
6e29458f24 | ||
![]() |
59c3940165 | ||
![]() |
cefe42c412 | ||
![]() |
24cc64254c | ||
![]() |
9e02c2c704 | ||
![]() |
5ee7ae5c75 | ||
![]() |
3ad6dabd33 | ||
![]() |
5f47a60c5d | ||
![]() |
1bab343704 | ||
![]() |
1d88b3e6e6 | ||
![]() |
9235b5091c | ||
![]() |
c3c098dcf2 | ||
![]() |
8c5879715f | ||
![]() |
ebb0449049 | ||
![]() |
dfe0a3a9d2 | ||
![]() |
c976873c5b | ||
![]() |
15699ec8b0 | ||
![]() |
33cc1ea586 | ||
![]() |
ae9d77dab5 | ||
![]() |
8bb0c9cc16 | ||
![]() |
5547014ad9 | ||
![]() |
ab896fa894 | ||
![]() |
1fa59a928e | ||
![]() |
ce18a19be9 | ||
![]() |
1ead840d2c | ||
![]() |
aa374bc78e | ||
![]() |
3430ff9b07 | ||
![]() |
f012823082 | ||
![]() |
16597c2f94 | ||
![]() |
adbbdefc81 | ||
![]() |
053e5b12b2 | ||
![]() |
d9df8f120b | ||
![]() |
ca01d17884 | ||
![]() |
d19600df07 | ||
![]() |
641e86e3cf | ||
![]() |
6864855eb1 | ||
![]() |
d861a9d581 | ||
![]() |
66173211c4 | ||
![]() |
6f2883a2df | ||
![]() |
560020da30 | ||
![]() |
305ce767d5 | ||
![]() |
157eef3e63 | ||
![]() |
bd2d553c7b | ||
![]() |
af60e81e3c | ||
![]() |
a843464a7e | ||
![]() |
6866f24494 | ||
![]() |
4e33e0792a | ||
![]() |
35328915b5 | ||
![]() |
6c882aa899 | ||
![]() |
183417a50f | ||
![]() |
6a6d7f0641 | ||
![]() |
05bd5e9c77 | ||
![]() |
15ed5a2784 | ||
![]() |
2e1280ed43 | ||
![]() |
8578ea4dcb | ||
![]() |
9b27a78a88 | ||
![]() |
964b989dc8 | ||
![]() |
f97c099131 | ||
![]() |
1febf99da1 | ||
![]() |
4167148fa4 | ||
![]() |
5bb0479269 | ||
![]() |
02df855e13 | ||
![]() |
006374e3ae | ||
![]() |
11d19ff503 | ||
![]() |
a640c4d226 | ||
![]() |
d0058c76d5 | ||
![]() |
0919cd4d01 | ||
![]() |
2599956c9f | ||
![]() |
9b9b3501c5 | ||
![]() |
730c0d12a0 | ||
![]() |
f17a24a6df | ||
![]() |
83852e57bf | ||
![]() |
96a91b1551 | ||
![]() |
cab26223bf | ||
![]() |
532782ade1 | ||
![]() |
f81d44aab6 | ||
![]() |
2511eee215 | ||
![]() |
0df514f07e | ||
![]() |
432cd48410 | ||
![]() |
c0345b825f | ||
![]() |
2004e2210b | ||
![]() |
16d896b2a7 | ||
![]() |
22e07ce502 | ||
![]() |
dbdaaa231a | ||
![]() |
38c32dbf19 | ||
![]() |
a085410936 | ||
![]() |
6895ea4d3f | ||
![]() |
faac1c1f70 | ||
![]() |
573531dcfb | ||
![]() |
da56fb631f | ||
![]() |
95e42d7336 | ||
![]() |
cf0db4d997 | ||
![]() |
036f905161 | ||
![]() |
4b6aca17cc | ||
![]() |
c620694c97 | ||
![]() |
061ea3a776 | ||
![]() |
c70ba664f1 | ||
![]() |
f16679e843 | ||
![]() |
b14475724b | ||
![]() |
aa7e974a2a | ||
![]() |
9aac22c195 | ||
![]() |
94db1f7f3b | ||
![]() |
ffa7b2bfee | ||
![]() |
2943397e87 | ||
![]() |
9c4a83a1be | ||
![]() |
9ff558f67f | ||
![]() |
c2fe21efaa | ||
![]() |
476cf548e1 | ||
![]() |
bebef10909 | ||
![]() |
4c237ab787 | ||
![]() |
a1d1c63678 | ||
![]() |
1fafb32984 | ||
![]() |
c901cc38e5 | ||
![]() |
022218f2f0 | ||
![]() |
08c7d3dade | ||
![]() |
5e733b066a | ||
![]() |
7d9e858132 | ||
![]() |
b99b0bcfa0 | ||
![]() |
baeabf7742 | ||
![]() |
582797d780 | ||
![]() |
160c2773f6 | ||
![]() |
ee5fe42e44 | ||
![]() |
f0ee386851 | ||
![]() |
a94e7c195e | ||
![]() |
5d90a8a5f3 | ||
![]() |
19a352854f | ||
![]() |
c9d891f19a | ||
![]() |
d96f976b0c | ||
![]() |
2e7ed29e34 | ||
![]() |
21c1a00dd7 | ||
![]() |
0082f44a08 | ||
![]() |
f60b9803a4 | ||
![]() |
d98cb62e55 | ||
![]() |
05e7c184da | ||
![]() |
66d106f270 | ||
![]() |
3c7da54c92 | ||
![]() |
9795d93316 | ||
![]() |
365343131d | ||
![]() |
85fa80d5f9 | ||
![]() |
245cbb33bc | ||
![]() |
85cd69adcb |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.26**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.09.26
|
||||
[debug] youtube-dl version 2019.03.01
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
12
.travis.yml
12
.travis.yml
@@ -15,6 +15,18 @@ env:
|
||||
- YTDL_TEST_SET=download
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
fast_finish: true
|
||||
|
@@ -152,16 +152,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -173,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
@@ -181,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -257,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
### Regular expressions
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
#### Don't capture groups you don't use
|
||||
|
||||
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
|
||||
|
||||
##### Example
|
||||
|
||||
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
r'(?:id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
```python
|
||||
r'(id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
|
||||
#### Make regular expressions relaxed and flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||
|
||||
#### Example
|
||||
##### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
@@ -294,7 +320,49 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
### Long lines policy
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||
|
||||
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
|
403
ChangeLog
403
ChangeLog
@@ -1,3 +1,406 @@
|
||||
version 2019.03.01
|
||||
|
||||
Core
|
||||
+ [downloader/external] Add support for rate limit and retries for wget
|
||||
* [downloader/external] Fix infinite retries for curl (#19303)
|
||||
|
||||
Extractors
|
||||
* [npo] Fix extraction (#20084)
|
||||
* [francetv:site] Extend video id regex (#20029, #20071)
|
||||
+ [periscope] Extract width and height (#20015)
|
||||
* [servus] Fix extraction (#19297)
|
||||
* [bbccouk] Make subtitles non fatal (#19651)
|
||||
* [metacafe] Fix family filter bypass (#19287)
|
||||
|
||||
|
||||
version 2019.02.18
|
||||
|
||||
Extractors
|
||||
* [tvp:website] Fix and improve extraction
|
||||
+ [tvp] Detect unavailable videos
|
||||
* [tvp] Fix description extraction and make thumbnail optional
|
||||
+ [linuxacademy] Add support for linuxacademy.com (#12207)
|
||||
* [bilibili] Update keys (#19233)
|
||||
* [udemy] Extend URL regular expressions (#14330, #15883)
|
||||
* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
|
||||
* [noovo] Fix extraction (#19230)
|
||||
* [rai] Relax URL regular expression (#19232)
|
||||
+ [vshare] Pass Referer to download request (#19205, #19221)
|
||||
+ [openload] Add support for oload.live (#19222)
|
||||
* [imgur] Use video id as title fallback (#18590)
|
||||
+ [twitch] Add new source format detection approach (#19193)
|
||||
* [tvplayhome] Fix video id extraction (#19190)
|
||||
* [tvplayhome] Fix episode metadata extraction (#19190)
|
||||
* [rutube:embed] Fix extraction (#19163)
|
||||
+ [rutube:embed] Add support private videos (#19163)
|
||||
+ [soundcloud] Extract more metadata
|
||||
+ [trunews] Add support for trunews.com (#19153)
|
||||
+ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
|
||||
|
||||
|
||||
version 2019.02.08
|
||||
|
||||
Core
|
||||
* [utils] Improve JSON-LD regular expression (#18058)
|
||||
* [YoutubeDL] Fallback to ie_key of matching extractor while making
|
||||
download archive id when no explicit ie_key is provided (#19022)
|
||||
|
||||
Extractors
|
||||
+ [malltv] Add support for mall.tv (#18058, #17856)
|
||||
+ [spankbang:playlist] Add support for playlists (#19145)
|
||||
* [spankbang] Extend URL regular expression
|
||||
* [trutv] Fix extraction (#17336)
|
||||
* [toutv] Fix authentication (#16398, #18700)
|
||||
* [pornhub] Fix tags and categories extraction (#13720, #19135)
|
||||
* [pornhd] Fix formats extraction
|
||||
+ [pornhd] Extract like count (#19123, #19125)
|
||||
* [radiocanada] Switch to the new media requests (#19115)
|
||||
+ [teachable] Add support for courses.workitdaily.com (#18871)
|
||||
- [vporn] Remove extractor (#16276)
|
||||
+ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
|
||||
+ [drtuber] Extract duration (#19078)
|
||||
* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
|
||||
* [soundcloud] Update client id
|
||||
* [drtv] Improve preference (#19079)
|
||||
+ [openload] Add support for openload.pw and oload.pw (#18930)
|
||||
+ [openload] Add support for oload.info (#19073)
|
||||
* [crackle] Authorize media detail request (#16931)
|
||||
|
||||
|
||||
version 2019.01.30.1
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
|
||||
|
||||
|
||||
version 2019.01.30
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
|
||||
subtitles (#19024, #19042)
|
||||
* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
|
||||
|
||||
Extractors
|
||||
* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
|
||||
* [drtv] Improve extraction (#19039)
|
||||
+ Add support for EncryptedUri videos
|
||||
+ Extract more metadata
|
||||
* Fix subtitles extraction
|
||||
+ [fox] Add support for locked videos using cookies (#19060)
|
||||
* [fox] Fix extraction for free videos (#19060)
|
||||
+ [zattoo] Add support for tv.salt.ch (#19059)
|
||||
|
||||
|
||||
version 2019.01.27
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract season in _json_ld
|
||||
* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
|
||||
(#681)
|
||||
|
||||
Extractors
|
||||
* [vice] Fix extraction for locked videos (#16248)
|
||||
+ [wakanim] Detect DRM protected videos
|
||||
+ [wakanim] Add support for wakanim.tv (#14374)
|
||||
* [usatoday] Fix extraction for videos with custom brightcove partner id
|
||||
(#18990)
|
||||
* [drtv] Fix extraction (#18989)
|
||||
* [nhk] Extend URL regular expression (#18968)
|
||||
* [go] Fix Adobe Pass requests for Disney Now (#18901)
|
||||
+ [openload] Add support for oload.club (#18969)
|
||||
|
||||
|
||||
version 2019.01.24
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Fix negation for string operators in format selection (#18961)
|
||||
|
||||
|
||||
version 2019.01.23
|
||||
|
||||
Core
|
||||
* [utils] Fix urljoin for paths with non-http(s) schemes
|
||||
* [extractor/common] Improve jwplayer relative URL handling (#18892)
|
||||
+ [YoutubeDL] Add negation support for string comparisons in format selection
|
||||
expressions (#18600, #18805)
|
||||
* [extractor/common] Improve HLS video-only format detection (#18923)
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Extend URL regular expression (#18955)
|
||||
* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
|
||||
#17197, #18338 #18842, #18899)
|
||||
+ [vrv] Add support for authentication (#14307)
|
||||
* [videomore:season] Fix extraction
|
||||
* [videomore] Improve extraction (#18908)
|
||||
+ [tnaflix] Pass Referer in metadata request (#18925)
|
||||
* [radiocanada] Relax DRM check (#18608, #18609)
|
||||
* [vimeo] Fix video password verification for videos protected by
|
||||
Referer HTTP header
|
||||
+ [hketv] Add support for hkedcity.net (#18696)
|
||||
+ [streamango] Add support for fruithosts.net (#18710)
|
||||
+ [instagram] Add support for tags (#18757)
|
||||
+ [odnoklassniki] Detect paid videos (#18876)
|
||||
* [ted] Correct acodec for HTTP formats (#18923)
|
||||
* [cartoonnetwork] Fix extraction (#15664, #17224)
|
||||
* [vimeo] Fix extraction for password protected player URLs (#18889)
|
||||
|
||||
|
||||
version 2019.01.17
|
||||
|
||||
Extractors
|
||||
* [youtube] Extend JS player signature function name regular expressions
|
||||
(#18890, #18891, #18893)
|
||||
|
||||
|
||||
version 2019.01.16
|
||||
|
||||
Core
|
||||
+ [test/helper] Add support for maxcount and count collection len checkers
|
||||
* [downloader/hls] Fix uplynk ad skipping (#18824)
|
||||
* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
|
||||
|
||||
Extractors
|
||||
* [youtube] Skip unsupported adaptive stream type (#18804)
|
||||
+ [youtube] Extract DASH formats from player response (#18804)
|
||||
* [funimation] Fix extraction (#14089)
|
||||
* [skylinewebcams] Fix extraction (#18853)
|
||||
+ [curiositystream] Add support for non app URLs
|
||||
+ [bitchute] Check formats (#18833)
|
||||
* [wistia] Extend URL regular expression (#18823)
|
||||
+ [playplustv] Add support for playplus.com (#18789)
|
||||
|
||||
|
||||
version 2019.01.10
|
||||
|
||||
Core
|
||||
* [extractor/common] Use episode name as title in _json_ld
|
||||
+ [extractor/common] Add support for movies in _json_ld
|
||||
* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
|
||||
(#18765)
|
||||
+ [utils] Add language codes replaced in 1989 revision of ISO 639
|
||||
to ISO639Utils (#18765)
|
||||
|
||||
Extractors
|
||||
* [youtube] Extract live HLS URL from player response (#18799)
|
||||
+ [outsidetv] Add support for outsidetv.com (#18774)
|
||||
* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
|
||||
+ [fox] Add support National Geographic (#17985, #15333, #14698)
|
||||
+ [playplustv] Add support for playplus.tv (#18789)
|
||||
* [globo] Set GLBID cookie manually (#17346)
|
||||
+ [gaia] Add support for gaia.com (#14605)
|
||||
* [youporn] Fix title and description extraction (#18748)
|
||||
+ [hungama] Add support for hungama.com (#17402, #18771)
|
||||
* [dtube] Fix extraction (#18741)
|
||||
* [tvnow] Fix and rework extractors and prepare for a switch to the new API
|
||||
(#17245, #18499)
|
||||
* [carambatv:page] Fix extraction (#18739)
|
||||
|
||||
|
||||
version 2019.01.02
|
||||
|
||||
Extractors
|
||||
* [discovery] Use geo verification headers (#17838)
|
||||
+ [packtpub] Add support for subscription.packtpub.com (#18718)
|
||||
* [yourporn] Fix extraction (#18583)
|
||||
+ [acast:channel] Add support for play.acast.com (#18587)
|
||||
+ [extractors] Add missing age limits (#18621)
|
||||
+ [rmcdecouverte] Add support for live stream
|
||||
* [rmcdecouverte] Bypass geo restriction
|
||||
* [rmcdecouverte] Update URL regular expression (#18595, 18697)
|
||||
* [manyvids] Fix extraction (#18604, #18614)
|
||||
* [bitchute] Fix extraction (#18567)
|
||||
|
||||
|
||||
version 2018.12.31
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for another embed pattern (#18643)
|
||||
+ [npo:live] Add support for npostart.nl (#18644)
|
||||
* [beeg] Fix extraction (#18610, #18626)
|
||||
* [youtube] Unescape HTML for series (#18641)
|
||||
+ [youtube] Extract more format metadata
|
||||
* [youtube] Detect DRM protected videos (#1774)
|
||||
* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
|
||||
* [youtube] Extend HTML5 player regular expression (#17516)
|
||||
+ [liveleak] Add support for another embed type and restore original
|
||||
format extraction
|
||||
+ [crackle] Extract ISM and HTTP formats
|
||||
+ [twitter] Pass Referer with card request (#18579)
|
||||
* [mediasite] Extend URL regular expression (#18558)
|
||||
+ [lecturio] Add support for lecturio.de (#18562)
|
||||
+ [discovery] Add support for Scripps Networks watch domains (#17947)
|
||||
|
||||
|
||||
version 2018.12.17
|
||||
|
||||
Extractors
|
||||
* [ard:beta] Improve geo restricted videos extraction
|
||||
* [ard:beta] Fix subtitles extraction
|
||||
* [ard:beta] Improve extraction robustness
|
||||
* [ard:beta] Relax URL regular expression (#18441)
|
||||
* [acast] Add support for embed.acast.com and play.acast.com (#18483)
|
||||
* [iprima] Relax URL regular expression (#18515, #18540)
|
||||
* [vrv] Fix initial state extraction (#18553)
|
||||
* [youtube] Fix mark watched (#18546)
|
||||
+ [safari] Add support for learning.oreilly.com (#18510)
|
||||
* [youtube] Fix multifeed extraction (#18531)
|
||||
* [lecturio] Improve subtitles extraction (#18488)
|
||||
* [uol] Fix format URL extraction (#18480)
|
||||
+ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
|
||||
|
||||
|
||||
version 2018.12.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Keep session cookies in cookie file between runs
|
||||
* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
|
||||
|
||||
Extractors
|
||||
+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
|
||||
+ [aenetworks] Add support for historyvault.com (#18460)
|
||||
* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
|
||||
#17223, #18404)
|
||||
* [iprima] Relax URL regular expression (#18453)
|
||||
* [hotstar] Fix video data extraction (#18386)
|
||||
* [ard:mediathek] Fix title and description extraction (#18349, #18371)
|
||||
* [xvideos] Switch to HTTPS (#18422, #18427)
|
||||
+ [lecturio] Add support for lecturio.com (#18405)
|
||||
+ [nrktv:series] Add support for extra materials
|
||||
* [nrktv:season,series] Fix extraction (#17159, #17258)
|
||||
* [nrktv] Relax URL regular expression (#18304, #18387)
|
||||
* [yourporn] Fix extraction (#18424, #18425)
|
||||
* [tbs] Fix info extraction (#18403)
|
||||
+ [gamespot] Add support for review URLs
|
||||
|
||||
|
||||
version 2018.12.03
|
||||
|
||||
Core
|
||||
* [utils] Fix random_birthday to generate existing dates only (#18284)
|
||||
|
||||
Extractors
|
||||
+ [tiktok] Add support for tiktok.com (#18108, #18135)
|
||||
* [pornhub] Use actual URL host for requests (#18359)
|
||||
* [lynda] Fix authentication (#18158, #18217)
|
||||
* [gfycat] Update API endpoint (#18333, #18343)
|
||||
+ [hotstar] Add support for alternative app state layout (#18320)
|
||||
* [azmedien] Fix extraction (#18334, #18336)
|
||||
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
|
||||
* [joj] Fix extraction (#18280, #18281)
|
||||
+ [wistia] Add support for fast.wistia.com (#18287)
|
||||
|
||||
|
||||
version 2018.11.23
|
||||
|
||||
Core
|
||||
+ [setup.py] Add more relevant classifiers
|
||||
|
||||
Extractors
|
||||
* [mixcloud] Fallback to hardcoded decryption key (#18016)
|
||||
* [nbc:news] Fix article extraction (#16194)
|
||||
* [foxsports] Fix extraction (#17543)
|
||||
* [loc] Relax regular expression and improve formats extraction
|
||||
+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
|
||||
* [nzz] Relax kaltura regex (#18228)
|
||||
* [sixplay] Fix formats extraction
|
||||
* [bitchute] Improve title extraction
|
||||
* [kaltura] Limit requested MediaEntry fields
|
||||
+ [americastestkitchen] Add support for zype embeds (#18225)
|
||||
+ [pornhub] Add pornhub.net alias
|
||||
* [nova:embed] Fix extraction (#18222)
|
||||
|
||||
|
||||
version 2018.11.18
|
||||
|
||||
Extractors
|
||||
+ [wwe] Extract subtitles
|
||||
+ [wwe] Add support for playlistst (#14781)
|
||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||
* [vk] Detect geo restriction (#17767)
|
||||
* [openload] Use original host during extraction (#18211)
|
||||
* [atvat] Fix extraction (#18041)
|
||||
+ [rte] Add support for new API endpoint (#18206)
|
||||
* [tnaflixnetwork:embed] Fix extraction (#18205)
|
||||
* [picarto] Use API and add token support (#16518)
|
||||
+ [zype] Add support for player.zype.com (#18143)
|
||||
* [vivo] Fix extraction (#18139)
|
||||
* [ruutu] Update API endpoint (#18138)
|
||||
|
||||
|
||||
version 2018.11.07
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add another JS signature function name regex (#18091, #18093,
|
||||
#18094)
|
||||
* [facebook] Fix tahoe request (#17171)
|
||||
* [cliphunter] Fix extraction (#18083)
|
||||
+ [youtube:playlist] Add support for invidio.us (#18077)
|
||||
* [zattoo] Arrange API hosts for derived extractors (#18035)
|
||||
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
|
||||
|
||||
|
||||
version 2018.11.03
|
||||
|
||||
Core
|
||||
* [extractor/common] Ensure response handle is not prematurely closed before
|
||||
it can be read if it matches expected_status (#17195, #17846, #17447)
|
||||
|
||||
Extractors
|
||||
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
|
||||
+ [ehftv] Add support for ehftv.com (#15408)
|
||||
* [azmedien] Adopt to major site redesign (#17745, #17746)
|
||||
+ [twitcasting] Add support for twitcasting.tv (#17981)
|
||||
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
|
||||
+ [openload] Add support for oload.fun (#18045)
|
||||
* [njpwworld] Fix authentication (#17427)
|
||||
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
|
||||
* [theplatform] Improve error detection (#13222)
|
||||
* [cnbc] Simplify extraction (#14280, #17110)
|
||||
+ [cbnc] Add support for new URL schema (#14193)
|
||||
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
|
||||
* [aparat] Fix extraction
|
||||
|
||||
|
||||
version 2018.10.29
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add validation for JSON-LD URLs
|
||||
|
||||
Extractors
|
||||
+ [sportbox] Add support for matchtv.ru
|
||||
* [sportbox] Fix extraction (#17978)
|
||||
* [screencast] Fix extraction (#14590, #14617, #17990)
|
||||
+ [openload] Add support for oload.icu
|
||||
+ [ivi] Add support for ivi.tv
|
||||
* [crunchyroll] Improve extraction failsafeness (#17991)
|
||||
* [dailymail] Fix formats extraction (#17976)
|
||||
* [viewster] Reduce format requests
|
||||
* [cwtv] Handle API errors (#17905)
|
||||
+ [rutube] Use geo verification headers (#17897)
|
||||
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
|
||||
- [tv3] Remove extractor (#10461, #15339)
|
||||
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
|
||||
+ [openload] Add support for oload.cc (#17823)
|
||||
+ [patreon] Extract post_file URL (#17792)
|
||||
* [patreon] Fix extraction (#14502, #10471)
|
||||
|
||||
|
||||
version 2018.10.05
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Improve authentication (#17762)
|
||||
* [dailymotion] Fix extraction (#17699)
|
||||
* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
|
||||
+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
|
||||
* [philharmoniedeparis] Fix extraction (#17705)
|
||||
+ [jamendo] Add support for licensing.jamendo.com (#17724)
|
||||
+ [openload] Add support for oload.cloud (#17710)
|
||||
* [pluralsight] Fix subtitles extraction (#17726, #17728)
|
||||
+ [vimeo] Add another config regular expression (#17690)
|
||||
* [spike] Fix Paramount Network extraction (#17677)
|
||||
* [hotstar] Fix extraction (#14694, #14931, #17637)
|
||||
|
||||
|
||||
version 2018.09.26
|
||||
|
||||
Extractors
|
||||
|
96
README.md
96
README.md
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
|
||||
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
@@ -667,7 +667,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `fps`: Frame rate
|
||||
|
||||
Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
|
||||
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
|
||||
- `ext`: File extension
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
@@ -675,6 +675,8 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||
@@ -1024,16 +1026,20 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -1045,7 +1051,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
@@ -1053,7 +1059,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -1129,11 +1135,33 @@ title = meta.get('title') or self._og_search_title(webpage)
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
### Regular expressions
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
#### Don't capture groups you don't use
|
||||
|
||||
Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
|
||||
|
||||
##### Example
|
||||
|
||||
Don't capture id attribute name here since you can't use it for anything anyway.
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
r'(?:id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
```python
|
||||
r'(id|ID)=(?P<id>\d+)'
|
||||
```
|
||||
|
||||
|
||||
#### Make regular expressions relaxed and flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
|
||||
|
||||
#### Example
|
||||
##### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
@@ -1166,9 +1194,51 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
### Long lines policy
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
|
||||
|
||||
For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
# EMBEDDING YOUTUBE-DL
|
||||
|
||||
|
@@ -33,7 +33,7 @@
|
||||
- **AdobeTVShow**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
@@ -84,8 +84,6 @@
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@@ -165,6 +163,8 @@
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **CiscoLiveSearch**
|
||||
- **CiscoLiveSession**
|
||||
- **CJSW**
|
||||
- **cliphunter**
|
||||
- **Clippit**
|
||||
@@ -178,6 +178,7 @@
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNBC**
|
||||
- **CNBCVideo**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
@@ -251,6 +252,7 @@
|
||||
- **EchoMsk**
|
||||
- **egghead:course**: egghead.io course
|
||||
- **egghead:lesson**: egghead.io lesson
|
||||
- **ehftv**
|
||||
- **eHow**
|
||||
- **EinsUndEinsTV**
|
||||
- **Einthusan**
|
||||
@@ -318,6 +320,7 @@
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
@@ -358,9 +361,10 @@
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HitRecord**
|
||||
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **HotStar**
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
@@ -368,18 +372,22 @@
|
||||
- **HRTiPlaylist**
|
||||
- **Huajiao**: 花椒直播
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hungama**
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **ImgurAlbum**
|
||||
- **imgur:album**
|
||||
- **imgur:gallery**
|
||||
- **Ina**
|
||||
- **Inc**
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:tag**: Instagram hashtag search
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **Internazionale**
|
||||
- **InternetVideoArchive**
|
||||
@@ -433,6 +441,9 @@
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lecturio**
|
||||
- **LecturioCourse**
|
||||
- **LecturioDeCourse**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **Lenta**
|
||||
@@ -445,6 +456,9 @@
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
- **LinuxAcademy**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@@ -463,6 +477,7 @@
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakerTV**
|
||||
- **MallTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
@@ -532,9 +547,8 @@
|
||||
- **MyviEmbed**
|
||||
- **MyVisionTV**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
- **natgeo:video**
|
||||
- **NationalGeographicTV**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@@ -634,6 +648,7 @@
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
@@ -658,6 +673,7 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **PlayPlusTV**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -763,6 +779,7 @@
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **SAKTV**
|
||||
- **SaltTV**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@@ -812,13 +829,14 @@
|
||||
- **southpark.nl**
|
||||
- **southparkstudios.dk**
|
||||
- **SpankBang**
|
||||
- **SpankBangPlaylist**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
- **Spiegeltv**
|
||||
- **sport.francetvinfo.fr**
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
@@ -849,6 +867,8 @@
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
- **TeachableCourse**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
@@ -881,6 +901,8 @@
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **TikTok**
|
||||
- **TikTokUser**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
@@ -894,6 +916,7 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **TruNews**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
@@ -909,7 +932,6 @@
|
||||
- **TV2**
|
||||
- **tv2.hu**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TVA**
|
||||
@@ -923,7 +945,9 @@
|
||||
- **TVNet**
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
- **TVNowAnnual**
|
||||
- **TVNowNew**
|
||||
- **TVNowSeason**
|
||||
- **TVNowShow**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
@@ -931,6 +955,7 @@
|
||||
- **TVPlayer**
|
||||
- **TVPlayHome**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:profile**
|
||||
@@ -955,8 +980,6 @@
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Upskill**
|
||||
- **UpskillCourse**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
@@ -975,6 +998,7 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **vhx:embed**
|
||||
- **Viafree**
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
@@ -1037,7 +1061,6 @@
|
||||
- **Voot**
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
@@ -1051,6 +1074,7 @@
|
||||
- **VVVVID**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Wakanim**
|
||||
- **Walla**
|
||||
- **WalyTV**
|
||||
- **washingtonpost**
|
||||
@@ -1078,6 +1102,7 @@
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
@@ -1137,3 +1162,4 @@
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
9
setup.py
9
setup.py
@@ -124,6 +124,8 @@ setup(
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'License :: Public Domain',
|
||||
'Programming Language :: Python',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 2.6',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
@@ -132,6 +134,13 @@ setup(
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: Implementation',
|
||||
'Programming Language :: Python :: Implementation :: CPython',
|
||||
'Programming Language :: Python :: Implementation :: IronPython',
|
||||
'Programming Language :: Python :: Implementation :: Jython',
|
||||
'Programming Language :: Python :: Implementation :: PyPy',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@@ -7,6 +7,7 @@ import json
|
||||
import os.path
|
||||
import re
|
||||
import types
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
@@ -152,15 +153,27 @@ def expect_value(self, got, expected, field):
|
||||
isinstance(got, compat_str),
|
||||
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
|
||||
got = 'md5:' + md5(got)
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
|
||||
self.assertTrue(
|
||||
isinstance(got, (list, dict)),
|
||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||
field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
op, _, expected_num = expected.partition(':')
|
||||
expected_num = int(expected_num)
|
||||
if op == 'mincount':
|
||||
assert_func = assertGreaterEqual
|
||||
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||
elif op == 'maxcount':
|
||||
assert_func = assertLessEqual
|
||||
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||
elif op == 'count':
|
||||
assert_func = assertEqual
|
||||
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||
else:
|
||||
assert False
|
||||
assert_func(
|
||||
self, len(got), expected_num,
|
||||
'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
|
||||
msg_tmpl % (expected_num, field, len(got)))
|
||||
return
|
||||
self.assertEqual(
|
||||
expected, got,
|
||||
@@ -236,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
|
||||
self.assertTrue(got >= expected, msg)
|
||||
|
||||
|
||||
def assertLessEqual(self, got, expected, msg=None):
|
||||
if not (got <= expected):
|
||||
if msg is None:
|
||||
msg = '%r not less than or equal to %r' % (got, expected)
|
||||
self.assertTrue(got <= expected, msg)
|
||||
|
||||
|
||||
def assertEqual(self, got, expected, msg=None):
|
||||
if not (got == expected):
|
||||
if msg is None:
|
||||
msg = '%r not equal to %r' % (got, expected)
|
||||
self.assertTrue(got == expected, msg)
|
||||
|
||||
|
||||
def expect_warnings(ydl, warnings_re):
|
||||
real_warning = ydl.report_warning
|
||||
|
||||
@@ -244,3 +271,12 @@ def expect_warnings(ydl, warnings_re):
|
||||
real_warning(w)
|
||||
|
||||
ydl.report_warning = _report_warning
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
@@ -9,11 +9,30 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.compat import compat_etree_fromstring
|
||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
import threading
|
||||
|
||||
|
||||
TEAPOT_RESPONSE_STATUS = 418
|
||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||
|
||||
|
||||
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == '/teapot':
|
||||
self.send_response(TEAPOT_RESPONSE_STATUS)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||
else:
|
||||
assert False
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@@ -42,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
<meta content='Foo' property=og:foobar>
|
||||
<meta name="og:test1" content='foo > < bar'/>
|
||||
<meta name="og:test2" content="foo >//< bar"/>
|
||||
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
@@ -50,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
|
||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||
@@ -478,7 +499,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
)
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/18923
|
||||
# https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
|
||||
'ted_18923',
|
||||
'http://hls.ted.com/talks/31241.m3u8',
|
||||
[{
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '600k-Audio',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '68',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '163',
|
||||
'acodec': 'none',
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '481',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '769',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '984',
|
||||
'acodec': 'none',
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '1255',
|
||||
'acodec': 'none',
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '1693',
|
||||
'acodec': 'none',
|
||||
'width': 853,
|
||||
'height': 480,
|
||||
}, {
|
||||
'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
|
||||
'format_id': '2462',
|
||||
'acodec': 'none',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
),
|
||||
]
|
||||
|
||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||
@@ -743,6 +821,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
def test_response_with_expected_status_returns_content(self):
|
||||
# Checks for mitigations against the effects of
|
||||
# <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
|
||||
# manifest as `_download_webpage`, `_download_xml`, `_download_json`,
|
||||
# or the underlying `_download_webpage_handle` returning no content
|
||||
# when a response matches `expected_status`.
|
||||
|
||||
httpd = compat_http_server.HTTPServer(
|
||||
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
|
||||
port = http_server_port(httpd)
|
||||
server_thread = threading.Thread(target=httpd.serve_forever)
|
||||
server_thread.daemon = True
|
||||
server_thread.start()
|
||||
|
||||
(content, urlh) = self.ie._download_webpage_handle(
|
||||
'http://127.0.0.1:%d/teapot' % port, None,
|
||||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -239,6 +239,76 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||
|
||||
def test_format_selection_string_ops(self):
|
||||
formats = [
|
||||
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
|
||||
{'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
# equals (=)
|
||||
ydl = YDL({'format': '[format_id=abc-cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not equal (!=)
|
||||
ydl = YDL({'format': '[format_id!=abc-cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
# starts with (^=)
|
||||
ydl = YDL({'format': '[format_id^=abc]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not start with (!^=)
|
||||
ydl = YDL({'format': '[format_id!^=abc]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
# ends with ($=)
|
||||
ydl = YDL({'format': '[format_id$=cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not end with (!$=)
|
||||
ydl = YDL({'format': '[format_id!$=cba]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
# contains (*=)
|
||||
ydl = YDL({'format': '[format_id*=bc-cb]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'abc-cba')
|
||||
|
||||
# does not contain (!*=)
|
||||
ydl = YDL({'format': '[format_id!*=bc-cb]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'zxc-cxz')
|
||||
|
||||
ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
ydl = YDL({'format': '[format_id!*=-]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||
|
34
test/test_YoutubeDLCookieJar.py
Normal file
34
test/test_YoutubeDLCookieJar.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import YoutubeDLCookieJar
|
||||
|
||||
|
||||
class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||
def test_keep_session_cookies(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||
try:
|
||||
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
|
||||
temp = tf.read().decode('utf-8')
|
||||
self.assertTrue(re.search(
|
||||
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
|
||||
self.assertTrue(re.search(
|
||||
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
|
||||
finally:
|
||||
tf.close()
|
||||
os.remove(tf.name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
|
||||
|
||||
def test_compat_expanduser(self):
|
||||
old_home = os.environ.get('HOME')
|
||||
test_str = 'C:\Documents and Settings\тест\Application Data'
|
||||
test_str = r'C:\Documents and Settings\тест\Application Data'
|
||||
compat_setenv('HOME', test_str)
|
||||
self.assertEqual(compat_expanduser('~'), test_str)
|
||||
compat_setenv('HOME', old_home or '')
|
||||
|
@@ -9,26 +9,16 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import http_server_port, try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
|
@@ -8,6 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import http_server_port
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
@@ -16,15 +17,6 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
|
@@ -507,6 +507,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ''), None)
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||
self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
|
||||
|
||||
def test_url_or_none(self):
|
||||
self.assertEqual(url_or_none(None), None)
|
||||
|
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
28
test/testdata/m3u8/ted_18923.m3u8
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
#EXTM3U
|
||||
#EXT-X-VERSION:4
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
|
||||
/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
|
||||
/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
|
||||
/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
|
||||
/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
|
||||
/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
|
||||
/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
|
||||
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
|
||||
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
|
@@ -82,12 +82,14 @@ from .utils import (
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
subtitles_filename,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
version_tuple,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
@@ -558,7 +560,7 @@ class YoutubeDL(object):
|
||||
self.restore_console_title()
|
||||
|
||||
if self.params.get('cookiefile') is not None:
|
||||
self.cookiejar.save()
|
||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@@ -1062,21 +1064,24 @@ class YoutubeDL(object):
|
||||
if not m:
|
||||
STR_OPERATORS = {
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(filter_spec)
|
||||
if m:
|
||||
comparison_value = m.group('value')
|
||||
op = STR_OPERATORS[m.group('op')]
|
||||
str_op = STR_OPERATORS[m.group('op')]
|
||||
if m.group('negation'):
|
||||
op = lambda attr, value: not str_op(attr, value)
|
||||
else:
|
||||
op = str_op
|
||||
|
||||
if not m:
|
||||
raise ValueError('Invalid filter specification %r' % filter_spec)
|
||||
@@ -2056,15 +2061,24 @@ class YoutubeDL(object):
|
||||
self.report_warning('Unable to remove downloaded original file')
|
||||
|
||||
def _make_archive_id(self, info_dict):
|
||||
video_id = info_dict.get('id')
|
||||
if not video_id:
|
||||
return
|
||||
# Future-proof against any change in case
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = info_dict.get('extractor_key')
|
||||
extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
if 'id' in info_dict:
|
||||
extractor = info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
return None # Incomplete video information
|
||||
return extractor.lower() + ' ' + info_dict['id']
|
||||
url = str_or_none(info_dict.get('url'))
|
||||
if not url:
|
||||
return
|
||||
# Try to find matching extractor for the URL and take its ie_key
|
||||
for ie in self._ies:
|
||||
if ie.suitable(url):
|
||||
extractor = ie.ie_key()
|
||||
break
|
||||
else:
|
||||
return
|
||||
return extractor.lower() + ' ' + video_id
|
||||
|
||||
def in_download_archive(self, info_dict):
|
||||
fn = self.params.get('download_archive')
|
||||
@@ -2072,7 +2086,7 @@ class YoutubeDL(object):
|
||||
return False
|
||||
|
||||
vid_id = self._make_archive_id(info_dict)
|
||||
if vid_id is None:
|
||||
if not vid_id:
|
||||
return False # Incomplete video information
|
||||
|
||||
try:
|
||||
@@ -2297,10 +2311,9 @@ class YoutubeDL(object):
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
opts_cookiefile = expand_path(opts_cookiefile)
|
||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||
opts_cookiefile)
|
||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
self.cookiejar.load()
|
||||
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
|
@@ -121,7 +121,11 @@ class CurlFD(ExternalFD):
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
cmd += self._option('--retry', 'retries')
|
||||
retry = self._option('--retry', 'retries')
|
||||
if len(retry) == 2:
|
||||
if retry[1] in ('inf', 'infinite'):
|
||||
retry[1] = '2147483647'
|
||||
cmd += retry
|
||||
cmd += self._option('--max-filesize', 'max_filesize')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
@@ -160,6 +164,12 @@ class WgetFD(ExternalFD):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
retry = self._option('--tries', 'retries')
|
||||
if len(retry) == 2:
|
||||
if retry[1] in ('inf', 'infinite'):
|
||||
retry[1] = '0'
|
||||
cmd += retry
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||
|
@@ -75,10 +75,14 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
def is_ad_fragment(s):
|
||||
def is_ad_fragment_start(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
|
||||
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
|
||||
def is_ad_fragment_end(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
|
||||
s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
|
||||
media_frags = 0
|
||||
ad_frags = 0
|
||||
ad_frag_next = False
|
||||
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith('#'):
|
||||
if is_ad_fragment(line):
|
||||
ad_frags += 1
|
||||
if is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
continue
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
ad_frags += 1
|
||||
continue
|
||||
media_frags += 1
|
||||
|
||||
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
if ad_frag_next:
|
||||
ad_frag_next = False
|
||||
continue
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
|
||||
'start': sub_range_start,
|
||||
'end': sub_range_start + int(splitted_byte_range[0]),
|
||||
}
|
||||
elif is_ad_fragment(line):
|
||||
elif is_ad_fragment_start(line):
|
||||
ad_frag_next = True
|
||||
elif is_ad_fragment_end(line):
|
||||
ad_frag_next = False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -17,25 +17,15 @@ from ..utils import (
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:embed|www)\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# test with one bling
|
||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
||||
'info_dict': {
|
||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'timestamp': 1196172000,
|
||||
'upload_date': '20071127',
|
||||
'duration': 211,
|
||||
'creator': 'Concierge',
|
||||
'series': 'Condé Nast Traveler Podcast',
|
||||
'episode': '"Where Are You?": Taipei 101, Taiwan',
|
||||
}
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'info_dict': {
|
||||
@@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -83,17 +79,27 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.acast.com/condenasttraveler',
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?acast\.com/|
|
||||
play\.acast\.com/s/
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/todayinfocus',
|
||||
'info_dict': {
|
||||
'id': '50544219-29bb-499e-a083-6087f4cb7797',
|
||||
'title': 'Condé Nast Traveler Podcast',
|
||||
'description': 'md5:98646dee22a5b386626ae31866638fbd',
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
'playlist_mincount': 35,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
@@ -106,7 +112,7 @@ class ACastChannelIE(InfoExtractor):
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/full-special
|
||||
specials/(?P<special_display_id>[^/]+)/full-special|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
@@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
|
@@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
@@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
|
||||
zype_id = ep_meta.get('zype_id')
|
||||
if zype_id:
|
||||
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||
ie_key = 'Zype'
|
||||
else:
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||
ie_key = 'Kaltura'
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
@@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'url': embed_url,
|
||||
'ie_key': ie_key,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
@@ -12,59 +13,83 @@ from ..utils import (
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
'age_limit': 0,
|
||||
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
|
||||
'duration': 231,
|
||||
'timestamp': 1387394859,
|
||||
'upload_date': '20131218',
|
||||
'view_count': int,
|
||||
},
|
||||
# 'skip': 'Extremely unreliable',
|
||||
}
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'https://www.aparat.com/v/8dflw/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
# Provides more metadata
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
file_list = self._parse_json(
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
|
||||
'file list'),
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = url_or_none(item.get('file'))
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': label or ext,
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height', default=None)),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
for sources in player['multiSRC']:
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
file_url = url_or_none(item.get('src'))
|
||||
if not file_url:
|
||||
continue
|
||||
item_type = item.get('type')
|
||||
if item_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': 'http-%s' % (label or ext),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height',
|
||||
default=None)),
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return {
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@@ -8,20 +8,23 @@ from .generic import GenericIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
@@ -51,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
@@ -173,13 +183,18 @@ class ARDMediathekIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description')
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
@@ -288,7 +303,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
||||
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
@@ -302,12 +317,18 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'upload_date': '20180826',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
@@ -318,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
'display_id': display_id,
|
||||
}
|
||||
formats = []
|
||||
subtitles = {}
|
||||
geoblocked = False
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
||||
|
||||
if widget.get('_geoblocked') is True:
|
||||
geoblocked = True
|
||||
if '_duration' in widget:
|
||||
res['duration'] = widget['_duration']
|
||||
res['duration'] = int_or_none(widget['_duration'])
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
||||
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
if '_subtitleUrl' in widget:
|
||||
res['subtitles'] = {'de': [{
|
||||
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('de', []).append({
|
||||
'ext': 'ttml',
|
||||
'url': widget['_subtitleUrl'],
|
||||
}]}
|
||||
'url': subtitle_url,
|
||||
})
|
||||
if '_quality' in widget:
|
||||
format_url = widget['_stream']['json'][0]
|
||||
|
||||
if format_url.endswith('.f4m'):
|
||||
format_url = url_or_none(try_get(
|
||||
widget, lambda x: x['_stream']['json'][0]))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif format_url.endswith('m3u8'):
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
# HTTP formats are not available when geoblocked is True,
|
||||
# other formats are fine though
|
||||
if geoblocked:
|
||||
continue
|
||||
quality = str_or_none(widget.get('_quality'))
|
||||
formats.append({
|
||||
'format_id': 'http-' + widget['_quality'],
|
||||
'format_id': ('http-' + quality) if quality else 'http',
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
if not formats and geoblocked:
|
||||
self.raise_geo_restricted(
|
||||
msg='This video is not available due to geoblocking',
|
||||
countries=['DE'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
res['formats'] = formats
|
||||
res.update({
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
return res
|
||||
|
@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||
webpage, 'player data')), display_id)['config']['initial_video']
|
||||
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||
webpage, 'player data', group='json')),
|
||||
display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(api_response['url']):
|
||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
|
@@ -1,213 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
|
||||
def _kaltura_video(self, partner_id, entry_id):
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
||||
video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
|
||||
class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[0-9]+-show-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
||||
\#
|
||||
)|
|
||||
\#
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
)
|
||||
(?P<id>[^\#]+)
|
||||
(?:
|
||||
\#video=
|
||||
(?P<kaltura_id>
|
||||
[_0-9a-z]+
|
||||
)
|
||||
)?
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'segment'
|
||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_2444peh4',
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
||||
'uploader_id': 'TeleZ?ri',
|
||||
'upload_date': '20161218',
|
||||
'timestamp': 1482084490,
|
||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# URL with 'segment' and fragment:
|
||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'episode' and fragment:
|
||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'show' and fragment:
|
||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
||||
webpage, 'kaltura partner id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
||||
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'episode'
|
||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'info_dict': {
|
||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
'only_matching': True
|
||||
}]
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
entry_id = mobj.group('kaltura_id')
|
||||
|
||||
entries = []
|
||||
if not entry_id:
|
||||
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
||||
payload = {
|
||||
'query': '''query VideoContext($articleId: ID!) {
|
||||
article: node(id: $articleId) {
|
||||
... on Article {
|
||||
mainAssetRelation {
|
||||
asset {
|
||||
... on VideoAsset {
|
||||
kalturaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||
}
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
data=json.dumps(payload).encode())
|
||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id', default=None)
|
||||
|
||||
if partner_id:
|
||||
entries = [
|
||||
self._kaltura_video(partner_id, m.group('id'))
|
||||
for m in re.finditer(
|
||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
||||
|
||||
|
||||
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien show playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?:
|
||||
all-episodes|
|
||||
alle-episoden
|
||||
)/
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
||||
'info_dict': {
|
||||
'id': 'astrotalk',
|
||||
'title': 'TeleZüri: AstroTalk - alle episoden',
|
||||
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
episodes = get_element_by_class('search-mobile-box', webpage)
|
||||
entries = [self.url_result(
|
||||
urljoin(url, m.group('url'))) for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
@@ -1,8 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
import re
|
||||
import xml
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -17,6 +18,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
cc_url = url_or_none(connection.get('href'))
|
||||
if not cc_url:
|
||||
continue
|
||||
captions = self._download_xml(
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, xml.etree.ElementTree.Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
{
|
||||
@@ -795,6 +803,15 @@ class BBCIE(BBCCoUkIE):
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
'id': 'p06w9tws',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -945,6 +962,15 @@ class BBCIE(BBCCoUkIE):
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
webpage, 'group id', default=None)
|
||||
if playlist_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||
|
@@ -1,15 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,29 +31,9 @@ class BeegIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cpl_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
|
||||
webpage, 'cpl', default=None, group='url')
|
||||
|
||||
cpl_url = urljoin(url, cpl_url)
|
||||
|
||||
beeg_version, beeg_salt = [None] * 2
|
||||
|
||||
if cpl_url:
|
||||
cpl = self._download_webpage(
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = int_or_none(self._search_regex(
|
||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
||||
'beeg version', default=None)) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '2185'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
@@ -68,37 +43,6 @@ class BeegIE(InfoExtractor):
|
||||
if video:
|
||||
break
|
||||
|
||||
def split(o, e):
|
||||
def cut(s, x):
|
||||
n.append(s[:x])
|
||||
return s[x:]
|
||||
n = []
|
||||
r = len(o) % e
|
||||
if r > 0:
|
||||
o = cut(o, r)
|
||||
while len(o) > e:
|
||||
o = cut(o, e)
|
||||
n.append(o)
|
||||
return n
|
||||
|
||||
def decrypt_key(key):
|
||||
# Reverse engineered from http://static.beeg.com/cpl/1738.js
|
||||
a = beeg_salt
|
||||
e = compat_urllib_parse_unquote(key)
|
||||
o = ''.join([
|
||||
compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
|
||||
for n in range(len(e))])
|
||||
return ''.join(split(o, 3)[::-1])
|
||||
|
||||
def decrypt_url(encrypted_url):
|
||||
encrypted_url = self._proto_relative_url(
|
||||
encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
|
||||
key = self._search_regex(
|
||||
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
|
||||
if not key:
|
||||
return encrypted_url
|
||||
return encrypted_url.replace(key, decrypt_key(key))
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video.items():
|
||||
if not video_url:
|
||||
@@ -108,18 +52,20 @@ class BeegIE(InfoExtractor):
|
||||
if not height:
|
||||
continue
|
||||
formats.append({
|
||||
'url': decrypt_url(video_url),
|
||||
'url': self._proto_relative_url(
|
||||
video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
|
||||
'format_id': format_id,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
video_id = video.get('id') or video_id
|
||||
video_id = compat_str(video.get('id') or video_id)
|
||||
display_id = video.get('code')
|
||||
description = video.get('desc')
|
||||
series = video.get('ps_name')
|
||||
|
||||
timestamp = parse_iso8601(video.get('date'), ' ')
|
||||
timestamp = unified_timestamp(video.get('date'))
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
|
||||
@@ -129,6 +75,7 @@ class BeegIE(InfoExtractor):
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'tags': tags,
|
||||
|
@@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor):
|
||||
}]
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
_BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
|
||||
|
||||
def _report_error(self, result):
|
||||
if 'message' in result:
|
||||
|
@@ -5,7 +5,10 @@ import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
@@ -37,16 +40,22 @@ class BitChuteIE(InfoExtractor):
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
})
|
||||
|
||||
title = self._search_regex(
|
||||
title = self._html_search_regex(
|
||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
format_urls = []
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
format_urls.append(mobj.group('url'))
|
||||
format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
|
||||
|
||||
formats = [
|
||||
{'url': mobj.group('url')}
|
||||
for mobj in re.finditer(
|
||||
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
|
@@ -1,8 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
if not publisher_id:
|
||||
player_key = query.get('playerKey')
|
||||
if player_key and ',' in player_key[0]:
|
||||
player_key = player_key[0]
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
|
||||
player_page, 'player key', fatal=False)
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
return ad_info
|
||||
|
||||
if 'url' not in info and not info.get('formats'):
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
if not info.get('url') and not info.get('formats'):
|
||||
uploader_id = info.get('uploader_id')
|
||||
if uploader_id:
|
||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
|
||||
|
||||
|
@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
|
||||
'only_matching': True,
|
||||
'age_limit': 18
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
|
||||
'title': self._live_title(user_id),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
|
||||
'duration': 1274,
|
||||
'timestamp': 1528018608,
|
||||
'upload_date': '20180603',
|
||||
'age_limit': 18
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'uploader': 'MileenaK',
|
||||
'upload_date': '20160322',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18
|
||||
}
|
||||
|
@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
videomore_url = VideomoreIE._extract_url(webpage)
|
||||
if not videomore_url:
|
||||
videomore_id = self._search_regex(
|
||||
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
|
||||
default=None)
|
||||
if videomore_id:
|
||||
videomore_url = 'videomore:%s' % videomore_id
|
||||
if videomore_url:
|
||||
title = self._og_search_title(webpage)
|
||||
return {
|
||||
|
@@ -1,20 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
|
||||
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
|
||||
'info_dict': {
|
||||
'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
|
||||
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starfire the Cat Lady',
|
||||
'description': 'Robin decides to become a cat so that Starfire will finally love him.',
|
||||
'title': 'How to Draw Upgrade',
|
||||
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
|
||||
query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
|
||||
return self._extract_cvp_info(
|
||||
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
||||
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
|
||||
},
|
||||
}, {
|
||||
|
||||
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
|
||||
metadata_re = ''
|
||||
if content_re:
|
||||
metadata_re = r'|video_metadata\.content_' + content_re
|
||||
return self._search_regex(
|
||||
r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
|
||||
webpage, name, fatal=fatal)
|
||||
|
||||
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
|
||||
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, {'networkId': 'cartoonnetwork'}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': self._search_regex(
|
||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true',
|
||||
'auth_required': find_field('authType', 'auth type') != 'unauth',
|
||||
})
|
||||
|
||||
series = find_field(
|
||||
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
|
||||
info.update({
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'series': series,
|
||||
'episode': title,
|
||||
})
|
||||
|
||||
for field in ('season', 'episode'):
|
||||
field_name = field + 'Number'
|
||||
info[field + '_number'] = int_or_none(find_field(
|
||||
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
|
||||
|
||||
return info
|
||||
|
142
youtube_dl/extractor/ciscolive.py
Normal file
142
youtube_dl/extractor/ciscolive.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class CiscoLiveBaseIE(InfoExtractor):
|
||||
# These appear to be constant across all Cisco Live presentations
|
||||
# and are not tied to any user session or event
|
||||
RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
|
||||
RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
|
||||
RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
|
||||
|
||||
HEADERS = {
|
||||
'Origin': 'https://ciscolive.cisco.com',
|
||||
'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
|
||||
'rfWidgetId': RAINFOCUS_WIDGET_ID,
|
||||
}
|
||||
|
||||
def _call_api(self, ep, rf_id, query, referrer, note=None):
|
||||
headers = self.HEADERS.copy()
|
||||
headers['Referer'] = referrer
|
||||
return self._download_json(
|
||||
self.RAINFOCUS_API_URL % ep, rf_id, note=note,
|
||||
data=urlencode_postdata(query), headers=headers)
|
||||
|
||||
def _parse_rf_item(self, rf_item):
|
||||
event_name = rf_item.get('eventName')
|
||||
title = rf_item['title']
|
||||
description = clean_html(rf_item.get('abstract'))
|
||||
presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
|
||||
bc_id = rf_item['videos'][0]['url']
|
||||
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
|
||||
duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
|
||||
location = try_get(rf_item, lambda x: x['times'][0]['room'])
|
||||
|
||||
if duration:
|
||||
duration = duration * 60
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': bc_url,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'creator': presenter_name,
|
||||
'location': location,
|
||||
'series': event_name,
|
||||
}
|
||||
|
||||
|
||||
class CiscoLiveSessionIE(CiscoLiveBaseIE):
|
||||
_VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
|
||||
'md5': 'c98acf395ed9c9f766941c70f5352e22',
|
||||
'info_dict': {
|
||||
'id': '5803694304001',
|
||||
'ext': 'mp4',
|
||||
'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
|
||||
'description': 'md5:ec4a436019e09a918dec17714803f7cc',
|
||||
'timestamp': 1530305395,
|
||||
'upload_date': '20180629',
|
||||
'uploader_id': '5647924234001',
|
||||
'location': '16B Mezz.',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
rf_id = self._match_id(url)
|
||||
rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
|
||||
return self._parse_rf_item(rf_result['items'][0])
|
||||
|
||||
|
||||
class CiscoLiveSearchIE(CiscoLiveBaseIE):
|
||||
_VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
|
||||
_TESTS = [{
|
||||
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
|
||||
'info_dict': {
|
||||
'title': 'Search query',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
}, {
|
||||
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
|
||||
|
||||
@staticmethod
|
||||
def _check_bc_id_exists(rf_item):
|
||||
return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
|
||||
|
||||
def _entries(self, query, url):
|
||||
query['size'] = 50
|
||||
query['from'] = 0
|
||||
for page_num in itertools.count(1):
|
||||
results = self._call_api(
|
||||
'search', None, query, url,
|
||||
'Downloading search JSON page %d' % page_num)
|
||||
sl = try_get(results, lambda x: x['sectionList'][0], dict)
|
||||
if sl:
|
||||
results = sl
|
||||
items = results.get('items')
|
||||
if not items or not isinstance(items, list):
|
||||
break
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if not self._check_bc_id_exists(item):
|
||||
continue
|
||||
yield self._parse_rf_item(item)
|
||||
size = int_or_none(results.get('size'))
|
||||
if size is not None:
|
||||
query['size'] = size
|
||||
total = int_or_none(results.get('total'))
|
||||
if total is not None and query['from'] + query['size'] > total:
|
||||
break
|
||||
query['from'] += query['size']
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
query['type'] = 'session'
|
||||
return self.playlist_result(
|
||||
self._entries(query, url), playlist_title='Search query')
|
@@ -1,19 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
_translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
|
||||
}
|
||||
|
||||
|
||||
def _decode(s):
|
||||
return ''.join(_translation_table.get(c, c) for c in s)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
@@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, f in gexo_files.items():
|
||||
video_url = f.get('url')
|
||||
video_url = url_or_none(f.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
fmt = f.get('fmt')
|
||||
height = f.get('h')
|
||||
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
|
||||
formats.append({
|
||||
'url': _decode(video_url),
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(f.get('w')),
|
||||
'height': int_or_none(height),
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
@@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
|
||||
{'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||
'info_dict': {
|
||||
'id': '7000031301',
|
||||
'ext': 'mp4',
|
||||
'title': "Trump: I don't necessarily agree with raising rates",
|
||||
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
|
||||
'timestamp': 1531958400,
|
||||
'upload_date': '20180719',
|
||||
'uploader': 'NBCU-CNBC',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
||||
'video id')
|
||||
return self.url_result(
|
||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
||||
CNBCIE.ie_key())
|
||||
|
@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
return self.url_result(cnn_url, CNNIE.ie_key())
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'http://cnn.com/video/?/video/' + cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
|
||||
|
@@ -69,6 +69,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -605,6 +606,11 @@ class InfoExtractor(object):
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
@@ -1052,7 +1058,7 @@ class InfoExtractor(object):
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
||||
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
return [
|
||||
@@ -1213,10 +1219,10 @@ class InfoExtractor(object):
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
@@ -1233,17 +1239,30 @@ class InfoExtractor(object):
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type in ('TVEpisode', 'Episode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
'episode': unescapeHTML(e.get('name')),
|
||||
'episode': episode_name,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
})
|
||||
if not info.get('title') and episode_name:
|
||||
info['title'] = episode_name
|
||||
part_of_season = e.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
info.update({
|
||||
'season': unescapeHTML(part_of_season.get('name')),
|
||||
'season_number': int_or_none(part_of_season.get('seasonNumber')),
|
||||
})
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Movie':
|
||||
info.update({
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('dateCreated')),
|
||||
})
|
||||
elif item_type in ('Article', 'NewsArticle'):
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
@@ -1580,6 +1599,7 @@ class InfoExtractor(object):
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
||||
# 3. https://github.com/rg3/youtube-dl/issues/18923
|
||||
|
||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||
# i.e. playlists that describe available qualities. On the other hand
|
||||
@@ -1651,11 +1671,16 @@ class InfoExtractor(object):
|
||||
rendition = stream_group[0]
|
||||
return rendition.get('NAME') or stream_group_id
|
||||
|
||||
# parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
|
||||
# chance to detect video only formats when EXT-X-STREAM-INF tags
|
||||
# precede EXT-X-MEDIA tags in HLS manifest such as [3].
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_stream_inf = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@@ -2608,7 +2633,7 @@ class InfoExtractor(object):
|
||||
'id': this_video_id,
|
||||
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
@@ -2635,12 +2660,9 @@ class InfoExtractor(object):
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
if source_url in urls:
|
||||
source_url = urljoin(
|
||||
base_url, self._proto_relative_url(source.get('file')))
|
||||
if not source_url or source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
|
@@ -1,7 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
@@ -48,6 +51,21 @@ class CrackleIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'360p.mp4': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'480p.mp4': {
|
||||
'width': 768,
|
||||
'height': 432,
|
||||
},
|
||||
'480p_1mbps.mp4': {
|
||||
'width': 852,
|
||||
'height': 480,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -59,13 +77,16 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
for country in countries:
|
||||
try:
|
||||
# Authorization generation algorithm is reverse engineered from:
|
||||
# https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
|
||||
media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
|
||||
timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
|
||||
h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
|
||||
media = self._download_json(
|
||||
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
|
||||
% (video_id, country), video_id,
|
||||
'Downloading media JSON as %s' % country,
|
||||
'Unable to download media JSON', query={
|
||||
'disableProtocols': 'true',
|
||||
'format': 'json'
|
||||
media_detail_url, video_id, 'Downloading media JSON as %s' % country,
|
||||
'Unable to download media JSON', headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': '|'.join([h, timestamp, '117', '1']),
|
||||
})
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
@@ -95,6 +116,20 @@ class CrackleIE(InfoExtractor):
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif format_url.endswith('.ism/Manifest'):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
mfs_path = e.get('Type')
|
||||
mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
|
||||
if not mfs_info:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||
'width': mfs_info['width'],
|
||||
'height': mfs_info['height'],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media.get('Description')
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree as etree
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
@@ -45,7 +46,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
data['req'] = 'RpcApi' + method
|
||||
data = compat_urllib_parse_urlencode(data).encode('utf-8')
|
||||
return self._download_xml(
|
||||
'http://www.crunchyroll.com/xml/',
|
||||
'https://www.crunchyroll.com/xml/',
|
||||
video_id, note, fatal=False, data=data, headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
@@ -143,7 +144,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -268,6 +269,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@@ -398,7 +402,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'Downloading subtitles for ' + sub_name, data={
|
||||
'subtitle_script_id': sub_id,
|
||||
})
|
||||
if sub_doc is None:
|
||||
if not isinstance(sub_doc, etree.Element):
|
||||
continue
|
||||
sid = sub_doc.get('id')
|
||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||
@@ -515,7 +519,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata is not None:
|
||||
if isinstance(streamdata, etree.Element):
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
@@ -526,7 +530,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info is not None:
|
||||
if isinstance(stream_info, etree.Element):
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
@@ -598,10 +602,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||
webpage, 'series', fatal=False)
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
|
||||
if isinstance(metadata, etree.Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||
|
||||
if not episode:
|
||||
episode = media_metadata.get('title')
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||
if not thumbnail:
|
||||
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@@ -611,8 +627,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'duration': float_or_none(media_metadata.get('duration'), 1000),
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': series,
|
||||
|
@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
self._handle_errors(result)
|
||||
self._auth_token = result['message']['auth_token']
|
||||
|
||||
def _extract_media_info(self, media):
|
||||
video_id = compat_str(media['id'])
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
return self._extract_media_info(media)
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}
|
||||
'playlist_mincount': 17,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_id = self._match_id(url)
|
||||
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'collections/' + collection_id, collection_id)
|
||||
entries = []
|
||||
for media in collection.get('media', []):
|
||||
entries.append(self._extract_media_info(media))
|
||||
media_id = compat_str(media.get('id'))
|
||||
entries.append(self.url_result(
|
||||
'https://curiositystream.com/video/' + media_id,
|
||||
CuriosityStreamIE.ie_key(), media_id))
|
||||
return self.playlist_result(
|
||||
entries, collection_id,
|
||||
collection.get('title'), collection.get('description'))
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
@@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)['video']
|
||||
video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||
|
||||
|
@@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
|
||||
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||
|
||||
video_sources = self._download_json(sources_url, video_id)
|
||||
body = video_sources.get('body')
|
||||
if body:
|
||||
video_sources = body
|
||||
|
||||
formats = []
|
||||
for rendition in video_sources['renditions']:
|
||||
|
@@ -22,7 +22,10 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -171,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
player = self._parse_json(player_v5, video_id, fatal=False) or {}
|
||||
metadata = try_get(player, lambda x: x['metadata'], dict)
|
||||
if not metadata:
|
||||
metadata_url = url_or_none(try_get(
|
||||
player, lambda x: x['context']['metadata_template_url1']))
|
||||
if metadata_url:
|
||||
metadata_url = metadata_url.replace(':videoId', video_id)
|
||||
else:
|
||||
metadata_url = update_url_query(
|
||||
'https://www.dailymotion.com/player/metadata/video/%s'
|
||||
% video_id, {
|
||||
'embedder': url,
|
||||
'integration': 'inline',
|
||||
'GK_PV5_NEON': '1',
|
||||
})
|
||||
metadata = self._download_json(
|
||||
metadata_url, video_id, 'Downloading metadata JSON')
|
||||
|
||||
if metadata.get('error', {}).get('type') == 'password_protected':
|
||||
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password:
|
||||
r = int(metadata['id'][1:], 36)
|
||||
|
@@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
)|
|
||||
watch\.
|
||||
(?:
|
||||
hgtv|
|
||||
foodnetwork|
|
||||
travelchannel|
|
||||
diynetwork|
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||
@@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'https://%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
@@ -81,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
stream = self._download_json(
|
||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||
display_id, headers={
|
||||
'Authorization': 'Bearer ' + access_token,
|
||||
})
|
||||
display_id, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
|
@@ -4,7 +4,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(video_data.get('duration')) or parse_duration(
|
||||
video_data.get('duration_format'))
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
|
||||
r'<title>([^<]+)\s*@\s+DrTuber',
|
||||
@@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'duration': duration,
|
||||
}
|
||||
|
@@ -1,15 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,23 +30,31 @@ class DRTVIE(InfoExtractor):
|
||||
IE_NAME = 'drtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '7ae17b4e18eb5d29212f424a7511c184',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'info_dict': {
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klassen - Dårlig taber (10)',
|
||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||
'timestamp': 1471991907,
|
||||
'upload_date': '20160823',
|
||||
'timestamp': 1539085800,
|
||||
'upload_date': '20181009',
|
||||
'duration': 606.84,
|
||||
'series': 'Klassen',
|
||||
'season': 'Klassen I',
|
||||
'season_number': 1,
|
||||
'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
|
||||
'episode': 'Episode 10',
|
||||
'episode_number': 10,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# embed
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'title': 'christiania pusher street ryddes drdkrjpo',
|
||||
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
@@ -45,17 +63,18 @@ class DRTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# with SignLanguage formats
|
||||
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
|
||||
'info_dict': {
|
||||
'id': 'historien-om-danmark-stenalder',
|
||||
'ext': 'mp4',
|
||||
'title': 'Historien om Danmark: Stenalder (1)',
|
||||
'title': 'Historien om Danmark: Stenalder',
|
||||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||
'timestamp': 1490401996,
|
||||
'upload_date': '20170325',
|
||||
'duration': 3502.04,
|
||||
'timestamp': 1546628400,
|
||||
'upload_date': '20190104',
|
||||
'duration': 3502.56,
|
||||
'formats': 'mincount:20',
|
||||
},
|
||||
'params': {
|
||||
@@ -74,20 +93,26 @@ class DRTVIE(InfoExtractor):
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id')
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
programcard = self._download_json(
|
||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
if not video_id:
|
||||
video_id = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
|
||||
webpage, 'urn'))
|
||||
|
||||
title = remove_end(self._og_search_title(
|
||||
webpage, default=None), ' | TV | DR') or data['Title']
|
||||
data = self._download_json(
|
||||
'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
|
||||
video_id, 'Downloading video JSON', query={'expanded': 'true'})
|
||||
|
||||
title = str_or_none(data.get('Title')) or re.sub(
|
||||
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
|
||||
self._og_search_title(webpage))
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
||||
timestamp = unified_timestamp(
|
||||
data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
@@ -97,24 +122,62 @@ class DRTVIE(InfoExtractor):
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
assets = []
|
||||
primary_asset = data.get('PrimaryAsset')
|
||||
if isinstance(primary_asset, dict):
|
||||
assets.append(primary_asset)
|
||||
secondary_assets = data.get('SecondaryAssets')
|
||||
if isinstance(secondary_assets, list):
|
||||
for secondary_asset in secondary_assets:
|
||||
if isinstance(secondary_asset, dict):
|
||||
assets.append(secondary_asset)
|
||||
|
||||
def hex_to_bytes(hex):
|
||||
return binascii.a2b_hex(hex.encode('ascii'))
|
||||
|
||||
def decrypt_uri(e):
|
||||
n = int(e[2:10], 16)
|
||||
a = e[10 + n:]
|
||||
data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
|
||||
key = bytes_to_intlist(hashlib.sha256(
|
||||
('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
|
||||
iv = bytes_to_intlist(hex_to_bytes(a))
|
||||
decrypted = aes_cbc_decrypt(data, key, iv)
|
||||
return intlist_to_bytes(
|
||||
decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
|
||||
|
||||
for asset in assets:
|
||||
kind = asset.get('Kind')
|
||||
if kind == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
thumbnail = url_or_none(asset.get('Uri'))
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
asset_target = asset.get('Target')
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
encrypted_uri = link.get('EncryptedUri')
|
||||
if not encrypted_uri:
|
||||
continue
|
||||
try:
|
||||
uri = decrypt_uri(encrypted_uri)
|
||||
except Exception:
|
||||
self.report_warning(
|
||||
'Unable to decrypt EncryptedUri', video_id)
|
||||
continue
|
||||
uri = url_or_none(uri)
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
|
||||
if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
|
||||
preference = -1
|
||||
format_id += '-%s' % asset_target
|
||||
elif asset_target == 'Default':
|
||||
preference = 1
|
||||
else:
|
||||
preference = None
|
||||
if target == 'HDS':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
@@ -140,19 +203,22 @@ class DRTVIE(InfoExtractor):
|
||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||
'preference': preference,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
LANGS = {
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
if not subs.get('Uri'):
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': subs['Uri'],
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
|
||||
if isinstance(subtitles_list, list):
|
||||
LANGS = {
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
if not isinstance(subs, dict):
|
||||
continue
|
||||
sub_uri = url_or_none(subs.get('Uri'))
|
||||
if not sub_uri:
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': sub_uri,
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
self.raise_geo_restricted(
|
||||
@@ -170,6 +236,13 @@ class DRTVIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': str_or_none(data.get('SeriesTitle')),
|
||||
'season': str_or_none(data.get('SeasonTitle')),
|
||||
'season_number': int_or_none(data.get('SeasonNumber')),
|
||||
'season_id': str_or_none(data.get('SeasonUrn')),
|
||||
'episode': str_or_none(data.get('EpisodeTitle')),
|
||||
'episode_number': int_or_none(data.get('EpisodeNumber')),
|
||||
'release_year': int_or_none(data.get('ProductionYear')),
|
||||
}
|
||||
|
||||
|
||||
|
@@ -15,16 +15,16 @@ from ..utils import (
|
||||
class DTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
|
||||
_TEST = {
|
||||
'url': 'https://d.tube/#!/v/benswann/zqd630em',
|
||||
'md5': 'a03eaa186618ffa7a3145945543a251e',
|
||||
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
|
||||
'md5': '9f29088fa08d699a7565ee983f56a06e',
|
||||
'info_dict': {
|
||||
'id': 'zqd630em',
|
||||
'id': 'x380jtr1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
|
||||
'description': 'md5:700d164e066b87f9eac057949e4227c2',
|
||||
'uploader_id': 'benswann',
|
||||
'upload_date': '20180222',
|
||||
'timestamp': 1519328958,
|
||||
'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
|
||||
'description': 'md5:60be222088183be3a42f196f34235776',
|
||||
'uploader_id': 'broncnutz',
|
||||
'upload_date': '20190107',
|
||||
'timestamp': 1546854054,
|
||||
},
|
||||
'params': {
|
||||
'format': '480p',
|
||||
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
|
||||
def canonical_url(h):
|
||||
if not h:
|
||||
return None
|
||||
return 'https://ipfs.io/ipfs/' + h
|
||||
return 'https://video.dtube.top/ipfs/' + h
|
||||
|
||||
formats = []
|
||||
for q in ('240', '480', '720', '1080', ''):
|
||||
|
@@ -88,11 +88,7 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
@@ -198,6 +194,10 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .ciscolive import (
|
||||
CiscoLiveSessionIE,
|
||||
CiscoLiveSearchIE,
|
||||
)
|
||||
from .cjsw import CJSWIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
@@ -209,7 +209,10 @@ from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import CNBCIE
|
||||
from .cnbc import (
|
||||
CNBCIE,
|
||||
CNBCVideoIE,
|
||||
)
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
@@ -408,6 +411,7 @@ from .funk import (
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
@@ -448,6 +452,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hketv import HKETVIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
@@ -466,6 +471,10 @@ from .hrti import (
|
||||
)
|
||||
from .huajiao import HuajiaoIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hungama import (
|
||||
HungamaIE,
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import (
|
||||
@@ -480,12 +489,17 @@ from .imdb import (
|
||||
from .imgur import (
|
||||
ImgurIE,
|
||||
ImgurAlbumIE,
|
||||
ImgurGalleryIE,
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .inc import IncIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .instagram import (
|
||||
InstagramIE,
|
||||
InstagramUserIE,
|
||||
InstagramTagIE,
|
||||
)
|
||||
from .internazionale import InternazionaleIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
@@ -540,6 +554,7 @@ from .la7 import LA7IE
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
@@ -549,6 +564,11 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioIE,
|
||||
LecturioCourseIE,
|
||||
LecturioDeCourseIE,
|
||||
)
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
@@ -569,6 +589,11 @@ from .limelight import (
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
)
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
@@ -595,6 +620,7 @@ from .mailru import (
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makertv import MakerTVIE
|
||||
from .malltv import MallTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
@@ -670,8 +696,7 @@ from .myvi import (
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
NationalGeographicIE,
|
||||
NationalGeographicEpisodeGuideIE,
|
||||
NationalGeographicTVIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
@@ -814,6 +839,7 @@ from .orf import (
|
||||
ORFOE1IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .outsidetv import OutsideTVIE
|
||||
from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
@@ -842,6 +868,7 @@ from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
@@ -1033,7 +1060,10 @@ from .southpark import (
|
||||
SouthParkEsIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankbang import (
|
||||
SpankBangIE,
|
||||
SpankBangPlaylistIE,
|
||||
)
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
@@ -1043,7 +1073,7 @@ from .spike import (
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@@ -1078,6 +1108,10 @@ from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
TeachableCourseIE,
|
||||
)
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -1116,6 +1150,10 @@ from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import (
|
||||
TikTokIE,
|
||||
TikTokUserIE,
|
||||
)
|
||||
from .tinypic import TinyPicIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
@@ -1134,6 +1172,7 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
@@ -1153,7 +1192,6 @@ from .tv2 import (
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
@@ -1172,13 +1210,15 @@ from .tvnet import TVNetIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
TVNowListIE,
|
||||
TVNowNewIE,
|
||||
TVNowSeasonIE,
|
||||
TVNowAnnualIE,
|
||||
TVNowShowIE,
|
||||
)
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
TVPWebsiteIE,
|
||||
)
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
@@ -1190,6 +1230,7 @@ from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import TwitCastingIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@@ -1223,10 +1264,6 @@ from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .upskill import (
|
||||
UpskillIE,
|
||||
UpskillCourseIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
@@ -1295,6 +1332,7 @@ from .vimeo import (
|
||||
VimeoReviewIE,
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
@@ -1330,7 +1368,6 @@ from .voxmedia import (
|
||||
VoxMediaVolumeIE,
|
||||
VoxMediaIE,
|
||||
)
|
||||
from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vrak import VrakIE
|
||||
from .vrv import (
|
||||
@@ -1344,6 +1381,7 @@ from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
@@ -1382,6 +1420,7 @@ from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
@@ -1466,6 +1505,7 @@ from .zattoo import (
|
||||
QuantumTVIE,
|
||||
QuicklineIE,
|
||||
QuicklineLiveIE,
|
||||
SaltTVIE,
|
||||
SAKTVIE,
|
||||
VTXTVIE,
|
||||
WalyTVIE,
|
||||
@@ -1474,3 +1514,4 @@ from .zattoo import (
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
@@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
|
@@ -1,17 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,6 +34,7 @@ class FOXIE(AdobePassIE):
|
||||
'upload_date': '20170901',
|
||||
'creator': 'FOX',
|
||||
'series': 'Gotham',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -44,48 +48,54 @@ class FOXIE(AdobePassIE):
|
||||
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_HOME_PAGE_URL = 'https://www.fox.com/'
|
||||
_API_KEY = 'abdcbed02c124d393b39e818a4312055'
|
||||
_access_token = None
|
||||
|
||||
def _call_api(self, path, video_id, data=None):
|
||||
headers = {
|
||||
'X-Api-Key': self._API_KEY,
|
||||
}
|
||||
if self._access_token:
|
||||
headers['Authorization'] = 'Bearer ' + self._access_token
|
||||
return self._download_json(
|
||||
'https://api2.fox.com/v2.0/' + path,
|
||||
video_id, data=data, headers=headers)
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._access_token:
|
||||
mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
|
||||
if mvpd_auth:
|
||||
self._access_token = (self._parse_json(compat_urllib_parse_unquote(
|
||||
mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
|
||||
if not self._access_token:
|
||||
self._access_token = self._call_api(
|
||||
'login', None, json.dumps({
|
||||
'deviceId': compat_str(uuid.uuid4()),
|
||||
}).encode())['accessToken']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
||||
video_id, headers={
|
||||
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
||||
'Content-Type': 'application/json',
|
||||
'Referer': url,
|
||||
})
|
||||
video = self._call_api('vodplayer/' + video_id, video_id)
|
||||
|
||||
title = video['name']
|
||||
release_url = video['videoRelease']['url']
|
||||
|
||||
description = video.get('description')
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
rating = video.get('contentRating')
|
||||
age_limit = parse_age_limit(rating)
|
||||
release_url = video['url']
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = try_get(
|
||||
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||
|
||||
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||
video.get('duration')) or parse_duration(video.get('duration'))
|
||||
timestamp = unified_timestamp(video.get('datePublished'))
|
||||
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||
|
||||
series = video.get('seriesName') or data.get(
|
||||
'seriesName') or data.get('show')
|
||||
season_number = int_or_none(video.get('seasonNumber'))
|
||||
episode = video.get('name')
|
||||
episode_number = int_or_none(video.get('episodeNumber'))
|
||||
release_year = int_or_none(video.get('releaseYear'))
|
||||
|
||||
if data.get('authRequired'):
|
||||
resource = self._get_mvpd_resource(
|
||||
'fbc-fox', title, video.get('guid'), rating)
|
||||
release_url = update_url_query(
|
||||
release_url, {
|
||||
'auth': self._extract_mvpd_auth(
|
||||
url, video_id, 'fbc-fox', resource)
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for doc_rel in video.get('documentReleases', []):
|
||||
@@ -98,36 +108,19 @@ class FOXIE(AdobePassIE):
|
||||
}]
|
||||
break
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'description': video.get('description'),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': parse_age_limit(video.get('contentRating')),
|
||||
'creator': creator,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'release_year': release_year,
|
||||
'season_number': int_or_none(video.get('seasonNumber')),
|
||||
'episode': video.get('name'),
|
||||
'episode_number': int_or_none(video.get('episodeNumber')),
|
||||
'release_year': int_or_none(video.get('releaseYear')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
urlh = self._request_webpage(HEADRequest(release_url), video_id)
|
||||
video_url = compat_str(urlh.geturl())
|
||||
|
||||
if UplynkPreplayIE.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'ie_key': UplynkPreplayIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
m3u8_url = self._download_json(release_url, video_id)['playURL']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
@@ -1,43 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'bwduI3X_TgUB',
|
||||
'id': '432609859715',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
'upload_date': '20150423',
|
||||
'timestamp': 1429761109,
|
||||
# TODO: fix timestamp
|
||||
'upload_date': '19700101', # '20150423',
|
||||
# 'timestamp': 1429761109,
|
||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
||||
webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
config['releaseURL'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'http',
|
||||
}), {'force_smil_url': True}))
|
||||
return self.url_result(
|
||||
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
|
||||
|
@@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
catalogue = None
|
||||
video_id = self._search_regex(
|
||||
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
|
||||
if not video_id:
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class FreespeechIE(InfoExtractor):
|
||||
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
|
||||
r'data-video-url="([^"]+)"',
|
||||
webpage, 'youtube url')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
return self.url_result(youtube_url, YoutubeIE.ie_key())
|
||||
|
@@ -1,6 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
|
||||
|
||||
video_id = title_data.get('id') or self._search_regex([
|
||||
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
|
||||
r'<iframe[^>]+src="/player/(\d+)"',
|
||||
r'<iframe[^>]+src="/player/(\d+)',
|
||||
], webpage, 'video_id', default=None)
|
||||
if not video_id:
|
||||
player_url = self._html_search_meta([
|
||||
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
|
||||
if self._TOKEN:
|
||||
headers['Authorization'] = 'Token %s' % self._TOKEN
|
||||
sources = self._download_json(
|
||||
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
|
||||
video_id, headers=headers)['items']
|
||||
'https://www.funimation.com/api/showexperience/%s/' % video_id,
|
||||
video_id, headers=headers, query={
|
||||
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
|
||||
})['items']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
|
||||
|
98
youtube_dl/extractor/gaia.py
Normal file
98
youtube_dl/extractor/gaia.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class GaiaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
|
||||
'info_dict': {
|
||||
'id': '89356',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connecting with Universal Consciousness',
|
||||
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
|
||||
'upload_date': '20151116',
|
||||
'timestamp': 1447707266,
|
||||
'duration': 936,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
|
||||
'info_dict': {
|
||||
'id': '89351',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connecting with Universal Consciousness',
|
||||
'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
|
||||
'upload_date': '20151116',
|
||||
'timestamp': 1447707266,
|
||||
'duration': 53,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, vtype = re.search(self._VALID_URL, url).groups()
|
||||
node_id = self._download_json(
|
||||
'https://brooklyn.gaia.com/pathinfo', display_id, query={
|
||||
'path': 'video/' + display_id,
|
||||
})['id']
|
||||
node = self._download_json(
|
||||
'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
|
||||
vdata = node[vtype]
|
||||
media_id = compat_str(vdata['nid'])
|
||||
title = node['title']
|
||||
|
||||
media = self._download_json(
|
||||
'https://brooklyn.gaia.com/media/' + media_id, media_id)
|
||||
formats = self._extract_m3u8_formats(
|
||||
media['mediaUrls']['bcHLS'], media_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = media.get('textTracks', {})
|
||||
for key in ('captions', 'subtitles'):
|
||||
for lang, sub_url in text_tracks.get(key, {}).items():
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
fivestar = node.get('fivestar', {})
|
||||
fields = node.get('fields', {})
|
||||
|
||||
def get_field_value(key, value_key='value'):
|
||||
return try_get(fields, lambda x: x[key][0][value_key])
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
|
||||
'timestamp': int_or_none(node.get('created')),
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(vdata.get('duration')),
|
||||
'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
|
||||
'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
|
||||
'comment_count': int_or_none(node.get('comment_count')),
|
||||
'series': try_get(node, lambda x: x['series']['title'], compat_str),
|
||||
'season_number': int_or_none(get_field_value('season')),
|
||||
'season_id': str_or_none(get_field_value('series_nid', 'nid')),
|
||||
'episode_number': int_or_none(get_field_value('episode')),
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameSpotIE(OnceIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||
@@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
@@ -109,11 +109,13 @@ from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .teachable import TeachableIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2070,6 +2072,20 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
@@ -2181,10 +2197,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self.http_scheme() + url,
|
||||
}
|
||||
return self.url_result(self.http_scheme() + url)
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
@@ -2636,9 +2649,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(tvc_url, 'TVC')
|
||||
|
||||
# Look for embedded SportBox player
|
||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||
sportbox_urls = SportBoxIE._extract_urls(webpage)
|
||||
if sportbox_urls:
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
@@ -3097,6 +3110,10 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
|
||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||
if teachable_url:
|
||||
return self.url_result(teachable_url)
|
||||
|
||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||
if indavideo_urls:
|
||||
return self.playlist_from_matches(
|
||||
@@ -3129,6 +3146,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||
|
||||
zype_urls = ZypeIE._extract_urls(webpage)
|
||||
if zype_urls:
|
||||
return self.playlist_from_matches(
|
||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
|
@@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
gfy = self._download_json(
|
||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
||||
'https://api.gfycat.com/v1/gfycats/%s' % video_id,
|
||||
video_id, 'Downloading video info')
|
||||
if 'error' in gfy:
|
||||
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||
|
@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
|
||||
return
|
||||
|
||||
try:
|
||||
self._download_json(
|
||||
glb_id = (self._download_json(
|
||||
'https://login.globo.com/api/authentication', None, data=json.dumps({
|
||||
'payload': {
|
||||
'email': email,
|
||||
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
|
||||
},
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
})
|
||||
}) or {}).get('glbId')
|
||||
if glb_id:
|
||||
self._set_cookie('.globo.com', 'GLBID', glb_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(e.cause.read(), None)
|
||||
|
@@ -25,15 +25,15 @@ class GoIE(AdobePassIE):
|
||||
},
|
||||
'watchdisneychannel': {
|
||||
'brand': '004',
|
||||
'requestor_id': 'Disney',
|
||||
'resource_id': 'Disney',
|
||||
},
|
||||
'watchdisneyjunior': {
|
||||
'brand': '008',
|
||||
'requestor_id': 'DisneyJunior',
|
||||
'resource_id': 'DisneyJunior',
|
||||
},
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'requestor_id': 'DisneyXD',
|
||||
'resource_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
@@ -130,8 +130,8 @@ class GoIE(AdobePassIE):
|
||||
'device': '001',
|
||||
}
|
||||
if video_data.get('accesslevel') == '1':
|
||||
requestor_id = site_info['requestor_id']
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
|
||||
resource = site_info.get('resource_id') or self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, None)
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
|
191
youtube_dl/extractor/hketv.py
Normal file
191
youtube_dl/extractor/hketv.py
Normal file
@@ -0,0 +1,191 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class HKETVIE(InfoExtractor):
|
||||
IE_NAME = 'hketv'
|
||||
IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['HK']
|
||||
_VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hkedcity.net/etv/resource/2932360618',
|
||||
'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
|
||||
'info_dict': {
|
||||
'id': '2932360618',
|
||||
'ext': 'mp4',
|
||||
'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
|
||||
'description': 'md5:d5286d05219ef50e0613311cbe96e560',
|
||||
'upload_date': '20181024',
|
||||
'duration': 900,
|
||||
'subtitles': 'count:2',
|
||||
},
|
||||
'skip': 'Geo restricted to HK',
|
||||
}, {
|
||||
'url': 'https://www.hkedcity.net/etv/resource/972641418',
|
||||
'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
|
||||
'info_dict': {
|
||||
'id': '972641418',
|
||||
'ext': 'mp4',
|
||||
'title': '衣冠楚楚 (天使系列之一)',
|
||||
'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
|
||||
'upload_date': '20070109',
|
||||
'duration': 907,
|
||||
'subtitles': {},
|
||||
},
|
||||
'params': {
|
||||
'geo_verification_proxy': '<HK proxy here>',
|
||||
},
|
||||
'skip': 'Geo restricted to HK',
|
||||
}]
|
||||
|
||||
_CC_LANGS = {
|
||||
'中文(繁體中文)': 'zh-Hant',
|
||||
'中文(简体中文)': 'zh-Hans',
|
||||
'English': 'en',
|
||||
'Bahasa Indonesia': 'id',
|
||||
'\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
|
||||
'\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
|
||||
'Tagalog': 'tl',
|
||||
'\u0e44\u0e17\u0e22': 'th',
|
||||
'\u0627\u0631\u062f\u0648': 'ur',
|
||||
}
|
||||
_FORMAT_HEIGHTS = {
|
||||
'SD': 360,
|
||||
'HD': 720,
|
||||
}
|
||||
_APPS_BASE_URL = 'https://apps.hkedcity.net'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = (
|
||||
self._html_search_meta(
|
||||
('ed_title', 'search.ed_title'), webpage, default=None) or
|
||||
self._search_regex(
|
||||
r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'title', default=None, group='url') or
|
||||
self._html_search_regex(
|
||||
r'<h1>([^<]+)</h1>', webpage, 'title', default=None) or
|
||||
self._og_search_title(webpage)
|
||||
)
|
||||
|
||||
file_id = self._search_regex(
|
||||
r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
|
||||
webpage, 'file ID')
|
||||
curr_url = self._search_regex(
|
||||
r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
|
||||
webpage, 'curr URL')
|
||||
data = {
|
||||
'action': 'get_info',
|
||||
'curr_url': curr_url,
|
||||
'file_id': file_id,
|
||||
'video_url': file_id,
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
self._APPS_BASE_URL + '/media/play/handler.php', video_id,
|
||||
data=urlencode_postdata(data),
|
||||
headers=merge_dicts({
|
||||
'Content-Type': 'application/x-www-form-urlencoded'},
|
||||
self.geo_verification_headers()))
|
||||
|
||||
result = response['result']
|
||||
|
||||
if not response.get('success') or not response.get('access'):
|
||||
error = clean_html(response.get('access_err_msg'))
|
||||
if 'Video streaming is not available in your country' in error:
|
||||
self.raise_geo_restricted(
|
||||
msg=error, countries=self._GEO_COUNTRIES)
|
||||
else:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
formats = []
|
||||
|
||||
width = int_or_none(result.get('width'))
|
||||
height = int_or_none(result.get('height'))
|
||||
|
||||
playlist0 = result['playlist'][0]
|
||||
for fmt in playlist0['sources']:
|
||||
file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
|
||||
if not file_url:
|
||||
continue
|
||||
# If we ever wanted to provide the final resolved URL that
|
||||
# does not require cookies, albeit with a shorter lifespan:
|
||||
# urlh = self._downloader.urlopen(file_url)
|
||||
# resolved_url = urlh.geturl()
|
||||
label = fmt.get('label')
|
||||
h = self._FORMAT_HEIGHTS.get(label)
|
||||
w = h * width // height if h and width and height else None
|
||||
formats.append({
|
||||
'format_id': label,
|
||||
'ext': fmt.get('type'),
|
||||
'url': file_url,
|
||||
'width': w,
|
||||
'height': h,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
track_kind = str_or_none(track.get('kind'))
|
||||
if not track_kind or not isinstance(track_kind, compat_str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
track_label = track.get('label')
|
||||
subtitles.setdefault(self._CC_LANGS.get(
|
||||
track_label, track_label), []).append({
|
||||
'url': self._proto_relative_url(track_url),
|
||||
'ext': 'srt',
|
||||
})
|
||||
|
||||
# Likes
|
||||
emotion = self._download_json(
|
||||
'https://emocounter.hkedcity.net/handler.php', video_id,
|
||||
data=urlencode_postdata({
|
||||
'action': 'get_emotion',
|
||||
'data[bucket_id]': 'etv',
|
||||
'data[identifier]': video_id,
|
||||
}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'},
|
||||
fatal=False) or {}
|
||||
like_count = int_or_none(try_get(
|
||||
emotion, lambda x: x['data']['emotion_data'][0]['count']))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
'description', webpage, fatal=False),
|
||||
'upload_date': unified_strdate(self._html_search_meta(
|
||||
'ed_date', webpage, fatal=False), day_first=False),
|
||||
'duration': int_or_none(result.get('length')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
|
||||
'view_count': parse_count(result.get('view_count')),
|
||||
'like_count': like_count,
|
||||
}
|
@@ -1,49 +1,56 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
import hmac
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
def _download_json(self, *args, **kwargs):
|
||||
response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
|
||||
if response['resultCode'] != 'OK':
|
||||
if kwargs.get('fatal'):
|
||||
raise ExtractorError(
|
||||
response['errorDescription'], expected=True)
|
||||
return None
|
||||
return response['resultObj']
|
||||
|
||||
def _download_content_info(self, content_id):
|
||||
return self._download_json(
|
||||
'https://account.hotstar.com/AVS/besc', content_id, query={
|
||||
'action': 'GetAggregatedContentDetails',
|
||||
'appVersion': '5.0.40',
|
||||
'channel': 'PCTV',
|
||||
'contentId': content_id,
|
||||
})['contentInfo'][0]
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
st = int(time.time())
|
||||
exp = st + 6000
|
||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
response = self._download_json(
|
||||
'https://api.hotstar.com/' + path,
|
||||
video_id, headers={
|
||||
'hotstarauth': auth,
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'JIO',
|
||||
}, query={
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
})
|
||||
if response['statusCode'] != 'OK':
|
||||
raise ExtractorError(
|
||||
response['body']['message'], expected=True)
|
||||
return response['body']['results']
|
||||
|
||||
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||
# contentData
|
||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
'ext': 'mp4',
|
||||
'title': 'On Air With AIB',
|
||||
'title': 'Can You Not Spread Rumours?',
|
||||
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
|
||||
'timestamp': 1447227000,
|
||||
'timestamp': 1447248600,
|
||||
'upload_date': '20151111',
|
||||
'duration': 381,
|
||||
},
|
||||
@@ -51,6 +58,10 @@ class HotStarIE(HotStarBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# contentDetail
|
||||
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'only_matching': True,
|
||||
@@ -58,47 +69,52 @@ class HotStarIE(HotStarBaseIE):
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_content_info(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
app_state = self._parse_json(self._search_regex(
|
||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||
webpage, 'app state'), video_id)
|
||||
video_data = {}
|
||||
getters = list(
|
||||
lambda x, k=k: x['initialState']['content%s' % k]['content']
|
||||
for k in ('Data', 'Detail')
|
||||
)
|
||||
for v in app_state.values():
|
||||
content = try_get(v, getters, dict)
|
||||
if content and content.get('contentId') == video_id:
|
||||
video_data = content
|
||||
break
|
||||
|
||||
title = video_data['episodeTitle']
|
||||
title = video_data['title']
|
||||
|
||||
if video_data.get('encrypted') == 'Y':
|
||||
if video_data.get('drmProtected'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = []
|
||||
for f in ('JIO',):
|
||||
format_data = self._download_json(
|
||||
'http://getcdn.hotstar.com/AVS/besc',
|
||||
video_id, 'Downloading %s JSON metadata' % f,
|
||||
fatal=False, query={
|
||||
'action': 'GetCDN',
|
||||
'asJson': 'Y',
|
||||
'channel': f,
|
||||
'id': video_id,
|
||||
'type': 'VOD',
|
||||
})
|
||||
if format_data:
|
||||
format_url = format_data.get('src')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
continue
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'width': int_or_none(format_data.get('width')),
|
||||
'height': int_or_none(format_data.get('height')),
|
||||
})
|
||||
format_data = self._call_api('h/v1/play', video_id)['item']
|
||||
format_url = format_data['playbackUrl']
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
try:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_geo_restricted(countries=['IN'])
|
||||
raise
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
pass
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'width': int_or_none(format_data.get('width')),
|
||||
'height': int_or_none(format_data.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -106,57 +122,43 @@ class HotStarIE(HotStarBaseIE):
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||
'formats': formats,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': video_data.get('seasonId'),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'series': video_data.get('contentTitle'),
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
|
||||
|
||||
class HotStarPlaylistIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:playlist'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||
'info_dict': {
|
||||
'id': '14812',
|
||||
'id': '3_2_26',
|
||||
},
|
||||
'playlist_mincount': 75,
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_ITEM_TYPES = {
|
||||
'episodes': 'EPISODE',
|
||||
'popular-clips': 'CLIPS',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
base_url = mobj.group('url')
|
||||
content_id = mobj.group('content_id')
|
||||
playlist_type = mobj.group('type')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
content_info = self._download_content_info(content_id)
|
||||
playlist_id = compat_str(content_info['categoryId'])
|
||||
|
||||
collection = self._download_json(
|
||||
'https://search.hotstar.com/AVS/besc', playlist_id, query={
|
||||
'action': 'SearchContents',
|
||||
'appVersion': '5.0.40',
|
||||
'channel': 'PCTV',
|
||||
'moreFilters': 'series:%s;' % playlist_id,
|
||||
'query': '*',
|
||||
'searchOrder': 'last_broadcast_date desc,year desc,title asc',
|
||||
'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
|
||||
})
|
||||
collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'%s/_/%s' % (base_url, video['contentId']),
|
||||
'https://www.hotstar.com/%s' % video['contentId'],
|
||||
ie=HotStarIE.ie_key(), video_id=video['contentId'])
|
||||
for video in collection['response']['docs']
|
||||
for video in collection['assets']['items']
|
||||
if video.get('contentId')]
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
117
youtube_dl/extractor/hungama.py
Normal file
117
youtube_dl/extractor/hungama.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class HungamaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?hungama\.com/
|
||||
(?:
|
||||
(?:video|movie)/[^/]+/|
|
||||
tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
|
||||
'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
|
||||
'info_dict': {
|
||||
'id': '2931166',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lucky Ali - Kitni Haseen Zindagi',
|
||||
'track': 'Kitni Haseen Zindagi',
|
||||
'artist': 'Lucky Ali',
|
||||
'album': 'Aks',
|
||||
'release_year': 2000,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id)
|
||||
|
||||
m3u8_url = self._download_json(
|
||||
'https://www.hungama.com/index.php', video_id,
|
||||
data=urlencode_postdata({'content_id': video_id}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, query={
|
||||
'c': 'common',
|
||||
'm': 'get_video_mdn_url',
|
||||
})['stream_url']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class HungamaSongIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
|
||||
'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
|
||||
'info_dict': {
|
||||
'id': '2931166',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lucky Ali - Kitni Haseen Zindagi',
|
||||
'track': 'Kitni Haseen Zindagi',
|
||||
'artist': 'Lucky Ali',
|
||||
'album': 'Aks',
|
||||
'release_year': 2000,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
|
||||
audio_id, query={'_country': 'IN'})[0]
|
||||
|
||||
track = data['song_name']
|
||||
artist = data.get('singer_name')
|
||||
|
||||
m3u8_url = self._download_json(
|
||||
data.get('file') or data['preview_link'],
|
||||
audio_id)['response']['media_url']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
thumbnail = data.get('img_src') or data.get('album_image')
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'track': track,
|
||||
'artist': artist,
|
||||
'album': data.get('album_name'),
|
||||
'release_year': int_or_none(data.get('date')),
|
||||
'formats': formats,
|
||||
}
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@@ -20,38 +20,23 @@ class ImgurIE(InfoExtractor):
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||
'description': 'Imgur: The magic of the Internet',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://imgur.com/A61SaA1',
|
||||
'info_dict': {
|
||||
'id': 'A61SaA1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||
'description': 'Imgur: The magic of the Internet',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# no title
|
||||
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
|
||||
webpage = self._download_webpage(gifv_url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
|
||||
|
||||
width = int_or_none(self._og_search_property(
|
||||
'video:width', webpage, default=None))
|
||||
@@ -72,7 +57,6 @@ class ImgurIE(InfoExtractor):
|
||||
'format_id': m.group('type').partition('/')[2],
|
||||
'url': self._proto_relative_url(m.group('src')),
|
||||
'ext': mimetype2ext(m.group('type')),
|
||||
'acodec': 'none',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'http_headers': {
|
||||
@@ -107,44 +91,64 @@ class ImgurIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': self._og_search_title(webpage, default=video_id),
|
||||
}
|
||||
|
||||
|
||||
class ImgurAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
|
||||
class ImgurGalleryIE(InfoExtractor):
|
||||
IE_NAME = 'imgur:gallery'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/gallery/Q95ko',
|
||||
'info_dict': {
|
||||
'id': 'Q95ko',
|
||||
'title': 'Adding faces make every GIF better',
|
||||
},
|
||||
'playlist_count': 25,
|
||||
}, {
|
||||
'url': 'http://imgur.com/a/j6Orj',
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||
'info_dict': {
|
||||
'id': 'YcAQlkx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
gallery_id = self._match_id(url)
|
||||
|
||||
album_images = self._download_json(
|
||||
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
|
||||
album_id, fatal=False)
|
||||
data = self._download_json(
|
||||
'https://imgur.com/gallery/%s.json' % gallery_id,
|
||||
gallery_id)['data']['image']
|
||||
|
||||
if album_images:
|
||||
data = album_images.get('data')
|
||||
if data and isinstance(data, dict):
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, list):
|
||||
entries = [
|
||||
self.url_result('http://imgur.com/%s' % image['hash'])
|
||||
for image in images if image.get('hash')]
|
||||
return self.playlist_result(entries, album_id)
|
||||
if data.get('is_album'):
|
||||
entries = [
|
||||
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
|
||||
for image in data['album_images']['images'] if image.get('hash')]
|
||||
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
|
||||
|
||||
# Fallback to single video
|
||||
return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
|
||||
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
|
||||
|
||||
|
||||
class ImgurAlbumIE(ImgurGalleryIE):
|
||||
IE_NAME = 'imgur:album'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://imgur.com/a/j6Orj',
|
||||
'info_dict': {
|
||||
'id': 'j6Orj',
|
||||
'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}]
|
||||
|
@@ -227,44 +227,37 @@ class InstagramIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class InstagramUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
|
||||
IE_DESC = 'Instagram user profile'
|
||||
IE_NAME = 'instagram:user'
|
||||
_TEST = {
|
||||
'url': 'https://instagram.com/porsche',
|
||||
'info_dict': {
|
||||
'id': 'porsche',
|
||||
'title': 'porsche',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
'params': {
|
||||
'extract_flat': True,
|
||||
'skip_download': True,
|
||||
'playlistend': 5,
|
||||
}
|
||||
}
|
||||
class InstagramPlaylistIE(InfoExtractor):
|
||||
# A superclass for handling any kind of query based on GraphQL which
|
||||
# results in a playlist.
|
||||
|
||||
_gis_tmpl = None
|
||||
_gis_tmpl = None # used to cache GIS request type
|
||||
|
||||
def _entries(self, data):
|
||||
def _parse_graphql(self, webpage, item_id):
|
||||
# Reads a webpage and returns its GraphQL data.
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
||||
item_id)
|
||||
|
||||
def _extract_graphql(self, data, url):
|
||||
# Parses GraphQL queries containing videos and generates a playlist.
|
||||
def get_count(suffix):
|
||||
return int_or_none(try_get(
|
||||
node, lambda x: x['edge_media_' + suffix]['count']))
|
||||
|
||||
uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
uploader_id = self._match_id(url)
|
||||
csrf_token = data['config']['csrf_token']
|
||||
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
|
||||
|
||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||
|
||||
cursor = ''
|
||||
for page_num in itertools.count(1):
|
||||
variables = json.dumps({
|
||||
'id': uploader_id,
|
||||
variables = {
|
||||
'first': 12,
|
||||
'after': cursor,
|
||||
})
|
||||
}
|
||||
variables.update(self._query_vars_for(data))
|
||||
variables = json.dumps(variables)
|
||||
|
||||
if self._gis_tmpl:
|
||||
gis_tmpls = [self._gis_tmpl]
|
||||
@@ -276,21 +269,26 @@ class InstagramUserIE(InfoExtractor):
|
||||
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
|
||||
]
|
||||
|
||||
# try all of the ways to generate a GIS query, and not only use the
|
||||
# first one that works, but cache it for future requests
|
||||
for gis_tmpl in gis_tmpls:
|
||||
try:
|
||||
media = self._download_json(
|
||||
json_data = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', uploader_id,
|
||||
'Downloading JSON page %d' % page_num, headers={
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-Instagram-GIS': hashlib.md5(
|
||||
('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
|
||||
}, query={
|
||||
'query_hash': '42323d64886122307be10013ad2dcc44',
|
||||
'query_hash': self._QUERY_HASH,
|
||||
'variables': variables,
|
||||
})['data']['user']['edge_owner_to_timeline_media']
|
||||
})
|
||||
media = self._parse_timeline_from(json_data)
|
||||
self._gis_tmpl = gis_tmpl
|
||||
break
|
||||
except ExtractorError as e:
|
||||
# if it's an error caused by a bad query, and there are
|
||||
# more GIS templates to try, ignore it and keep trying
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if gis_tmpl != gis_tmpls[-1]:
|
||||
continue
|
||||
@@ -348,14 +346,80 @@ class InstagramUserIE(InfoExtractor):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
username = self._match_id(url)
|
||||
user_or_tag = self._match_id(url)
|
||||
webpage = self._download_webpage(url, user_or_tag)
|
||||
data = self._parse_graphql(webpage, user_or_tag)
|
||||
|
||||
webpage = self._download_webpage(url, username)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
|
||||
username)
|
||||
self._set_cookie('instagram.com', 'ig_pr', '1')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(data), username, username)
|
||||
self._extract_graphql(data, url), user_or_tag, user_or_tag)
|
||||
|
||||
|
||||
class InstagramUserIE(InstagramPlaylistIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
|
||||
IE_DESC = 'Instagram user profile'
|
||||
IE_NAME = 'instagram:user'
|
||||
_TEST = {
|
||||
'url': 'https://instagram.com/porsche',
|
||||
'info_dict': {
|
||||
'id': 'porsche',
|
||||
'title': 'porsche',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
'params': {
|
||||
'extract_flat': True,
|
||||
'skip_download': True,
|
||||
'playlistend': 5,
|
||||
}
|
||||
}
|
||||
|
||||
_QUERY_HASH = '42323d64886122307be10013ad2dcc44',
|
||||
|
||||
@staticmethod
|
||||
def _parse_timeline_from(data):
|
||||
# extracts the media timeline data from a GraphQL result
|
||||
return data['data']['user']['edge_owner_to_timeline_media']
|
||||
|
||||
@staticmethod
|
||||
def _query_vars_for(data):
|
||||
# returns a dictionary of variables to add to the timeline query based
|
||||
# on the GraphQL of the original page
|
||||
return {
|
||||
'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
|
||||
}
|
||||
|
||||
|
||||
class InstagramTagIE(InstagramPlaylistIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
|
||||
IE_DESC = 'Instagram hashtag search'
|
||||
IE_NAME = 'instagram:tag'
|
||||
_TEST = {
|
||||
'url': 'https://instagram.com/explore/tags/lolcats',
|
||||
'info_dict': {
|
||||
'id': 'lolcats',
|
||||
'title': 'lolcats',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'params': {
|
||||
'extract_flat': True,
|
||||
'skip_download': True,
|
||||
'playlistend': 50,
|
||||
}
|
||||
}
|
||||
|
||||
_QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314',
|
||||
|
||||
@staticmethod
|
||||
def _parse_timeline_from(data):
|
||||
# extracts the media timeline data from a GraphQL result
|
||||
return data['data']['hashtag']['edge_hashtag_to_media']
|
||||
|
||||
@staticmethod
|
||||
def _query_vars_for(data):
|
||||
# returns a dictionary of variables to add to the timeline query based
|
||||
# on the GraphQL of the original page
|
||||
return {
|
||||
'tag_name':
|
||||
data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
|
||||
}
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
@@ -41,6 +41,24 @@ class IPrimaIE(InfoExtractor):
|
||||
# iframe prima.iprima.cz
|
||||
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.iprima.cz/filmy/desne-rande',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['RU']
|
||||
|
||||
@@ -65,7 +65,11 @@ class IviIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.ivi.tv/watch/33560/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
# Sorted by quality
|
||||
|
@@ -26,8 +26,15 @@ class JamendoBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class JamendoIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
licensing\.jamendo\.com/[^/]+|
|
||||
(?:www\.)?jamendo\.com
|
||||
)
|
||||
/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||
'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
|
||||
'info_dict': {
|
||||
@@ -40,14 +47,19 @@ class JamendoIE(JamendoBaseIE):
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._VALID_URL_RE.match(url)
|
||||
track_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
|
||||
display_id)
|
||||
|
||||
title, artist, track = self._extract_meta(webpage)
|
||||
|
||||
|
@@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
|
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TESTS = [{
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor):
|
||||
'upload_date': '20081127',
|
||||
'timestamp': 1227796140,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
@@ -34,5 +37,5 @@ class JWPlatformIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
|
||||
json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
|
||||
return self._parse_jwplayer_data(json_data, video_id)
|
||||
|
@@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor):
|
||||
'entryId': video_id,
|
||||
'service': 'baseentry',
|
||||
'ks': '{1:result:ks}',
|
||||
'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
|
||||
'responseProfile:type': 1,
|
||||
},
|
||||
{
|
||||
'action': 'getbyentryid',
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
|
||||
def _extract_token_url(self, stream_access_url, video_id, data):
|
||||
return self._download_json(
|
||||
stream_access_url, video_id, headers={
|
||||
self._proto_relative_url(stream_access_url, 'https:'), video_id,
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
||||
|
||||
@@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(Laola1TvEmbedIE):
|
||||
class Laola1TvBaseIE(Laola1TvEmbedIE):
|
||||
def _extract_video(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id,
|
||||
transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(Laola1TvBaseIE):
|
||||
IE_NAME = 'laola1tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||
'info_dict': {
|
||||
@@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._extract_video(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
class EHFTVIE(Laola1TvBaseIE):
|
||||
IE_NAME = 'ehftv'
|
||||
_VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id, js_to_json)
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
|
||||
'info_dict': {
|
||||
'id': '1166761',
|
||||
'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
|
||||
'ext': 'mp4',
|
||||
'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
|
||||
'is_live': False,
|
||||
'categories': ['Handball'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
return self._extract_video(url)
|
||||
|
||||
|
||||
class ITTFIE(InfoExtractor):
|
||||
|
229
youtube_dl/extractor/lecturio.py
Normal file
229
youtube_dl/extractor/lecturio.py
Normal file
@@ -0,0 +1,229 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LecturioBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://app.lecturio.com/en/login'
|
||||
_NETRC_MACHINE = 'lecturio'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
# Sets some cookies
|
||||
_, urlh = self._download_webpage_handle(
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(url_handle):
|
||||
return self._LOGIN_URL not in compat_str(url_handle.geturl())
|
||||
|
||||
# Already logged in
|
||||
if is_logged(urlh):
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'signin[email]': username,
|
||||
'signin[password]': password,
|
||||
'signin[remember]': 'on',
|
||||
}
|
||||
|
||||
response, urlh = self._download_webpage_handle(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
data=urlencode_postdata(login_form))
|
||||
|
||||
# Logged in successfully
|
||||
if is_logged(urlh):
|
||||
return
|
||||
|
||||
errors = self._html_search_regex(
|
||||
r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
|
||||
'errors', default=None)
|
||||
if errors:
|
||||
raise ExtractorError('Unable to login: %s' % errors, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class LecturioIE(LecturioBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https://
|
||||
(?:
|
||||
app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
|
||||
(?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
|
||||
'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
|
||||
'info_dict': {
|
||||
'id': '39634',
|
||||
'ext': 'mp4',
|
||||
'title': 'Important Concepts and Terms – Introduction to Microbiology',
|
||||
},
|
||||
'skip': 'Requires lecturio account credentials',
|
||||
}, {
|
||||
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_CC_LANGS = {
|
||||
'German': 'de',
|
||||
'English': 'en',
|
||||
'Spanish': 'es',
|
||||
'French': 'fr',
|
||||
'Polish': 'pl',
|
||||
'Russian': 'ru',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id') or mobj.group('id_de')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
|
||||
display_id)
|
||||
|
||||
lecture_id = self._search_regex(
|
||||
r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')
|
||||
|
||||
api_url = self._search_regex(
|
||||
r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'api url', group='url')
|
||||
|
||||
video = self._download_json(api_url, display_id)
|
||||
|
||||
title = video['title'].strip()
|
||||
|
||||
formats = []
|
||||
for format_ in video['content']['media']:
|
||||
if not isinstance(format_, dict):
|
||||
continue
|
||||
file_ = format_.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
if ext == 'smil':
|
||||
# smil contains only broken RTMP formats anyway
|
||||
continue
|
||||
file_url = url_or_none(file_)
|
||||
if not file_url:
|
||||
continue
|
||||
label = str_or_none(format_.get('label'))
|
||||
filesize = int_or_none(format_.get('fileSize'))
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': label,
|
||||
'filesize': float_or_none(filesize, invscale=1000)
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
cc = self._parse_json(
|
||||
self._search_regex(
|
||||
r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
|
||||
default='{}'), display_id, fatal=False)
|
||||
for cc_label, cc_url in cc.items():
|
||||
cc_url = url_or_none(cc_url)
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = self._search_regex(
|
||||
r'/([a-z]{2})_', cc_url, 'lang',
|
||||
default=cc_label.split()[0] if cc_label else 'en')
|
||||
original_lang = self._search_regex(
|
||||
r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
|
||||
default=None)
|
||||
sub_dict = (automatic_captions
|
||||
if 'auto-translated' in cc_label or original_lang
|
||||
else subtitles)
|
||||
sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
|
||||
'url': cc_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': lecture_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
}
|
||||
|
||||
|
||||
class LecturioCourseIE(LecturioBaseIE):
|
||||
_VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
|
||||
_TEST = {
|
||||
'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
|
||||
'info_dict': {
|
||||
'id': 'microbiology-introduction',
|
||||
'title': 'Microbiology: Introduction',
|
||||
},
|
||||
'playlist_count': 45,
|
||||
'skip': 'Requires lecturio account credentials',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
|
||||
webpage):
|
||||
params = extract_attributes(mobj.group(0))
|
||||
lecture_url = urljoin(url, params.get('data-url'))
|
||||
lecture_id = params.get('data-id')
|
||||
entries.append(self.url_result(
|
||||
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage,
|
||||
'title', default=None)
|
||||
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
|
||||
|
||||
class LecturioDeCourseIE(LecturioBaseIE):
|
||||
_VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
|
||||
_TEST = {
|
||||
'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
|
||||
webpage):
|
||||
lecture_url = urljoin(url, mobj.group('url'))
|
||||
lecture_id = mobj.group('id')
|
||||
entries.append(self.url_result(
|
||||
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
|
||||
|
||||
return self.playlist_result(entries, display_id, title)
|
@@ -16,16 +16,15 @@ from ..utils import (
|
||||
class LibraryOfCongressIE(InfoExtractor):
|
||||
IE_NAME = 'loc'
|
||||
IE_DESC = 'Library of Congress'
|
||||
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
|
||||
_TESTS = [{
|
||||
# embedded via <div class="media-player"
|
||||
'url': 'http://loc.gov/item/90716351/',
|
||||
'md5': '353917ff7f0255aa6d4b80a034833de8',
|
||||
'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
|
||||
'info_dict': {
|
||||
'id': '90716351',
|
||||
'ext': 'mp4',
|
||||
'title': "Pa's trip to Mars",
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
@@ -57,6 +56,12 @@ class LibraryOfCongressIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.loc.gov/item/ihas.200197114/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -67,12 +72,13 @@ class LibraryOfCongressIE(InfoExtractor):
|
||||
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
||||
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
||||
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
|
||||
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
|
||||
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
|
||||
r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
|
||||
webpage, 'media id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
||||
video_id)['mediaObject']
|
||||
media_id)['mediaObject']
|
||||
|
||||
derivative = data['derivatives'][0]
|
||||
media_url = derivative['derivativeUrl']
|
||||
@@ -89,25 +95,29 @@ class LibraryOfCongressIE(InfoExtractor):
|
||||
if ext not in ('mp4', 'mp3'):
|
||||
media_url += '.mp4' if is_video else '.mp3'
|
||||
|
||||
if 'vod/mp4:' in media_url:
|
||||
formats = [{
|
||||
'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
|
||||
formats = []
|
||||
if '/vod/mp4:' in media_url:
|
||||
formats.append({
|
||||
'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
|
||||
'format_id': 'hls',
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
'quality': 1,
|
||||
}]
|
||||
elif 'vod/mp3:' in media_url:
|
||||
formats = [{
|
||||
'url': media_url.replace('vod/mp3:', ''),
|
||||
'vcodec': 'none',
|
||||
}]
|
||||
})
|
||||
http_format = {
|
||||
'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
|
||||
'format_id': 'http',
|
||||
'quality': 1,
|
||||
}
|
||||
if not is_video:
|
||||
http_format['vcodec'] = 'none'
|
||||
formats.append(http_format)
|
||||
|
||||
download_urls = set()
|
||||
for m in re.finditer(
|
||||
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
|
||||
format_id = m.group('id').lower()
|
||||
if format_id == 'gif':
|
||||
if format_id in ('gif', 'jpeg'):
|
||||
continue
|
||||
download_url = m.group('url')
|
||||
if download_url in download_urls:
|
||||
|
181
youtube_dl/extractor/linkedin.py
Normal file
181
youtube_dl/extractor/linkedin.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class LinkedInLearningBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'linkedin'
|
||||
|
||||
def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
|
||||
query = {
|
||||
'courseSlug': course_slug,
|
||||
'fields': fields,
|
||||
'q': 'slugs',
|
||||
}
|
||||
sub = ''
|
||||
if video_slug:
|
||||
query.update({
|
||||
'videoSlug': video_slug,
|
||||
'resolution': '_%s' % resolution,
|
||||
})
|
||||
sub = ' %dp' % resolution
|
||||
api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
|
||||
return self._download_json(
|
||||
api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
|
||||
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
|
||||
}, query=query)['elements'][0]
|
||||
|
||||
def _get_urn_id(self, video_data):
|
||||
urn = video_data.get('urn')
|
||||
if urn:
|
||||
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
def _get_video_id(self, video_data, course_slug, video_slug):
|
||||
return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
'https://www.linkedin.com/uas/login?trk=learning',
|
||||
None, 'Downloading login page')
|
||||
action_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
|
||||
default='https://www.linkedin.com/uas/login-submit', group='url')
|
||||
data = self._hidden_inputs(login_page)
|
||||
data.update({
|
||||
'session_key': email,
|
||||
'session_password': password,
|
||||
})
|
||||
login_submit_page = self._download_webpage(
|
||||
action_url, None, 'Logging in',
|
||||
data=urlencode_postdata(data))
|
||||
error = self._search_regex(
|
||||
r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
|
||||
login_submit_page, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
|
||||
class LinkedInLearningIE(LinkedInLearningBaseIE):
|
||||
IE_NAME = 'linkedin:learning'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
|
||||
'md5': 'a1d74422ff0d5e66a792deb996693167',
|
||||
'info_dict': {
|
||||
'id': '90426',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome',
|
||||
'timestamp': 1430396150.82,
|
||||
'upload_date': '20150430',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_slug, video_slug = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
video_data = None
|
||||
formats = []
|
||||
for width, height in ((640, 360), (960, 540), (1280, 720)):
|
||||
video_data = self._call_api(
|
||||
course_slug, 'selectedVideo', video_slug, height)['selectedVideo']
|
||||
|
||||
video_url_data = video_data.get('url') or {}
|
||||
progressive_url = video_url_data.get('progressiveUrl')
|
||||
if progressive_url:
|
||||
formats.append({
|
||||
'format_id': 'progressive-%dp' % height,
|
||||
'url': progressive_url,
|
||||
'height': height,
|
||||
'width': width,
|
||||
'source_preference': 1,
|
||||
})
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
audio_url = video_data.get('audio', {}).get('progressiveUrl')
|
||||
if audio_url:
|
||||
formats.append({
|
||||
'abr': 64,
|
||||
'ext': 'm4a',
|
||||
'format_id': 'audio',
|
||||
'url': audio_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
streaming_url = video_url_data.get('streamingUrl')
|
||||
if streaming_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
streaming_url, video_slug, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
|
||||
|
||||
return {
|
||||
'id': self._get_video_id(video_data, course_slug, video_slug),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('defaultThumbnail'),
|
||||
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
|
||||
'duration': int_or_none(video_data.get('durationInSeconds')),
|
||||
}
|
||||
|
||||
|
||||
class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
|
||||
IE_NAME = 'linkedin:learning:course'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
|
||||
'info_dict': {
|
||||
'id': 'programming-foundations-fundamentals',
|
||||
'title': 'Programming Foundations: Fundamentals',
|
||||
'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
|
||||
},
|
||||
'playlist_mincount': 61,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_slug = self._match_id(url)
|
||||
course_data = self._call_api(course_slug, 'chapters,description,title')
|
||||
|
||||
entries = []
|
||||
for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
|
||||
chapter_title = chapter.get('title')
|
||||
chapter_id = self._get_urn_id(chapter)
|
||||
for video in chapter.get('videos', []):
|
||||
video_slug = video.get('slug')
|
||||
if not video_slug:
|
||||
continue
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'id': self._get_video_id(video, course_slug, video_slug),
|
||||
'title': video.get('title'),
|
||||
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
|
||||
'chapter': chapter_title,
|
||||
'chapter_number': chapter_number,
|
||||
'chapter_id': chapter_id,
|
||||
'ie_key': LinkedInLearningIE.ie_key(),
|
||||
})
|
||||
|
||||
return self.playlist_result(
|
||||
entries, course_slug,
|
||||
course_data.get('title'),
|
||||
course_data.get('description'))
|
174
youtube_dl/extractor/linuxacademy.py
Normal file
174
youtube_dl/extractor/linuxacademy.py
Normal file
@@ -0,0 +1,174 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LinuxAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?linuxacademy\.com/cp/
|
||||
(?:
|
||||
courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
|
||||
modules/view/id/(?P<course_id>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
|
||||
'info_dict': {
|
||||
'id': '1498-2',
|
||||
'ext': 'mp4',
|
||||
'title': "Introduction to the Practitioner's Brief",
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires Linux Academy account credentials',
|
||||
}, {
|
||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://linuxacademy.com/cp/modules/view/id/154',
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'title': 'AWS Certified Cloud Practitioner',
|
||||
'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
|
||||
},
|
||||
'playlist_count': 41,
|
||||
'skip': 'Requires Linux Academy account credentials',
|
||||
}]
|
||||
|
||||
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
|
||||
_ORIGIN_URL = 'https://linuxacademy.com'
|
||||
_CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
|
||||
_NETRC_MACHINE = 'linuxacademy'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
def random_string():
|
||||
return ''.join([
|
||||
random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
|
||||
for _ in range(32)])
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
|
||||
'client_id': self._CLIENT_ID,
|
||||
'response_type': 'token id_token',
|
||||
'redirect_uri': self._ORIGIN_URL,
|
||||
'scope': 'openid email user_impersonation profile',
|
||||
'audience': self._ORIGIN_URL,
|
||||
'state': random_string(),
|
||||
'nonce': random_string(),
|
||||
})
|
||||
|
||||
login_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'login info', group='value'), None,
|
||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8')
|
||||
)['extraParams']
|
||||
|
||||
login_data.update({
|
||||
'client_id': self._CLIENT_ID,
|
||||
'redirect_uri': self._ORIGIN_URL,
|
||||
'tenant': 'lacausers',
|
||||
'connection': 'Username-Password-Authentication',
|
||||
'username': username,
|
||||
'password': password,
|
||||
'sso': 'true',
|
||||
})
|
||||
|
||||
login_state_url = compat_str(urlh.geturl())
|
||||
|
||||
try:
|
||||
login_page = self._download_webpage(
|
||||
'https://login.linuxacademy.com/usernamepassword/login', None,
|
||||
'Downloading login page', data=json.dumps(login_data).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://login.linuxacademy.com',
|
||||
'Referer': login_state_url,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
message = error.get('description') or error['code']
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
raise
|
||||
|
||||
callback_page, urlh = self._download_webpage_handle(
|
||||
'https://login.linuxacademy.com/login/callback', None,
|
||||
'Downloading callback page',
|
||||
data=urlencode_postdata(self._hidden_inputs(login_page)),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Origin': 'https://login.linuxacademy.com',
|
||||
'Referer': login_state_url,
|
||||
})
|
||||
|
||||
access_token = self._search_regex(
|
||||
r'access_token=([^=&]+)', compat_str(urlh.geturl()),
|
||||
'access token')
|
||||
|
||||
self._download_webpage(
|
||||
'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
|
||||
% access_token, None, 'Downloading token validation page')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
|
||||
item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
|
||||
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
|
||||
# course path
|
||||
if course_id:
|
||||
entries = [
|
||||
self.url_result(
|
||||
urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
|
||||
for lesson_url in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
|
||||
webpage))]
|
||||
title = unescapeHTML(self._html_search_regex(
|
||||
(r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
|
||||
r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
|
||||
webpage, 'title', default=None, group='value'))
|
||||
description = unescapeHTML(self._html_search_regex(
|
||||
r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, 'description', default=None, group='value'))
|
||||
return self.playlist_result(entries, course_id, title, description)
|
||||
|
||||
# single video path
|
||||
info = self._extract_jwplayer_data(
|
||||
webpage, item_id, require_title=False, m3u8_id='hls',)
|
||||
title = self._search_regex(
|
||||
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
||||
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
info.update({
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
})
|
||||
return info
|
@@ -87,7 +87,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
|
||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -120,13 +120,27 @@ class LiveLeakIE(InfoExtractor):
|
||||
}
|
||||
|
||||
for idx, info_dict in enumerate(entries):
|
||||
formats = []
|
||||
for a_format in info_dict['formats']:
|
||||
if not a_format.get('height'):
|
||||
a_format['height'] = int_or_none(self._search_regex(
|
||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||
default=None))
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(info_dict['formats'])
|
||||
# Removing '.*.mp4' gives the raw video, which is essentially
|
||||
# the same video without the LiveLeak logo at the top (see
|
||||
# https://github.com/rg3/youtube-dl/pull/4768)
|
||||
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||
if a_format['url'] != orig_url:
|
||||
format_id = a_format.get('format_id')
|
||||
formats.append({
|
||||
'format_id': 'original' + ('-' + format_id if format_id else ''),
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
|
||||
# Don't append entry ID for one-video pages to keep backward compatibility
|
||||
if len(entries) > 1:
|
||||
@@ -146,7 +160,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
|
||||
|
||||
class LiveLeakEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
|
||||
|
||||
# See generic.py for actual test cases
|
||||
_TESTS = [{
|
||||
@@ -158,15 +172,14 @@ class LiveLeakEmbedIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
kind, video_id = mobj.group('kind', 'id')
|
||||
kind, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if kind == 'f':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
liveleak_url = self._search_regex(
|
||||
r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||
webpage, 'LiveLeak URL', group='url')
|
||||
elif kind == 'i':
|
||||
liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
|
||||
else:
|
||||
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
|
||||
|
||||
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
||||
|
@@ -363,7 +363,4 @@ class LivestreamShortenerIE(InfoExtractor):
|
||||
id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, id)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self._og_search_url(webpage),
|
||||
}
|
||||
return self.url_result(self._og_search_url(webpage))
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class LyndaBaseIE(InfoExtractor):
|
||||
_SIGNIN_URL = 'https://www.lynda.com/signin'
|
||||
_SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
|
||||
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
||||
_USER_URL = 'https://www.lynda.com/signin/user'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
|
53
youtube_dl/extractor/malltv.py
Normal file
53
youtube_dl/extractor/malltv.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import merge_dicts
|
||||
|
||||
|
||||
class MallTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'md5': '1c4a37f080e1f3023103a7b43458e518',
|
||||
'info_dict': {
|
||||
'id': 't0zzt0',
|
||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'ext': 'mp4',
|
||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
||||
'duration': 216,
|
||||
'timestamp': 1538870400,
|
||||
'upload_date': '20181007',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
|
||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
||||
video_id = self._search_regex(
|
||||
SOURCE_RE, webpage, 'video id', group='id')
|
||||
|
||||
media = self._parse_html5_media_entries(
|
||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(media, info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
})
|
@@ -2,12 +2,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ManyVidsIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# preview video
|
||||
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||
'info_dict': {
|
||||
@@ -17,7 +23,18 @@ class ManyVidsIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# full video
|
||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
|
||||
'info_dict': {
|
||||
'id': '935718',
|
||||
'ext': 'mp4',
|
||||
'title': 'MY FACE REVEAL',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -28,12 +45,41 @@ class ManyVidsIE(InfoExtractor):
|
||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video URL', group='url')
|
||||
|
||||
title = '%s (Preview)' % self._html_search_regex(
|
||||
r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True)
|
||||
|
||||
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||
title += ' (Preview)'
|
||||
|
||||
mv_token = self._search_regex(
|
||||
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'mv token', default=None, group='value')
|
||||
|
||||
if mv_token:
|
||||
# Sets some cookies
|
||||
self._download_webpage(
|
||||
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||
video_id, fatal=False, data=urlencode_postdata({
|
||||
'mvtoken': mv_token,
|
||||
'vid': video_id,
|
||||
}), headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
formats = [{'url': video_url}]
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||
'view count', default=None))
|
||||
|
||||
@@ -42,7 +88,5 @@ class ManyVidsIE(InfoExtractor):
|
||||
'title': title,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
}],
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -21,7 +21,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MediasiteIE(InfoExtractor):
|
||||
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
||||
_VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
|
||||
@@ -84,7 +84,15 @@ class MediasiteIE(InfoExtractor):
|
||||
'timestamp': 1333983600,
|
||||
'duration': 7794,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
|
||||
|
@@ -1,12 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
headers = {
|
||||
# Disable family filter
|
||||
'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
|
||||
'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
|
||||
}
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
|
@@ -161,11 +161,17 @@ class MixcloudIE(InfoExtractor):
|
||||
stream_info = info_json['streamInfo']
|
||||
formats = []
|
||||
|
||||
def decrypt_url(f_url):
|
||||
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
|
||||
decrypted_url = self._decrypt_xor_cipher(k, f_url)
|
||||
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
|
||||
return decrypted_url
|
||||
|
||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||
format_url = stream_info.get(url_key)
|
||||
if not format_url:
|
||||
continue
|
||||
decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
|
||||
decrypted = decrypt_url(compat_b64decode(format_url))
|
||||
if not decrypted:
|
||||
continue
|
||||
if url_key == 'hlsUrl':
|
||||
|
@@ -1,15 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .fox import FOXIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
update_url_query,
|
||||
get_element_by_class,
|
||||
)
|
||||
|
||||
|
||||
@@ -66,130 +61,22 @@ class NationalGeographicVideoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicIE(ThePlatformIE, AdobePassIE):
|
||||
IE_NAME = 'natgeo'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
|
||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
||||
'info_dict': {
|
||||
'id': 'vKInpacll2pC',
|
||||
'ext': 'mp4',
|
||||
'title': 'Uncovering a Universal Knowledge',
|
||||
'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
|
||||
'timestamp': 1458680907,
|
||||
'upload_date': '20160322',
|
||||
'uploader': 'NEWA-FNG-NGTV',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
class NationalGeographicTVIE(FOXIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
|
||||
'info_dict': {
|
||||
'id': '6a875e6e734b479beda26438c9f21138',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why Nat Geo? Valley of the Boom',
|
||||
'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.',
|
||||
'timestamp': 1542662458,
|
||||
'upload_date': '20181119',
|
||||
'age_limit': 14,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
|
||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
||||
'info_dict': {
|
||||
'id': 'Pok5lWCkiEFA',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Stunning Red Bird of Paradise',
|
||||
'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
|
||||
'timestamp': 1459362152,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NEWA-FNG-NGTV',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
release_url = self._search_regex(
|
||||
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
|
||||
webpage, 'release url')
|
||||
theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
|
||||
video_id = theplatform_path.split('/')[-1]
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
|
||||
if is_auth == 'auth':
|
||||
auth_resource_id = self._search_regex(
|
||||
r"video_auth_resourceId\s*=\s*'([^']+)'",
|
||||
webpage, 'auth resource id')
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for key, value in (('switch', 'http'), ('manifest', 'm3u')):
|
||||
tp_query = query.copy()
|
||||
tp_query.update({
|
||||
key: value,
|
||||
})
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_theplatform_metadata(theplatform_path, display_id)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class NationalGeographicEpisodeGuideIE(InfoExtractor):
|
||||
IE_NAME = 'natgeo:episodeguide'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
|
||||
'info_dict': {
|
||||
'id': 'the-story-of-god-with-morgan-freeman-season-1',
|
||||
'title': 'The Story of God with Morgan Freeman - Season 1',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
},
|
||||
{
|
||||
'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
|
||||
'info_dict': {
|
||||
'id': 'underworld-inc-season-2',
|
||||
'title': 'Underworld, Inc. - Season 2',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
show = get_element_by_class('show', webpage)
|
||||
selected_season = self._search_regex(
|
||||
r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
|
||||
webpage, 'selected season')
|
||||
entries = [
|
||||
self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic')
|
||||
for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)]
|
||||
return self.playlist_result(
|
||||
entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')),
|
||||
'%s - %s' % (show, selected_season))
|
||||
}]
|
||||
_HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
|
||||
_API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'
|
||||
|
@@ -9,10 +9,8 @@ from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -269,27 +267,14 @@ class CSNNEIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
|
||||
'''
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
'info_dict': {
|
||||
'id': '52753292',
|
||||
'ext': 'flv',
|
||||
'title': 'Crew emerges after four-month Mars food study',
|
||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
||||
'md5': 'af1adfa51312291a017720403826bb64',
|
||||
'info_dict': {
|
||||
'id': 'p_tweet_snow_140529',
|
||||
'id': '269389891880',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||
@@ -313,7 +298,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
||||
'info_dict': {
|
||||
'id': 'nn_netcast_150204',
|
||||
'id': '394064451844',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||
@@ -326,7 +311,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
||||
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
||||
'info_dict': {
|
||||
'id': 'x_lon_vwhorn_150922',
|
||||
'id': '529953347624',
|
||||
'ext': 'mp4',
|
||||
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
||||
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
||||
@@ -339,7 +324,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
||||
'info_dict': {
|
||||
'id': 'tdy_al_space_160420',
|
||||
'id': '669831235788',
|
||||
'ext': 'mp4',
|
||||
'title': 'See the aurora borealis from space in stunning new NASA video',
|
||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||
@@ -352,7 +337,7 @@ class NBCNewsIE(ThePlatformIE):
|
||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||
'info_dict': {
|
||||
'id': 'n_hayes_Aimm_140801_272214',
|
||||
'id': '314487875924',
|
||||
'ext': 'mp4',
|
||||
'title': 'The chaotic GOP immigration vote',
|
||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||
@@ -374,60 +359,22 @@ class NBCNewsIE(ThePlatformIE):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
if video_id is not None:
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': info.find('caption').text,
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
else:
|
||||
# "feature" and "nightly-news" pages use theplatform.com
|
||||
video_id = mobj.group('mpx_id')
|
||||
video_id = self._match_id(url)
|
||||
if not video_id.isdigit():
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
filter_param = 'byId'
|
||||
bootstrap_json = self._search_regex(
|
||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
|
||||
webpage, 'bootstrap json', default=None)
|
||||
if bootstrap_json:
|
||||
bootstrap = self._parse_json(
|
||||
bootstrap_json, video_id, transform_source=unescapeHTML)
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.+});', webpage,
|
||||
'bootstrap json'), video_id)
|
||||
video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id']
|
||||
|
||||
info = None
|
||||
if 'results' in bootstrap:
|
||||
info = bootstrap['results'][0]['video']
|
||||
elif 'video' in bootstrap:
|
||||
info = bootstrap['video']
|
||||
elif 'msnbcVideoInfo' in bootstrap:
|
||||
info = bootstrap['msnbcVideoInfo']['meta']
|
||||
elif 'msnbcThePlatform' in bootstrap:
|
||||
info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
|
||||
else:
|
||||
info = bootstrap
|
||||
|
||||
if 'guid' in info:
|
||||
video_id = info['guid']
|
||||
filter_param = 'byGuid'
|
||||
elif 'mpxId' in info:
|
||||
video_id = info['mpxId']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
|
||||
'ie_key': 'ThePlatformFeed',
|
||||
}
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}),
|
||||
'ie_key': 'ThePlatformFeed',
|
||||
}
|
||||
|
||||
|
||||
class NBCOlympicsIE(InfoExtractor):
|
||||
|
@@ -5,8 +5,8 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NhkVodIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/(?:vod|ondemand)/(?P<id>[^/]+/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# Videos available only for a limited period of time. Visit
|
||||
# http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
|
||||
'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
|
||||
@@ -19,7 +19,10 @@ class NhkVodIE(InfoExtractor):
|
||||
'episode': 'The Kimono as Global Fashion',
|
||||
},
|
||||
'skip': 'Videos available only for a limited period of time',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
|
||||
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor):
|
||||
if not username:
|
||||
return True
|
||||
|
||||
# Setup session (will set necessary cookies)
|
||||
self._request_webpage(
|
||||
'https://njpwworld.com/', None, note='Setting up session')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
'https://njpwworld.com/auth/login', None,
|
||||
self._LOGIN_URL, None,
|
||||
note='Logging in', errnote='Unable to login',
|
||||
data=urlencode_postdata({'login_id': username, 'pw': password}),
|
||||
headers={'Referer': 'https://njpwworld.com/auth'})
|
||||
headers={'Referer': 'https://front.njpwworld.com/auth'})
|
||||
# /auth/login will return 302 for successful logins
|
||||
if urlh.geturl() == 'https://njpwworld.com/auth/login':
|
||||
if urlh.geturl() == self._LOGIN_URL:
|
||||
self.report_warning('unable to login')
|
||||
return False
|
||||
|
||||
|
@@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
||||
brightcove_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': BrightcoveNewIE.ie_key(),
|
||||
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
|
||||
'url': smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['CA']}),
|
||||
'id': brightcove_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'series': series,
|
||||
|
@@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor):
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
|
@@ -12,11 +12,16 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -176,9 +181,118 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._get_info(video_id)
|
||||
try:
|
||||
return self._get_info(url, video_id)
|
||||
except ExtractorError:
|
||||
return self._get_old_info(video_id)
|
||||
|
||||
def _get_info(self, video_id):
|
||||
def _get_info(self, url, video_id):
|
||||
token = self._download_json(
|
||||
'https://www.npostart.nl/api/token', video_id,
|
||||
'Downloading token', headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})['token']
|
||||
|
||||
player = self._download_json(
|
||||
'https://www.npostart.nl/player/%s' % video_id, video_id,
|
||||
'Downloading player JSON', data=urlencode_postdata({
|
||||
'autoplay': 0,
|
||||
'share': 1,
|
||||
'pageUrl': url,
|
||||
'hasAdConsent': 0,
|
||||
'_token': token,
|
||||
}))
|
||||
|
||||
player_token = player['token']
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
|
||||
streams = self._download_json(
|
||||
'https://start-player.npo.nl/video/%s/streams' % video_id,
|
||||
video_id, 'Downloading %s profile JSON' % profile, fatal=False,
|
||||
query={
|
||||
'profile': profile,
|
||||
'quality': 'npo',
|
||||
'tokenId': player_token,
|
||||
'streamType': 'broadcast',
|
||||
})
|
||||
if not streams:
|
||||
continue
|
||||
stream = streams.get('stream')
|
||||
if not isinstance(stream, dict):
|
||||
continue
|
||||
stream_url = url_or_none(stream.get('src'))
|
||||
if not stream_url or stream_url in format_urls:
|
||||
continue
|
||||
format_urls.add(stream_url)
|
||||
if stream.get('protection') is not None:
|
||||
continue
|
||||
stream_type = stream.get('type')
|
||||
stream_ext = determine_ext(stream_url)
|
||||
if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
stream_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif '.ism/Manifest' in stream_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
stream_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
embed_url = url_or_none(player.get('embedUrl'))
|
||||
if embed_url:
|
||||
webpage = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed page', fatal=False)
|
||||
if webpage:
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
|
||||
default='{}'), video_id)
|
||||
if video:
|
||||
title = video.get('episodeTitle')
|
||||
subtitles = {}
|
||||
subtitles_list = video.get('subtitles')
|
||||
if isinstance(subtitles_list, list):
|
||||
for cc in subtitles_list:
|
||||
cc_url = url_or_none(cc.get('src'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = str_or_none(cc.get('language')) or 'nl'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': cc_url,
|
||||
})
|
||||
return merge_dicts({
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': url_or_none(
|
||||
video.get('still_image_url') or video.get('orig_image_url')),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': unified_timestamp(video.get('broadcastDate')),
|
||||
'creator': video.get('channel'),
|
||||
'series': video.get('title'),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video.get('episodeNumber')),
|
||||
'subtitles': subtitles,
|
||||
}, info)
|
||||
|
||||
return info
|
||||
|
||||
def _get_old_info(self, video_id):
|
||||
metadata = self._download_json(
|
||||
'http://e.omroep.nl/metadata/%s' % video_id,
|
||||
video_id,
|
||||
@@ -280,7 +394,7 @@ class NPOIE(NPOBaseIE):
|
||||
# JSON
|
||||
else:
|
||||
video_url = stream_info.get('url')
|
||||
if not video_url or video_url in urls:
|
||||
if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
|
||||
continue
|
||||
urls.add(video_url)
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
@@ -363,7 +477,7 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
class NPOLiveIE(NPOBaseIE):
|
||||
IE_NAME = 'npo.nl:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.npo.nl/live/npo-1',
|
||||
@@ -380,6 +494,9 @@ class NPOLiveIE(NPOBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.npo.nl/live',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.npostart.nl/live/npo-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': '2f7f6eeb2aacdd99885f355428715cfa',
|
||||
'md5': '706f34cdf1322577589e369e522b50ef',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 263,
|
||||
'duration': 262,
|
||||
}
|
||||
}, {
|
||||
# audio
|
||||
@@ -248,7 +248,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||
(?:serie/[^/]+|program)/
|
||||
(?:serie(?:/[^/]+){1,2}|program)/
|
||||
(?![Ee]pisodes)%s
|
||||
(?:/\d{2}-\d{2}-\d{4})?
|
||||
(?:\#del=(?P<part_id>\d+))?
|
||||
@@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål - TV',
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
@@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
@@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
@@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
@@ -362,6 +362,9 @@ class NRKTVIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@@ -403,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
|
||||
def _extract_series(self, webpage, display_id, fatal=True):
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
|
||||
default='{}' if not fatal else NO_DEFAULT),
|
||||
(r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
|
||||
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
|
||||
display_id, fatal=False)
|
||||
if not config:
|
||||
return
|
||||
return try_get(config, lambda x: x['series'], dict)
|
||||
return try_get(
|
||||
config,
|
||||
(lambda x: x['initialState']['series'], lambda x: x['series']),
|
||||
dict)
|
||||
|
||||
def _extract_seasons(self, seasons):
|
||||
if not isinstance(seasons, list):
|
||||
return []
|
||||
entries = []
|
||||
for season in seasons:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
return entries
|
||||
|
||||
def _extract_episodes(self, season):
|
||||
entries = []
|
||||
if not isinstance(season, dict):
|
||||
return entries
|
||||
episodes = season.get('episodes')
|
||||
if not isinstance(episodes, list):
|
||||
return entries
|
||||
for episode in episodes:
|
||||
return []
|
||||
return self._extract_entries(season.get('episodes'))
|
||||
|
||||
def _extract_entries(self, entry_list):
|
||||
if not isinstance(entry_list, list):
|
||||
return []
|
||||
entries = []
|
||||
for episode in entry_list:
|
||||
nrk_id = episode.get('prfId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
@@ -462,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# new layout
|
||||
# new layout, seasons
|
||||
'url': 'https://tv.nrk.no/serie/backstage',
|
||||
'info_dict': {
|
||||
'id': 'backstage',
|
||||
@@ -471,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
# old layout
|
||||
# new layout, instalments
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
# old layout
|
||||
'url': 'https://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
@@ -517,11 +535,12 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
description = try_get(
|
||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
||||
entries = []
|
||||
for season in series['seasons']:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
entries.extend(self._extract_seasons(series.get('seasons')))
|
||||
entries.extend(self._extract_entries(series.get('instalments')))
|
||||
entries.extend(self._extract_episodes(series.get('extraMaterial')))
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
|
||||
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
@@ -533,6 +552,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
if title:
|
||||
title = self._search_regex(
|
||||
r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
@@ -593,7 +615,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
|
||||
'title': 'Rivertonprisen til Karin Fossum',
|
||||
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _extract_title(self, webpage):
|
||||
|
@@ -11,20 +11,27 @@ from ..utils import (
|
||||
|
||||
class NZZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
|
||||
'info_dict': {
|
||||
'id': '9153',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
|
||||
'info_dict': {
|
||||
'id': '1368112',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = []
|
||||
for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
|
||||
for player_element in re.findall(
|
||||
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
|
||||
player_params = extract_attributes(player_element)
|
||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||
self.report_warning('Unsupported player type')
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user