mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 14:18:37 +09:00
Compare commits
286 Commits
2021.03.25
...
df-fmt-ext
Author | SHA1 | Date | |
---|---|---|---|
![]() |
72c431725a | ||
![]() |
080c5d48ed | ||
![]() |
ba1399d54d | ||
![]() |
195f22f679 | ||
![]() |
fc2beab0e7 | ||
![]() |
1a4fbe8462 | ||
![]() |
c2f9be3e63 | ||
![]() |
604762a9f8 | ||
![]() |
47e70fff8b | ||
![]() |
de39d1281c | ||
![]() |
27ed77aabb | ||
![]() |
c4b19a8816 | ||
![]() |
087ddc2371 | ||
![]() |
65ccb0dd4e | ||
![]() |
a874871801 | ||
![]() |
b7c25959f0 | ||
![]() |
f102e3dc4e | ||
![]() |
a19855f0f5 | ||
![]() |
ce5d36486e | ||
![]() |
d25cf62086 | ||
![]() |
502cefa41f | ||
![]() |
0faa45d6c0 | ||
![]() |
447edc48e6 | ||
![]() |
ee8560d01e | ||
![]() |
7135277fec | ||
![]() |
7bbd5b13d4 | ||
![]() |
c91cbf6072 | ||
![]() |
11b284c81f | ||
![]() |
c94a459a24 | ||
![]() |
6e2626f092 | ||
![]() |
c282e5f8d7 | ||
![]() |
2ced5a7912 | ||
![]() |
82e4eca711 | ||
![]() |
1b1442887e | ||
![]() |
22127b271c | ||
![]() |
d35557a75d | ||
![]() |
9493ffdb8b | ||
![]() |
7009bb9f31 | ||
![]() |
218c423bc0 | ||
![]() |
55c823634d | ||
![]() |
4050e10a4c | ||
![]() |
ed5c44e7b7 | ||
![]() |
0f6422590e | ||
![]() |
4c6fba3765 | ||
![]() |
d619dd712f | ||
![]() |
573b13410e | ||
![]() |
66e58dccc2 | ||
![]() |
556862bc91 | ||
![]() |
a8d5316aaf | ||
![]() |
fd3f3bebd0 | ||
![]() |
46b8ae2f52 | ||
![]() |
538ec65ba7 | ||
![]() |
b0a60ce203 | ||
![]() |
e52e8b8111 | ||
![]() |
d231b56717 | ||
![]() |
e6a836d54c | ||
![]() |
deee741fb1 | ||
![]() |
adb5294177 | ||
![]() |
5f5c127ece | ||
![]() |
090acd58c1 | ||
![]() |
a03b9775d5 | ||
![]() |
8a158a936c | ||
![]() |
11665dd236 | ||
![]() |
cc179df346 | ||
![]() |
0700fde640 | ||
![]() |
811c480f7b | ||
![]() |
3aa94d7945 | ||
![]() |
ef044be34b | ||
![]() |
530f4582d0 | ||
![]() |
1baa0f5f66 | ||
![]() |
9aa8e5340f | ||
![]() |
04fd3289d3 | ||
![]() |
52c3751df7 | ||
![]() |
187a48aee2 | ||
![]() |
be35e5343a | ||
![]() |
c3deca86ae | ||
![]() |
c7965b9fc2 | ||
![]() |
e988fa4523 | ||
![]() |
e27d8d819f | ||
![]() |
ebc627847c | ||
![]() |
a0068bd6be | ||
![]() |
b764dbe773 | ||
![]() |
871645a4a4 | ||
![]() |
1f50a07771 | ||
![]() |
9e5ca66f16 | ||
![]() |
17d295a1ec | ||
![]() |
49c5293014 | ||
![]() |
6508688e88 | ||
![]() |
4194d253c0 | ||
![]() |
f8e543c906 | ||
![]() |
c4d1738316 | ||
![]() |
1f13ccfd7f | ||
![]() |
923292ba64 | ||
![]() |
782bfd26db | ||
![]() |
3472227074 | ||
![]() |
bf23bc0489 | ||
![]() |
85bf26c1d0 | ||
![]() |
d8adca1b66 | ||
![]() |
d02064218b | ||
![]() |
b1297308fb | ||
![]() |
8088ce036a | ||
![]() |
29f7bfc4d7 | ||
![]() |
74f8cc48af | ||
![]() |
8ff961d10f | ||
![]() |
266b6ef185 | ||
![]() |
825d3426c5 | ||
![]() |
47b0c8697a | ||
![]() |
734dfbb4e3 | ||
![]() |
ddc080a562 | ||
![]() |
16a3fe2ba6 | ||
![]() |
c820a284a2 | ||
![]() |
58babe9af7 | ||
![]() |
6d4932f023 | ||
![]() |
92d73ef393 | ||
![]() |
91278f4b6b | ||
![]() |
73e1ab6125 | ||
![]() |
584715a803 | ||
![]() |
e00b0eab1e | ||
![]() |
005339d637 | ||
![]() |
23ad6402a6 | ||
![]() |
9642344965 | ||
![]() |
568c7005d5 | ||
![]() |
5cb4833f40 | ||
![]() |
5197336de6 | ||
![]() |
01824d275b | ||
![]() |
39a98b09a2 | ||
![]() |
f0a05a55c2 | ||
![]() |
4186e81777 | ||
![]() |
b494824286 | ||
![]() |
8248133e5e | ||
![]() |
27dbf6f0ab | ||
![]() |
61d791726f | ||
![]() |
0c0876f790 | ||
![]() |
7a497f1405 | ||
![]() |
5add3f4373 | ||
![]() |
78ce962f4f | ||
![]() |
41f0043983 | ||
![]() |
34c06b16f5 | ||
![]() |
1e677567cd | ||
![]() |
af9e72507e | ||
![]() |
6ca7b77696 | ||
![]() |
9d142109f4 | ||
![]() |
1ca673bd98 | ||
![]() |
e1eae16b56 | ||
![]() |
96f87aaa3b | ||
![]() |
5f5de51a49 | ||
![]() |
39ca35e765 | ||
![]() |
d76d59d99d | ||
![]() |
2c2c2bd348 | ||
![]() |
46e0a729b2 | ||
![]() |
57044eaceb | ||
![]() |
a3373da70c | ||
![]() |
2c4cb134a9 | ||
![]() |
bfe72723d8 | ||
![]() |
ed99d68bdd | ||
![]() |
5014bd67c2 | ||
![]() |
e418823350 | ||
![]() |
b5242da7d2 | ||
![]() |
a803582717 | ||
![]() |
7fb9564420 | ||
![]() |
379f52a495 | ||
![]() |
cb668eb973 | ||
![]() |
751c9ae39a | ||
![]() |
da32828208 | ||
![]() |
2ccee8db74 | ||
![]() |
47f2f2fbe9 | ||
![]() |
03ab02730f | ||
![]() |
4c77a2e538 | ||
![]() |
4131703001 | ||
![]() |
cc21aebe90 | ||
![]() |
57b9a4b4c6 | ||
![]() |
3a7ef27cf3 | ||
![]() |
a7f61feab2 | ||
![]() |
8fe5d54eb7 | ||
![]() |
d156bc8d59 | ||
![]() |
c2350cac24 | ||
![]() |
b224cf39d5 | ||
![]() |
5f85eb820c | ||
![]() |
bb7ac1ed66 | ||
![]() |
fdf91c52a8 | ||
![]() |
943070af4a | ||
![]() |
82f3993ba3 | ||
![]() |
d495292852 | ||
![]() |
2ee6c7f110 | ||
![]() |
6511b8e8d7 | ||
![]() |
f3cd1d9cec | ||
![]() |
e13a01061d | ||
![]() |
24297a42ef | ||
![]() |
1980ff4550 | ||
![]() |
dfbbe2902f | ||
![]() |
e1a9d0ef78 | ||
![]() |
f47627a1c9 | ||
![]() |
efeb9e0fbf | ||
![]() |
e90a890f01 | ||
![]() |
199c645bee | ||
![]() |
503a3744ad | ||
![]() |
ef03721f47 | ||
![]() |
1e8aaa1d15 | ||
![]() |
6423d7054e | ||
![]() |
eb5080286a | ||
![]() |
286e01ce30 | ||
![]() |
8536dcafd8 | ||
![]() |
552b139911 | ||
![]() |
2202cef0e4 | ||
![]() |
a726009987 | ||
![]() |
03afef7538 | ||
![]() |
b797c1cc75 | ||
![]() |
04be55307a | ||
![]() |
504e4d804d | ||
![]() |
1786cd3fe4 | ||
![]() |
b8645c1f58 | ||
![]() |
fe05191b8c | ||
![]() |
0204838163 | ||
![]() |
a0df8a0617 | ||
![]() |
d1b9a5e2ef | ||
![]() |
ff04d43c46 | ||
![]() |
d2f72c40db | ||
![]() |
e33dfb445c | ||
![]() |
94520568b3 | ||
![]() |
273964d190 | ||
![]() |
346dd3b5e8 | ||
![]() |
f5c2c06231 | ||
![]() |
57eaaff5cf | ||
![]() |
999329cf6b | ||
![]() |
c6ab792990 | ||
![]() |
0db79d8181 | ||
![]() |
7e8b3f9439 | ||
![]() |
ac19c3ac80 | ||
![]() |
c4a451bcdd | ||
![]() |
5ad69d3d0e | ||
![]() |
32290307a4 | ||
![]() |
dab83a2597 | ||
![]() |
41920fc80e | ||
![]() |
9f6c03a006 | ||
![]() |
596b26606c | ||
![]() |
f20b505b46 | ||
![]() |
cfee2dfe83 | ||
![]() |
30a3a4c70f | ||
![]() |
a00a7e0cad | ||
![]() |
54558e0baa | ||
![]() |
7c52395479 | ||
![]() |
ea87ed8394 | ||
![]() |
d01e261a15 | ||
![]() |
79e4ccfc4b | ||
![]() |
06159135ef | ||
![]() |
4fb25ff5a3 | ||
![]() |
1b0a13f33c | ||
![]() |
27e5a4464d | ||
![]() |
545d6cb9d0 | ||
![]() |
006eea564d | ||
![]() |
281b8e3443 | ||
![]() |
c0c5134c57 | ||
![]() |
72a2c0a9ed | ||
![]() |
445db582a2 | ||
![]() |
6b116f0c03 | ||
![]() |
70d0d4f9be | ||
![]() |
6b315d96bc | ||
![]() |
25b1287323 | ||
![]() |
760c911299 | ||
![]() |
162bf9e10a | ||
![]() |
6beb1ac65b | ||
![]() |
3ae9c0f410 | ||
![]() |
e165f5641f | ||
![]() |
aee6feb02a | ||
![]() |
654b4f4ff2 | ||
![]() |
1df2596f81 | ||
![]() |
04d4a3b136 | ||
![]() |
392c467f95 | ||
![]() |
c5aa8f36bf | ||
![]() |
3748863070 | ||
![]() |
ca304beb15 | ||
![]() |
e789bb1aa4 | ||
![]() |
14f29f087e | ||
![]() |
b97fb2edac | ||
![]() |
28bab774a0 | ||
![]() |
8f493de9fb | ||
![]() |
207bc35d34 | ||
![]() |
955894e72f | ||
![]() |
287e50b56b | ||
![]() |
da762c4e32 | ||
![]() |
87a8bde777 | ||
![]() |
49fc0a567f | ||
![]() |
cc777dcaa0 | ||
![]() |
c785911870 | ||
![]() |
605e7b5e47 | ||
![]() |
8562218350 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.03.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.03.25**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.03.25
|
||||
[debug] youtube-dl version 2021.12.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.03.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, none of the provided example URLs may violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.03.25**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.03.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.03.25**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.03.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.03.25**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.03.25
|
||||
[debug] youtube-dl version 2021.12.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.03.25. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.12.17. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.03.25**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.12.17**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1 @@
|
||||
blank_issues_enabled: false
|
13
.github/workflows/ci.yml
vendored
13
.github/workflows/ci.yml
vendored
@@ -15,12 +15,12 @@ jobs:
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# python 3.2 is only available on windows via setup-python
|
||||
- os: windows-latest
|
||||
- os: windows-2019
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
- os: windows-2019
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: download
|
||||
@@ -49,11 +49,18 @@ jobs:
|
||||
- name: Install Jython
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
run: |
|
||||
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||
wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
||||
- name: Install nose
|
||||
if: ${{ matrix.python-impl != 'jython' }}
|
||||
run: pip install nose
|
||||
- name: Install nose (Jython)
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||
run: |
|
||||
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
|
||||
pip install nose-1.3.7-py2-none-any.whl
|
||||
- name: Run tests
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||
env:
|
||||
|
@@ -150,7 +150,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). This makes the extractor available for use, as long as the class name ends with `IE`.
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
157
ChangeLog
157
ChangeLog
@@ -1,3 +1,160 @@
|
||||
version 2021.12.17
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Show ffmpeg output on error (#22680, #29336)
|
||||
|
||||
Extractors
|
||||
* [youtube] Update signature function patterns (#30363, #30366)
|
||||
* [peertube] Only call description endpoint if necessary (#29383)
|
||||
* [periscope] Pass referer to HLS requests (#29419)
|
||||
- [liveleak] Remove extractor (#17625, #24222, #29331)
|
||||
+ [pornhub] Add support for pornhubthbh7ap3u.onion
|
||||
* [pornhub] Detect geo restriction
|
||||
* [pornhub] Dismiss tbr extracted from download URLs (#28927)
|
||||
* [curiositystream:collection] Extend _VALID_URL (#26326, #29117)
|
||||
* [youtube] Make get_video_info processing more robust (#29333)
|
||||
* [youtube] Workaround for get_video_info request (#29333)
|
||||
* [bilibili] Strip uploader name (#29202)
|
||||
* [youtube] Update invidious instance list (#29281)
|
||||
* [umg:de] Update GraphQL API URL (#29304)
|
||||
* [nrk] Switch psapi URL to https (#29344)
|
||||
+ [egghead] Add support for app.egghead.io (#28404, #29303)
|
||||
* [appleconnect] Fix extraction (#29208)
|
||||
+ [orf:tvthek] Add support for MPD formats (#28672, #29236)
|
||||
|
||||
|
||||
version 2021.06.06
|
||||
|
||||
Extractors
|
||||
* [facebook] Improve login required detection
|
||||
* [youporn] Fix formats and view count extraction (#29216)
|
||||
* [orf:tvthek] Fix thumbnails extraction (#29217)
|
||||
* [formula1] Fix extraction (#29206)
|
||||
* [ard] Relax URL regular expression and fix video ids (#22724, #29091)
|
||||
+ [ustream] Detect https embeds (#29133)
|
||||
* [ted] Prefer own formats over external sources (#29142)
|
||||
* [twitch:clips] Improve extraction (#29149)
|
||||
+ [twitch:clips] Add access token query to download URLs (#29136)
|
||||
* [youtube] Fix get_video_info request (#29086, #29165)
|
||||
* [vimeo] Fix vimeo pro embed extraction (#29126)
|
||||
* [redbulltv] Fix embed data extraction (#28770)
|
||||
* [shahid] Relax URL regular expression (#28772, #28930)
|
||||
|
||||
|
||||
version 2021.05.16
|
||||
|
||||
Core
|
||||
* [options] Fix thumbnail option group name (#29042)
|
||||
* [YoutubeDL] Improve extract_info doc (#28946)
|
||||
|
||||
Extractors
|
||||
+ [playstuff] Add support for play.stuff.co.nz (#28901, #28931)
|
||||
* [eroprofile] Fix extraction (#23200, #23626, #29008)
|
||||
+ [vivo] Add support for vivo.st (#29009)
|
||||
+ [generic] Add support for og:audio (#28311, #29015)
|
||||
* [phoenix] Fix extraction (#29057)
|
||||
+ [generic] Add support for sibnet embeds
|
||||
+ [vk] Add support for sibnet embeds (#9500)
|
||||
+ [generic] Add Referer header for direct videojs download URLs (#2879,
|
||||
#20217, #29053)
|
||||
* [orf:radio] Switch download URLs to HTTPS (#29012, #29046)
|
||||
- [blinkx] Remove extractor (#28941)
|
||||
* [medaltv] Relax URL regular expression (#28884)
|
||||
+ [funimation] Add support for optional lang code in URLs (#28950)
|
||||
+ [gdcvault] Add support for HTML5 videos
|
||||
* [dispeak] Improve FLV extraction (#13513, #28970)
|
||||
* [kaltura] Improve iframe extraction (#28969)
|
||||
* [kaltura] Make embed code alternatives actually work
|
||||
* [cda] Improve extraction (#28709, #28937)
|
||||
* [twitter] Improve formats extraction from vmap URL (#28909)
|
||||
* [xtube] Fix formats extraction (#28870)
|
||||
* [svtplay] Improve extraction (#28507, #28876)
|
||||
* [tv2dk] Fix extraction (#28888)
|
||||
|
||||
|
||||
version 2021.04.26
|
||||
|
||||
Extractors
|
||||
+ [xfileshare] Add support for wolfstream.tv (#28858)
|
||||
* [francetvinfo] Improve video id extraction (#28792)
|
||||
* [medaltv] Fix extraction (#28807)
|
||||
* [tver] Redirect all downloads to Brightcove (#28849)
|
||||
* [go] Improve video id extraction (#25207, #25216, #26058)
|
||||
* [youtube] Fix lazy extractors (#28780)
|
||||
+ [bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774)
|
||||
* [cbsnews] Fix extraction for python <3.6 (#23359)
|
||||
|
||||
|
||||
version 2021.04.17
|
||||
|
||||
Core
|
||||
+ [utils] Add support for experimental HTTP response status code
|
||||
308 Permanent Redirect (#27877, #28768)
|
||||
|
||||
Extractors
|
||||
+ [lbry] Add support for HLS videos (#27877, #28768)
|
||||
* [youtube] Fix stretched ratio calculation
|
||||
* [youtube] Improve stretch extraction (#28769)
|
||||
* [youtube:tab] Improve grid extraction (#28725)
|
||||
+ [youtube:tab] Detect series playlist on playlists page (#28723)
|
||||
+ [youtube] Add more invidious instances (#28706)
|
||||
* [pluralsight] Extend anti-throttling timeout (#28712)
|
||||
* [youtube] Improve URL to extractor routing (#27572, #28335, #28742)
|
||||
+ [maoritv] Add support for maoritelevision.com (#24552)
|
||||
+ [youtube:tab] Pass innertube context and x-goog-visitor-id header along with
|
||||
continuation requests (#28702)
|
||||
* [mtv] Fix Viacom A/B Testing Video Player extraction (#28703)
|
||||
+ [pornhub] Extract DASH and HLS formats from get_media end point (#28698)
|
||||
* [cbssports] Fix extraction (#28682)
|
||||
* [jamendo] Fix track extraction (#28686)
|
||||
* [curiositystream] Fix format extraction (#26845, #28668)
|
||||
|
||||
|
||||
version 2021.04.07
|
||||
|
||||
Core
|
||||
* [extractor/common] Use compat_cookies_SimpleCookie for _get_cookies
|
||||
+ [compat] Introduce compat_cookies_SimpleCookie
|
||||
* [extractor/common] Improve JSON-LD author extraction
|
||||
* [extractor/common] Fix _get_cookies on python 2 (#20673, #23256, #20326,
|
||||
#28640)
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix extraction of videos with restricted location (#28685)
|
||||
+ [line] Add support for live.line.me (#17205, #28658)
|
||||
* [vimeo] Improve extraction (#28591)
|
||||
* [youku] Update ccode (#17852, #28447, #28460, #28648)
|
||||
* [youtube] Prefer direct entry metadata over entry metadata from playlist
|
||||
(#28619, #28636)
|
||||
* [screencastomatic] Fix extraction (#11976, #24489)
|
||||
+ [palcomp3] Add support for palcomp3.com (#13120)
|
||||
+ [arnes] Add support for video.arnes.si (#28483)
|
||||
+ [youtube:tab] Add support for hashtags (#28308)
|
||||
|
||||
|
||||
version 2021.04.01
|
||||
|
||||
Extractors
|
||||
* [youtube] Setup CONSENT cookie when needed (#28604)
|
||||
* [vimeo] Fix password protected review extraction (#27591)
|
||||
* [youtube] Improve age-restricted video extraction (#28578)
|
||||
|
||||
|
||||
version 2021.03.31
|
||||
|
||||
Extractors
|
||||
* [vlive] Fix inkey request (#28589)
|
||||
* [francetvinfo] Improve video id extraction (#28584)
|
||||
+ [instagram] Extract duration (#28469)
|
||||
* [instagram] Improve title extraction (#28469)
|
||||
+ [sbs] Add support for ondemand watch URLs (#28566)
|
||||
* [youtube] Fix video's channel extraction (#28562)
|
||||
* [picarto] Fix live stream extraction (#28532)
|
||||
* [vimeo] Fix unlisted video extraction (#28414)
|
||||
* [youtube:tab] Fix playlist/community continuation items extraction (#28266)
|
||||
* [ard] Improve clip id extraction (#22724, #28528)
|
||||
|
||||
|
||||
version 2021.03.25
|
||||
|
||||
Extractors
|
||||
|
12
README.md
12
README.md
@@ -287,7 +287,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail images:
|
||||
## Thumbnail Options:
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--write-all-thumbnails Write all thumbnail image formats to
|
||||
disk
|
||||
@@ -893,7 +893,7 @@ Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) yout
|
||||
|
||||
### The exe throws an error due to missing `MSVCR100.dll`
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
|
||||
To run the exe you first need to install the [Microsoft Visual C++ 2010 Service Pack 1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
@@ -1069,9 +1069,11 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case), then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
|
||||
* the test names use the extractor class name **without the trailing `IE`**
|
||||
* tests with `only_matching` key in test's dict are not counted.
|
||||
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
|
@@ -3,6 +3,7 @@
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
- **247sports**
|
||||
- **24video**
|
||||
- **3qsdn**: 3Q SDN
|
||||
- **3sat**
|
||||
@@ -118,7 +119,6 @@
|
||||
- **BitChuteChannel**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BongaCams**
|
||||
@@ -160,7 +160,8 @@
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **cbssports**
|
||||
- **cbssports:embed**
|
||||
- **CCMA**
|
||||
- **CCTV**: 央视网
|
||||
- **CDA**
|
||||
@@ -463,14 +464,14 @@
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineLive**
|
||||
- **LineLiveChannel**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
- **LinuxAcademy**
|
||||
- **LiTV**
|
||||
- **LiveJournal**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
@@ -488,6 +489,7 @@
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **MaoriTV**
|
||||
- **Markiza**
|
||||
- **MarkizaPage**
|
||||
- **massengeschmack.tv**
|
||||
@@ -679,6 +681,9 @@
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PalcoMP3:artist**
|
||||
- **PalcoMP3:song**
|
||||
- **PalcoMP3:video**
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
@@ -705,6 +710,7 @@
|
||||
- **play.fm**
|
||||
- **player.sky.it**
|
||||
- **PlayPlusTV**
|
||||
- **PlayStuff**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -1059,6 +1065,7 @@
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.sky.it**
|
||||
- **video.sky.it:live**
|
||||
@@ -1153,7 +1160,7 @@
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XHamsterUser**
|
||||
|
@@ -128,6 +128,12 @@ def expect_value(self, got, expected, field):
|
||||
self.assertTrue(
|
||||
contains_str in got,
|
||||
'field %s (value: %r) should contain %r' % (field, got, contains_str))
|
||||
elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
|
||||
fn = eval(expected)
|
||||
suite = expected.split(':', 1)[1].strip()
|
||||
self.assertTrue(
|
||||
fn(got),
|
||||
'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got))
|
||||
elif isinstance(expected, type):
|
||||
self.assertTrue(
|
||||
isinstance(got, expected),
|
||||
@@ -137,7 +143,7 @@ def expect_value(self, got, expected, field):
|
||||
elif isinstance(expected, list) and isinstance(got, list):
|
||||
self.assertEqual(
|
||||
len(expected), len(got),
|
||||
'Expect a list of length %d, but got a list of length %d for field %s' % (
|
||||
'Expected a list of length %d, but got a list of length %d for field %s' % (
|
||||
len(expected), len(got), field))
|
||||
for index, (item_got, item_expected) in enumerate(zip(got, expected)):
|
||||
type_got = type(item_got)
|
||||
|
@@ -18,7 +18,6 @@
|
||||
"noprogress": false,
|
||||
"outtmpl": "%(id)s.%(ext)s",
|
||||
"password": null,
|
||||
"playlistend": -1,
|
||||
"playliststart": 1,
|
||||
"prefer_free_formats": false,
|
||||
"quiet": false,
|
||||
|
@@ -997,6 +997,25 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
self.assertEqual(downloaded['extractor'], 'Video')
|
||||
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||
|
||||
def test_default_times(self):
|
||||
"""Test addition of missing upload/release/_date from /release_/timestamp"""
|
||||
info = {
|
||||
'id': '1234',
|
||||
'url': TEST_URL,
|
||||
'title': 'Title',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1631352900,
|
||||
'release_timestamp': 1632995931,
|
||||
}
|
||||
|
||||
params = {'simulate': True, }
|
||||
ydl = FakeYDL(params)
|
||||
out_info = ydl.process_ie_result(info)
|
||||
self.assertTrue(isinstance(out_info['upload_date'], compat_str))
|
||||
self.assertEqual(out_info['upload_date'], '20210911')
|
||||
self.assertTrue(isinstance(out_info['release_date'], compat_str))
|
||||
self.assertEqual(out_info['release_date'], '20210930')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
|
||||
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt
|
||||
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||
import base64
|
||||
|
||||
@@ -58,6 +58,13 @@ class TestAES(unittest.TestCase):
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
def test_ecb_encrypt(self):
|
||||
data = bytes_to_intlist(self.secret_msg)
|
||||
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
|
||||
self.assertEqual(
|
||||
encrypted,
|
||||
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -66,18 +66,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
||||
|
||||
# def test_youtube_search_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
def test_youtube_search_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_facebook_matching(self):
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
|
@@ -3,17 +3,18 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import shutil
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import shutil
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.cache import Cache
|
||||
from youtube_dl.utils import version_tuple
|
||||
from youtube_dl.version import __version__
|
||||
|
||||
|
||||
def _is_empty(d):
|
||||
@@ -54,6 +55,17 @@ class TestCache(unittest.TestCase):
|
||||
self.assertFalse(os.path.exists(self.test_dir))
|
||||
self.assertEqual(c.load('test_cache', 'k.'), None)
|
||||
|
||||
def test_cache_validation(self):
|
||||
ydl = FakeYDL({
|
||||
'cachedir': self.test_dir,
|
||||
})
|
||||
c = Cache(ydl)
|
||||
obj = {'x': 1, 'y': ['ä', '\\a', True]}
|
||||
c.store('test_cache', 'k.', obj)
|
||||
self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
|
||||
new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
|
||||
self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_casefold,
|
||||
compat_getenv,
|
||||
compat_setenv,
|
||||
compat_etree_Element,
|
||||
@@ -118,9 +119,21 @@ class TestCompat(unittest.TestCase):
|
||||
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
|
||||
compat_etree_fromstring(xml)
|
||||
|
||||
def test_struct_unpack(self):
|
||||
def test_compat_struct_unpack(self):
|
||||
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
|
||||
|
||||
def test_compat_casefold(self):
|
||||
if hasattr(compat_str, 'casefold'):
|
||||
# don't bother to test str.casefold() (again)
|
||||
return
|
||||
# thanks https://bugs.python.org/file24232/casefolding.patch
|
||||
self.assertEqual(compat_casefold('hello'), 'hello')
|
||||
self.assertEqual(compat_casefold('hELlo'), 'hello')
|
||||
self.assertEqual(compat_casefold('ß'), 'ss')
|
||||
self.assertEqual(compat_casefold('fi'), 'fi')
|
||||
self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
|
||||
self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -33,6 +33,7 @@ from youtube_dl.compat import (
|
||||
from youtube_dl.utils import (
|
||||
DownloadError,
|
||||
ExtractorError,
|
||||
error_to_compat_str,
|
||||
format_bytes,
|
||||
UnavailableVideoError,
|
||||
)
|
||||
@@ -100,27 +101,28 @@ def generator(test_case, tname):
|
||||
|
||||
def print_skipping(reason):
|
||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||
self.skipTest(reason)
|
||||
|
||||
if not ie.working():
|
||||
print_skipping('IE marked as not _WORKING')
|
||||
return
|
||||
|
||||
for tc in test_cases:
|
||||
info_dict = tc.get('info_dict', {})
|
||||
if not (info_dict.get('id') and info_dict.get('ext')):
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||
raise Exception('Test definition (%s) requires both \'id\' and \'ext\' keys present to define the output file' % (tname, ))
|
||||
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
|
||||
for other_ie in other_ies:
|
||||
if not other_ie.working():
|
||||
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
return
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
params.setdefault('playlistend', test_case.get('playlist_mincount'))
|
||||
params.setdefault('skip_download', True)
|
||||
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
@@ -160,7 +162,9 @@ def generator(test_case, tname):
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
raise
|
||||
msg = getattr(err, 'msg', error_to_compat_str(err))
|
||||
err.msg = '%s (%s)' % (msg, tname, )
|
||||
raise err
|
||||
|
||||
if try_num == RETRIES:
|
||||
report_warning('%s failed due to network errors, skipping...' % tname)
|
||||
|
@@ -39,6 +39,16 @@ class TestExecution(unittest.TestCase):
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
def test_lazy_extractors(self):
|
||||
try:
|
||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
finally:
|
||||
try:
|
||||
os.remove('youtube_dl/extractor/lazy_extractors.py')
|
||||
except (IOError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -8,7 +8,12 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.jsinterp import JSInterpreter
|
||||
import math
|
||||
import re
|
||||
|
||||
from youtube_dl.compat import compat_re_Pattern
|
||||
|
||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||
|
||||
|
||||
class TestJSInterpreter(unittest.TestCase):
|
||||
@@ -19,6 +24,9 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function x3(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x3'), 42)
|
||||
|
||||
jsi = JSInterpreter('function x3(){42}')
|
||||
self.assertEqual(jsi.call_function('x3'), None)
|
||||
|
||||
jsi = JSInterpreter('var x5 = function(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x5'), 42)
|
||||
|
||||
@@ -45,14 +53,32 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function f(){return 1 << 5;}')
|
||||
self.assertEqual(jsi.call_function('f'), 32)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 2 ** 5}')
|
||||
self.assertEqual(jsi.call_function('f'), 32)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 19 & 21;}')
|
||||
self.assertEqual(jsi.call_function('f'), 17)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 11 >> 2;}')
|
||||
self.assertEqual(jsi.call_function('f'), 2)
|
||||
|
||||
jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
|
||||
self.assertEqual(jsi.call_function('f'), 5)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 1 == 2}')
|
||||
self.assertEqual(jsi.call_function('f'), False)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
|
||||
self.assertEqual(jsi.call_function('f'), 2)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
|
||||
self.assertEqual(jsi.call_function('f'), 0)
|
||||
|
||||
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
|
||||
self.assertFalse(jsi.call_function('f'))
|
||||
|
||||
def test_array_access(self):
|
||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
|
||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
|
||||
|
||||
def test_parens(self):
|
||||
@@ -62,6 +88,10 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
|
||||
self.assertEqual(jsi.call_function('f'), 9)
|
||||
|
||||
def test_quotes(self):
|
||||
jsi = JSInterpreter(r'function f(){return "a\"\\("}')
|
||||
self.assertEqual(jsi.call_function('f'), r'a"\(')
|
||||
|
||||
def test_assignments(self):
|
||||
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), 31)
|
||||
@@ -104,13 +134,277 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}''')
|
||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||
|
||||
def test_builtins(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return NaN }
|
||||
''')
|
||||
self.assertTrue(math.isnan(jsi.call_function('x')))
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 86000)
|
||||
jsi = JSInterpreter('''
|
||||
function x(dt) { return new Date(dt) - 0; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
|
||||
|
||||
def test_call(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return 2; }
|
||||
function y(a) { return x() + a; }
|
||||
function y(a) { return x() + (a?a:0); }
|
||||
function z() { return y(3); }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('z'), 5)
|
||||
self.assertEqual(jsi.call_function('y'), 2)
|
||||
|
||||
def test_for_loop(self):
|
||||
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=0; for (i=0; i-10; i++) {a++} return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 10)
|
||||
|
||||
def test_switch(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x(f) { switch(f){
|
||||
case 1:f+=1;
|
||||
case 2:f+=2;
|
||||
case 3:f+=3;break;
|
||||
case 4:f+=4;
|
||||
default:f=0;
|
||||
} return f }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x', 1), 7)
|
||||
self.assertEqual(jsi.call_function('x', 3), 6)
|
||||
self.assertEqual(jsi.call_function('x', 5), 0)
|
||||
|
||||
def test_switch_default(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x(f) { switch(f){
|
||||
case 2: f+=2;
|
||||
default: f-=1;
|
||||
case 5:
|
||||
case 6: f+=6;
|
||||
case 0: break;
|
||||
case 1: f+=1;
|
||||
} return f }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x', 1), 2)
|
||||
self.assertEqual(jsi.call_function('x', 5), 11)
|
||||
self.assertEqual(jsi.call_function('x', 9), 14)
|
||||
|
||||
def test_try(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{return 10} catch(e){return 5} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 10)
|
||||
|
||||
def test_catch(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{throw 10} catch(e){return 5} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 5)
|
||||
|
||||
def test_finally(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{throw 10} finally {return 42} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
|
||||
def test_nested_try(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {try {
|
||||
try{throw 10} finally {throw 42}
|
||||
} catch(e){return 5} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 5)
|
||||
|
||||
def test_for_loop_continue(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 0)
|
||||
|
||||
def test_for_loop_break(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 0)
|
||||
|
||||
def test_for_loop_try(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
|
||||
return 42 }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
|
||||
def test_literal_list(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
|
||||
|
||||
def test_comma(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=5; a -= 1, a+=3; return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 7)
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=5; return (a -= 1, a+=3, a); }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 7)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 5)
|
||||
|
||||
def test_void(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return void 42; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), None)
|
||||
|
||||
def test_return_function(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [1, function(){return 1}][1] }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x')([]), 1)
|
||||
|
||||
def test_null(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return null; }
|
||||
''')
|
||||
self.assertIs(jsi.call_function('x'), None)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [null > 0, null < 0, null == 0, null === 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [null >= 0, null <= 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [True, True])
|
||||
|
||||
def test_undefined(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return undefined === undefined; }
|
||||
''')
|
||||
self.assertTrue(jsi.call_function('x'))
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return undefined; }
|
||||
''')
|
||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return v; }
|
||||
''')
|
||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [True, True, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined >= 0, undefined <= 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, True, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
||||
''')
|
||||
for y in jsi.call_function('x'):
|
||||
self.assertTrue(math.isnan(y))
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return v**0; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 1)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
|
||||
|
||||
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
|
||||
def test_object(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return {}; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), {})
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [42, 0])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a; return a?.qq; }
|
||||
''')
|
||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
|
||||
''')
|
||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
def test_regex(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/; }
|
||||
''')
|
||||
self.assertIs(jsi.call_function('x'), None)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/; return a; }
|
||||
''')
|
||||
self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
||||
|
||||
def test_char_code_at(self):
|
||||
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
||||
self.assertEqual(jsi.call_function('x', 0), 116)
|
||||
self.assertEqual(jsi.call_function('x', 1), 101)
|
||||
self.assertEqual(jsi.call_function('x', 2), 115)
|
||||
self.assertEqual(jsi.call_function('x', 3), 116)
|
||||
self.assertEqual(jsi.call_function('x', 4), None)
|
||||
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
|
||||
|
||||
def test_bitwise_operators_overflow(self):
|
||||
jsi = JSInterpreter('function x(){return -524999584 << 5}')
|
||||
self.assertEqual(jsi.call_function('x'), 379882496)
|
||||
|
||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase):
|
||||
self.DL = FakeYDL()
|
||||
self.ie = self.IE()
|
||||
self.DL.add_info_extractor(self.ie)
|
||||
if not self.IE.working():
|
||||
print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
|
||||
self.skipTest('IE marked as not _WORKING')
|
||||
|
||||
def getInfoDict(self):
|
||||
info_dict = self.DL.extract_info(self.url, download=False)
|
||||
@@ -56,6 +59,21 @@ class BaseTestSubtitles(unittest.TestCase):
|
||||
|
||||
|
||||
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
# Available subtitles for QRS8MkLhQmM:
|
||||
# Language formats
|
||||
# ru vtt, ttml, srv3, srv2, srv1, json3
|
||||
# fr vtt, ttml, srv3, srv2, srv1, json3
|
||||
# en vtt, ttml, srv3, srv2, srv1, json3
|
||||
# nl vtt, ttml, srv3, srv2, srv1, json3
|
||||
# de vtt, ttml, srv3, srv2, srv1, json3
|
||||
# ko vtt, ttml, srv3, srv2, srv1, json3
|
||||
# it vtt, ttml, srv3, srv2, srv1, json3
|
||||
# zh-Hant vtt, ttml, srv3, srv2, srv1, json3
|
||||
# hi vtt, ttml, srv3, srv2, srv1, json3
|
||||
# pt-BR vtt, ttml, srv3, srv2, srv1, json3
|
||||
# es-MX vtt, ttml, srv3, srv2, srv1, json3
|
||||
# ja vtt, ttml, srv3, srv2, srv1, json3
|
||||
# pl vtt, ttml, srv3, srv2, srv1, json3
|
||||
url = 'QRS8MkLhQmM'
|
||||
IE = YoutubeIE
|
||||
|
||||
@@ -64,41 +82,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
|
||||
self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
|
||||
self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
|
||||
for lang in ['fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_youtube_subtitles_ttml_format(self):
|
||||
def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'ttml'
|
||||
self.DL.params['subtitlesformat'] = fmt
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
|
||||
self.assertEqual(md5(subtitles[lang]), md5_hash)
|
||||
|
||||
def test_youtube_subtitles_ttml_format(self):
|
||||
self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
|
||||
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'vtt'
|
||||
self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
|
||||
|
||||
def test_youtube_subtitles_json3_format(self):
|
||||
self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
|
||||
|
||||
def _test_automatic_captions(self, url, lang):
|
||||
self.url = url
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslangs'] = [lang]
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
self.assertTrue(subtitles[lang] is not None)
|
||||
|
||||
def test_youtube_automatic_captions(self):
|
||||
self.url = '8YoUxe5ncPo'
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(subtitles['it'] is not None)
|
||||
# Available automatic captions for 8YoUxe5ncPo:
|
||||
# Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
|
||||
# gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
|
||||
# lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
|
||||
# el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
|
||||
# bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
|
||||
# hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
|
||||
# mt, ms, mr, ug, ta, my, af, sw, is, am,
|
||||
# *it*, iw, sv, ar,
|
||||
# su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
|
||||
# ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
|
||||
# ky, sd
|
||||
# ...
|
||||
self._test_automatic_captions('8YoUxe5ncPo', 'it')
|
||||
|
||||
@unittest.skip('ASR subs all in all supported langs now')
|
||||
def test_youtube_translated_subtitles(self):
|
||||
# This video has a subtitles track, which can be translated
|
||||
self.url = 'Ky9eprVWzlI'
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(subtitles['it'] is not None)
|
||||
# This video has a subtitles track, which can be translated (#4555)
|
||||
self._test_automatic_captions('Ky9eprVWzlI', 'it')
|
||||
|
||||
def test_youtube_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
self.url = 'n5BB19UTcdA'
|
||||
# Available automatic captions for 8YoUxe5ncPo:
|
||||
# ...
|
||||
# 8YoUxe5ncPo has no subtitles
|
||||
self.url = '8YoUxe5ncPo'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
@@ -128,6 +165,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestTedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||
IE = TEDIE
|
||||
@@ -152,18 +190,19 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||
self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
|
||||
self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
self.url = 'http://vimeo.com/56015672'
|
||||
self.url = 'http://vimeo.com/68093876'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestWallaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||
IE = WallaIE
|
||||
@@ -185,6 +224,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
@@ -206,6 +246,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
|
||||
IE = LyndaIE
|
||||
@@ -218,6 +259,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestNPOSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||
IE = NPOIE
|
||||
@@ -230,6 +272,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestMTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
|
||||
IE = ComedyCentralIE
|
||||
@@ -253,8 +296,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['no']))
|
||||
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
|
||||
self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
|
||||
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
|
||||
|
||||
|
||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
@@ -277,6 +320,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
|
||||
|
||||
|
||||
@unittest.skip('IE broken - DRM only')
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||
IE = VikiIE
|
||||
@@ -303,6 +347,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
@unittest.skip('IE broken')
|
||||
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
|
||||
IE = ThePlatformFeedIE
|
||||
@@ -338,7 +383,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
|
||||
self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
|
||||
|
||||
def test_subtitles_in_page(self):
|
||||
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
|
||||
@@ -346,7 +391,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
|
||||
self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -12,7 +12,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Various small unit tests
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from youtube_dl.utils import (
|
||||
@@ -40,11 +42,14 @@ from youtube_dl.utils import (
|
||||
get_element_by_attribute,
|
||||
get_elements_by_class,
|
||||
get_elements_by_attribute,
|
||||
get_first,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
LazyList,
|
||||
limit_length,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -79,6 +84,8 @@ from youtube_dl.utils import (
|
||||
strip_or_none,
|
||||
subtitles_filename,
|
||||
timeconvert,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
@@ -92,6 +99,7 @@ from youtube_dl.utils import (
|
||||
urlencode_postdata,
|
||||
urshift,
|
||||
update_url_query,
|
||||
variadic,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
@@ -112,12 +120,18 @@ from youtube_dl.compat import (
|
||||
compat_getenv,
|
||||
compat_os_name,
|
||||
compat_setenv,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
|
||||
class TestUtil(unittest.TestCase):
|
||||
|
||||
# yt-dlp shim
|
||||
def assertCountEqual(self, expected, got, msg='count should be the same'):
|
||||
return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
|
||||
|
||||
def test_timeconvert(self):
|
||||
self.assertTrue(timeconvert('') is None)
|
||||
self.assertTrue(timeconvert('bougrg') is None)
|
||||
@@ -370,6 +384,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
|
||||
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
|
||||
self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -1475,6 +1492,315 @@ Line 1
|
||||
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||
|
||||
def test_LazyList(self):
|
||||
it = list(range(10))
|
||||
|
||||
self.assertEqual(list(LazyList(it)), it)
|
||||
self.assertEqual(LazyList(it).exhaust(), it)
|
||||
self.assertEqual(LazyList(it)[5], it[5])
|
||||
|
||||
self.assertEqual(LazyList(it)[5:], it[5:])
|
||||
self.assertEqual(LazyList(it)[:5], it[:5])
|
||||
self.assertEqual(LazyList(it)[::2], it[::2])
|
||||
self.assertEqual(LazyList(it)[1::2], it[1::2])
|
||||
self.assertEqual(LazyList(it)[5::-1], it[5::-1])
|
||||
self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
|
||||
self.assertEqual(LazyList(it)[::-1], it[::-1])
|
||||
|
||||
self.assertTrue(LazyList(it))
|
||||
self.assertFalse(LazyList(range(0)))
|
||||
self.assertEqual(len(LazyList(it)), len(it))
|
||||
self.assertEqual(repr(LazyList(it)), repr(it))
|
||||
self.assertEqual(compat_str(LazyList(it)), compat_str(it))
|
||||
|
||||
self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
|
||||
self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
|
||||
self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
|
||||
|
||||
def test_LazyList_laziness(self):
|
||||
|
||||
def test(ll, idx, val, cache):
|
||||
self.assertEqual(ll[idx], val)
|
||||
self.assertEqual(ll._cache, list(cache))
|
||||
|
||||
ll = LazyList(range(10))
|
||||
test(ll, 0, 0, range(1))
|
||||
test(ll, 5, 5, range(6))
|
||||
test(ll, -3, 7, range(10))
|
||||
|
||||
ll = LazyList(range(10), reverse=True)
|
||||
test(ll, -1, 0, range(1))
|
||||
test(ll, 3, 6, range(10))
|
||||
|
||||
ll = LazyList(itertools.count())
|
||||
test(ll, 10, 10, range(11))
|
||||
ll = reversed(ll)
|
||||
test(ll, -15, 14, range(15))
|
||||
|
||||
def test_try_call(self):
|
||||
def total(*x, **kwargs):
|
||||
return sum(x) + sum(kwargs.values())
|
||||
|
||||
self.assertEqual(try_call(None), None,
|
||||
msg='not a fn should give None')
|
||||
self.assertEqual(try_call(lambda: 1), 1,
|
||||
msg='int fn with no expected_type should give int')
|
||||
self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
|
||||
msg='int fn with expected_type int should give int')
|
||||
self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
|
||||
msg='int fn with wrong expected_type should give None')
|
||||
self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
|
||||
msg='fn should accept arglist')
|
||||
self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
|
||||
msg='fn should accept kwargs')
|
||||
self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
|
||||
msg='int fn with no expected_type should give None')
|
||||
self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
|
||||
msg='expect first int result with expected_type int')
|
||||
|
||||
def test_variadic(self):
|
||||
self.assertEqual(variadic(None), (None, ))
|
||||
self.assertEqual(variadic('spam'), ('spam', ))
|
||||
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
||||
|
||||
def test_traverse_obj(self):
|
||||
_TEST_DATA = {
|
||||
100: 100,
|
||||
1.2: 1.2,
|
||||
'str': 'str',
|
||||
'None': None,
|
||||
'...': Ellipsis,
|
||||
'urls': [
|
||||
{'index': 0, 'url': 'https://www.example.com/0'},
|
||||
{'index': 1, 'url': 'https://www.example.com/1'},
|
||||
],
|
||||
'data': (
|
||||
{'index': 2},
|
||||
{'index': 3},
|
||||
),
|
||||
'dict': {},
|
||||
}
|
||||
|
||||
# Test base functionality
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
||||
msg='allow tuple path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
||||
msg='allow list path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
||||
msg='allow iterable path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
||||
msg='single items should be treated as a path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
||||
|
||||
# Test Ellipsis behavior
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
||||
(item for item in _TEST_DATA.values() if item is not None),
|
||||
msg='`...` should give all values except `None`')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
||||
msg='`...` selection for dicts should select all values')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||
msg='nested `...` queries should work')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
|
||||
msg='`...` query result should be flattened')
|
||||
|
||||
# Test function as key
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||
[_TEST_DATA['urls']],
|
||||
msg='function as query key should perform a filter based on (key, value)')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
|
||||
msg='exceptions in the query function should be caught')
|
||||
|
||||
# Test alternative paths
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||
msg='multiple `paths` should be treated as alternative paths')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
||||
msg='alternatives should exit early')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
||||
msg='alternatives should return `default` if exhausted')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
|
||||
msg='alternatives should track their own branching return')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
|
||||
msg='alternatives on empty objects should search further')
|
||||
|
||||
# Test branch and path nesting
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
||||
msg='tuple as key should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
||||
msg='list as key should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
||||
msg='double nesting in path should be treated as paths')
|
||||
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
||||
msg='do not fail early on branching')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||
msg='triple nesting in path should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
|
||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||
msg='ellipsis as branch path start gets flattened')
|
||||
|
||||
# Test dictionary as key
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
||||
msg='dict key should result in a dict with the same keys')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
||||
{0: 'https://www.example.com/0'},
|
||||
msg='dict key should allow paths')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
||||
{0: ['https://www.example.com/0']},
|
||||
msg='tuple in dict path should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
||||
{0: ['https://www.example.com/0']},
|
||||
msg='double nesting in dict path should be treated as paths')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
||||
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
||||
msg='triple nesting in dict path should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
||||
msg='remove `None` values when dict key')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||
msg='do not remove `None` values if `default`')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
|
||||
msg='do not remove empty values when dict key')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
|
||||
msg='do not remove empty values when dict key and a default')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
|
||||
msg='if branch in dict key not successful, return `[]`')
|
||||
|
||||
# Testing default parameter behavior
|
||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
||||
msg='default value should be `None`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
|
||||
msg='chained fails should result in default')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
||||
msg='should not short cirquit on `None`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
||||
msg='invalid dict key should result in `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
||||
msg='`None` is a deliberate sentinel and should become `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
||||
msg='`IndexError` should result in `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
|
||||
msg='if branched but not successful return `default` if defined, not `[]`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
|
||||
msg='if branched but not successful return `default` even if `default` is `None`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
|
||||
msg='if branched but not successful return `[]`, not `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
||||
msg='if branched but object is empty return `[]`, not `default`')
|
||||
|
||||
# Testing expected_type behavior
|
||||
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
|
||||
msg='accept matching `expected_type` type')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
|
||||
msg='reject non matching `expected_type` type')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
|
||||
msg='transform type using type function')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
|
||||
expected_type=lambda _: 1 / 0), None,
|
||||
msg='wrap expected_type function in try_call')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
|
||||
msg='eliminate items that expected_type fails on')
|
||||
|
||||
# Test get_all behavior
|
||||
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
|
||||
msg='if not `get_all`, return only first matching value')
|
||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
|
||||
msg='do not overflatten if not `get_all`')
|
||||
|
||||
# Test casesense behavior
|
||||
_CASESENSE_DATA = {
|
||||
'KeY': 'value0',
|
||||
0: {
|
||||
'KeY': 'value1',
|
||||
0: {'KeY': 'value2'},
|
||||
},
|
||||
# FULLWIDTH LATIN CAPITAL LETTER K
|
||||
'\uff2bey': 'value3',
|
||||
}
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
|
||||
msg='dict keys should be case sensitive unless `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
|
||||
casesense=False), 'value0',
|
||||
msg='allow non matching key case if `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
|
||||
casesense=False), 'value3',
|
||||
msg='allow non matching Unicode key case if `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
|
||||
casesense=False), ['value1'],
|
||||
msg='allow non matching key case in branch if `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
|
||||
casesense=False), ['value2'],
|
||||
msg='allow non matching key case in branch path if `casesense`')
|
||||
|
||||
# Test traverse_string behavior
|
||||
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
|
||||
msg='do not traverse into string if not `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
|
||||
_traverse_string=True), 's',
|
||||
msg='traverse into string if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
|
||||
_traverse_string=True), '.',
|
||||
msg='traverse into converted data if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
||||
_traverse_string=True), list('str'),
|
||||
msg='`...` branching into string should result in list')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||
_traverse_string=True), ['s', 'r'],
|
||||
msg='branching into string should result in list')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
|
||||
_traverse_string=True), list('str'),
|
||||
msg='function branching into string should result in list')
|
||||
|
||||
# Test is_user_input behavior
|
||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
||||
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
|
||||
_is_user_input=True), 3,
|
||||
msg='allow for string indexing if `is_user_input`')
|
||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
|
||||
_is_user_input=True), tuple(range(8))[3:],
|
||||
msg='allow for string slice if `is_user_input`')
|
||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
|
||||
_is_user_input=True), tuple(range(8))[:4:2],
|
||||
msg='allow step in string slice if `is_user_input`')
|
||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
|
||||
_is_user_input=True), range(8),
|
||||
msg='`:` should be treated as `...` if `is_user_input`')
|
||||
with self.assertRaises(TypeError, msg='too many params should result in error'):
|
||||
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
|
||||
|
||||
# Test re.Match as input obj
|
||||
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
|
||||
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
|
||||
msg='`...` on a `re.Match` should give its `groups()`')
|
||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
|
||||
msg='function on a `re.Match` should give groupno, value starting at 0')
|
||||
self.assertEqual(traverse_obj(mobj, 'group'), '3',
|
||||
msg='str key on a `re.Match` should give group with that name')
|
||||
self.assertEqual(traverse_obj(mobj, 2), '3',
|
||||
msg='int key on a `re.Match` should give group with that name')
|
||||
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
|
||||
msg='str key on a `re.Match` should respect casesense')
|
||||
self.assertEqual(traverse_obj(mobj, 'fail'), None,
|
||||
msg='failing str key on a `re.Match` should return `default`')
|
||||
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
|
||||
msg='failing str key on a `re.Match` should return `default`')
|
||||
self.assertEqual(traverse_obj(mobj, 8), None,
|
||||
msg='failing int key on a `re.Match` should return `default`')
|
||||
|
||||
def test_get_first(self):
|
||||
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
|
||||
|
||||
def test_join_nonempty(self):
|
||||
self.assertEqual(join_nonempty('a', 'b'), 'a-b')
|
||||
self.assertEqual(join_nonempty(
|
||||
'a', 'b', 'c', 'd',
|
||||
from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
@@ -9,11 +10,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeTabIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,38 +25,23 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_playlist_noplaylist(self):
|
||||
dl = FakeYDL()
|
||||
dl.params['noplaylist'] = True
|
||||
dl.params['format'] = 'best'
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertEqual(result['_type'], 'url')
|
||||
result = dl.extract_info(result['url'], download=False, ie_key=result.get('ie_key'), process=False)
|
||||
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
|
||||
def test_youtube_course(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = list(result['entries'])
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 50)
|
||||
dl.params['format'] = 'best'
|
||||
ie = YoutubeTabIE(dl)
|
||||
result = dl.extract_info('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8',
|
||||
download=False, ie_key=ie.ie_key(), process=True)
|
||||
entries = (result or {}).get('entries', [{'id': 'not_found', }])
|
||||
self.assertTrue(len(entries) >= 25)
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
print('Skipping: The playlist page gives error 500')
|
||||
return
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
self.assertEqual(original_video['id'], 'tyITL_exICo')
|
||||
|
||||
def test_youtube_flat_playlist_extraction(self):
|
||||
dl = FakeYDL()
|
||||
@@ -67,7 +52,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = list(result['entries'])
|
||||
self.assertTrue(len(entries) == 1)
|
||||
video = entries[0]
|
||||
self.assertEqual(video['_type'], 'url_transparent')
|
||||
self.assertEqual(video['_type'], 'url')
|
||||
self.assertEqual(video['ie_key'], 'Youtube')
|
||||
self.assertEqual(video['id'], 'BaW_jenozKc')
|
||||
self.assertEqual(video['url'], 'BaW_jenozKc')
|
||||
|
26
test/test_youtube_misc.py
Normal file
26
test/test_youtube_misc.py
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
|
||||
|
||||
class TestYoutubeMisc(unittest.TestCase):
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -12,11 +12,13 @@ import io
|
||||
import re
|
||||
import string
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||
|
||||
_TESTS = [
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.jsinterp import JSInterpreter
|
||||
|
||||
_SIG_TESTS = [
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
86,
|
||||
@@ -64,10 +66,82 @@ _TESTS = [
|
||||
)
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
(
|
||||
'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
|
||||
'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js',
|
||||
'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js',
|
||||
'oBo2h5euWy6osrUt', '3DIBbn3qdQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
|
||||
'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
|
||||
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
|
||||
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
|
||||
'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
|
||||
'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
|
||||
'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
|
||||
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
|
||||
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
|
||||
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
|
||||
'5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
|
||||
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
||||
'-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
class TestPlayerInfo(unittest.TestCase):
|
||||
def test_youtube_extract_player_info(self):
|
||||
PLAYER_URLS = (
|
||||
('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
|
||||
@@ -90,40 +164,61 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
class TestSignature(unittest.TestCase):
|
||||
def setUp(self):
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
|
||||
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
|
||||
if not os.path.exists(self.TESTDATA_DIR):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
|
||||
def make_tfunc(url, sig_input, expected_sig):
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
|
||||
assert m, '%r should follow URL format' % url
|
||||
test_id = m.group(1)
|
||||
|
||||
def test_func(self):
|
||||
basename = 'player-%s.js' % test_id
|
||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||
|
||||
if not os.path.exists(fn):
|
||||
compat_urlretrieve(url, fn)
|
||||
|
||||
ydl = FakeYDL()
|
||||
ie = YoutubeIE(ydl)
|
||||
with io.open(fn, encoding='utf-8') as testf:
|
||||
jscode = testf.read()
|
||||
func = ie._parse_sig_js(jscode)
|
||||
src_sig = (
|
||||
compat_str(string.printable[:sig_input])
|
||||
if isinstance(sig_input, int) else sig_input)
|
||||
got_sig = func(src_sig)
|
||||
self.assertEqual(got_sig, expected_sig)
|
||||
|
||||
test_func.__name__ = str('test_signature_js_' + test_id)
|
||||
setattr(TestSignature, test_func.__name__, test_func)
|
||||
def tearDown(self):
    """Remove the files found in ``self.TESTDATA_DIR``.

    Cleanup is best-effort: the first OSError (directory missing, entry
    that cannot be removed) silently ends it.
    """
    try:
        for f in os.listdir(self.TESTDATA_DIR):
            # listdir() yields bare names; without the directory prefix
            # remove() would look in the current working directory
            os.remove(os.path.join(self.TESTDATA_DIR, f))
    except OSError:
        pass
|
||||
|
||||
|
||||
for test_spec in _TESTS:
|
||||
make_tfunc(*test_spec)
|
||||
def t_factory(name, sig_func, url_pattern):
    """Build a function that registers player tests on TestSignature.

    name -- label used in the cached file name and the test method name
    sig_func -- called as sig_func(jscode, sig_input); its result is
                compared with the expected signature
    url_pattern -- compiled pattern whose group 'id' captures the player
                   id from a player URL

    The returned function takes (url, sig_input, expected_sig) and adds one
    test method to TestSignature.
    """
    def make_tfunc(url, sig_input, expected_sig):
        matched = url_pattern.match(url)
        assert matched, '%r should follow URL format' % url
        test_id = matched.group('id')
        cache_name = 'player-{0}-{1}.js'.format(name, test_id)
        method_name = str('test_{0}_js_{1}'.format(name, test_id))

        def test_func(self):
            player_path = os.path.join(self.TESTDATA_DIR, cache_name)

            # download the player only when no copy is present yet
            if not os.path.exists(player_path):
                compat_urlretrieve(url, player_path)
            with io.open(player_path, encoding='utf-8') as player_file:
                jscode = player_file.read()
            self.assertEqual(sig_func(jscode, sig_input), expected_sig)

        test_func.__name__ = method_name
        setattr(TestSignature, method_name, test_func)

    return make_tfunc
|
||||
|
||||
|
||||
def signature(jscode, sig_input):
    """Run the signature function parsed from jscode on sig_input.

    An int sig_input stands for the first sig_input characters of
    string.printable; any other value is passed on unchanged.
    """
    if isinstance(sig_input, int):
        source = compat_str(string.printable[:sig_input])
    else:
        source = sig_input
    decipher = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
    return decipher(source)
|
||||
|
||||
|
||||
def n_sig(jscode, sig_input):
    """Call the n-parameter function found in jscode with sig_input.

    The function name is located by YoutubeIE._extract_n_function_name();
    the call itself is made through JSInterpreter.
    """
    extractor = YoutubeIE(FakeYDL())
    n_func_name = extractor._extract_n_function_name(jscode)
    interpreter = JSInterpreter(jscode)
    return interpreter.call_function(n_func_name, sig_input)
|
||||
|
||||
|
||||
# Register one test method per listed player; the player id is whatever the
# named group 'id' of the given pattern captures from the player URL
make_sig_test = t_factory(
    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
for test_spec in _SIG_TESTS:
    make_sig_test(*test_spec)

# the dot before 'js' is escaped so that only a literal '.js' suffix matches
make_nsig_test = t_factory(
    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+\.js$'))
for test_spec in _NSIG_TESTS:
    make_nsig_test(*test_spec)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -73,6 +73,7 @@ from .utils import (
|
||||
PostProcessingError,
|
||||
preferredencoding,
|
||||
prepend_extension,
|
||||
process_communicate_or_kill,
|
||||
register_socks_protocols,
|
||||
render_table,
|
||||
replace_extension,
|
||||
@@ -86,6 +87,7 @@ from .utils import (
|
||||
subtitles_filename,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
variadic,
|
||||
version_tuple,
|
||||
write_json_file,
|
||||
write_string,
|
||||
@@ -720,7 +722,7 @@ class YoutubeDL(object):
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return sanitize_path(filename)
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
@@ -773,11 +775,20 @@ class YoutubeDL(object):
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
process=True, force_generic_extractor=False):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
"""
|
||||
Return a list with a dictionary for each video extracted.
|
||||
|
||||
Arguments:
|
||||
url -- URL to extract
|
||||
|
||||
Keyword arguments:
|
||||
download -- whether to download videos during extraction
|
||||
ie_key -- extractor key hint
|
||||
extra_info -- dictionary containing the extra values to add to each result
|
||||
process -- whether to resolve all unresolved references (URLs, playlist items),
|
||||
must be True for download to work.
|
||||
force_generic_extractor -- force using the generic extractor
|
||||
"""
|
||||
|
||||
if not ie_key and force_generic_extractor:
|
||||
ie_key = 'Generic'
|
||||
@@ -1288,57 +1299,46 @@ class YoutubeDL(object):
|
||||
format_spec = selector.selector
|
||||
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
if format_spec == 'all':
|
||||
for f in formats:
|
||||
yield f
|
||||
elif format_spec in ['best', 'worst', None]:
|
||||
format_idx = 0 if format_spec == 'worst' else -1
|
||||
|
||||
def best_worst(fmts, fmt_spec='best'):
|
||||
format_idx = 0 if fmt_spec == 'worst' else -1
|
||||
audiovideo_formats = [
|
||||
f for f in formats
|
||||
f for f in fmts
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
yield audiovideo_formats[format_idx]
|
||||
return audiovideo_formats[format_idx]
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) we will fallback to best/worst
|
||||
# {video,audio}-only format
|
||||
elif ctx['incomplete_formats']:
|
||||
yield formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
return fmts[format_idx]
|
||||
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
if format_spec == 'all':
|
||||
pass
|
||||
elif format_spec in ('best', 'worst', None):
|
||||
formats = best_worst(formats, format_spec)
|
||||
elif format_spec in ('bestaudio', 'worstaudio'):
|
||||
audio_formats = [
|
||||
f for f in formats
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
yield audio_formats[-1]
|
||||
elif format_spec == 'worstaudio':
|
||||
audio_formats = [
|
||||
f for f in formats
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
yield audio_formats[0]
|
||||
elif format_spec == 'bestvideo':
|
||||
formats = audio_formats[:1] if format_spec == 'worstaudio' else audio_formats[-1:]
|
||||
elif format_spec in ('bestvideo', 'worstvideo'):
|
||||
video_formats = [
|
||||
f for f in formats
|
||||
if f.get('acodec') == 'none']
|
||||
if video_formats:
|
||||
yield video_formats[-1]
|
||||
elif format_spec == 'worstvideo':
|
||||
video_formats = [
|
||||
f for f in formats
|
||||
if f.get('acodec') == 'none']
|
||||
if video_formats:
|
||||
yield video_formats[0]
|
||||
formats = video_formats[:1] if format_spec == 'worstvideo' else video_formats[-1:]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
filter_f = lambda f: f['format_id'] == format_spec
|
||||
matches = list(filter(filter_f, formats))
|
||||
if matches:
|
||||
yield matches[-1]
|
||||
formats = best_worst(list(filter(filter_f, formats)))
|
||||
for f in variadic(formats or []):
|
||||
yield f
|
||||
elif selector.type == MERGE:
|
||||
def _merge(formats_info):
|
||||
format_1, format_2 = [f['format_id'] for f in formats_info]
|
||||
@@ -1520,7 +1520,7 @@ class YoutubeDL(object):
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
@@ -1560,9 +1560,6 @@ class YoutubeDL(object):
|
||||
else:
|
||||
formats = info_dict['formats']
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
def is_wellformed(f):
|
||||
url = f.get('url')
|
||||
if not url:
|
||||
@@ -1575,7 +1572,10 @@ class YoutubeDL(object):
|
||||
return True
|
||||
|
||||
# Filter out malformed formats for better extraction robustness
|
||||
formats = list(filter(is_wellformed, formats))
|
||||
formats = list(filter(is_wellformed, formats or []))
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
formats_dict = {}
|
||||
|
||||
@@ -1769,10 +1769,9 @@ class YoutubeDL(object):
|
||||
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
|
||||
max_downloads = self.params.get('max_downloads')
|
||||
if max_downloads is not None:
|
||||
if self._num_downloads >= int(max_downloads):
|
||||
raise MaxDownloadsReached()
|
||||
max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
|
||||
if self._num_downloads >= max_downloads:
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
# TODO: backward compatibility, to be removed
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
@@ -1897,8 +1896,17 @@ class YoutubeDL(object):
|
||||
|
||||
if not self.params.get('skip_download', False):
|
||||
try:
|
||||
def checked_get_suitable_downloader(info_dict, params):
|
||||
ed_args = params.get('external_downloader_args')
|
||||
dler = get_suitable_downloader(info_dict, params)
|
||||
if ed_args and not params.get('external_downloader_args'):
|
||||
# external_downloader_args was cleared because external_downloader was rejected
|
||||
self.report_warning('Requested external downloader cannot be used: '
|
||||
'ignoring --external-downloader-args.')
|
||||
return dler
|
||||
|
||||
def dl(name, info):
|
||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
||||
fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
@@ -2040,9 +2048,12 @@ class YoutubeDL(object):
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError) as err:
|
||||
self.report_error('postprocessing: %s' % str(err))
|
||||
self.report_error('postprocessing: %s' % error_to_compat_str(err))
|
||||
return
|
||||
self.record_download_archive(info_dict)
|
||||
# avoid possible nugatory search for further items (PR #26638)
|
||||
if self._num_downloads >= max_downloads:
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
@@ -2305,7 +2316,7 @@ class YoutubeDL(object):
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
out, err = sp.communicate()
|
||||
out, err = process_communicate_or_kill(sp)
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
||||
|
@@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
|
||||
|
||||
def pkcs7_padding(data):
    """
    PKCS#7 padding

    Extends the data to the next multiple of BLOCK_SIZE_BYTES; every added
    value equals the number of values added. Data whose length already is
    such a multiple gets a whole extra block.

    @param {int[]} data        cleartext
    @returns {int[]}           padding data
    """
    _, used = divmod(len(data), BLOCK_SIZE_BYTES)
    pad_value = BLOCK_SIZE_BYTES - used
    padding = [pad_value for _ in range(pad_value)]
    return data + padding
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, counter):
|
||||
"""
|
||||
Decrypt with aes in counter mode
|
||||
@@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
|
||||
previous_cipher_block = iv
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
remaining_length = BLOCK_SIZE_BYTES - len(block)
|
||||
block += [remaining_length] * remaining_length
|
||||
block = pkcs7_padding(block)
|
||||
mixed_block = xor(block, previous_cipher_block)
|
||||
|
||||
encrypted_block = aes_encrypt(mixed_block, expanded_key)
|
||||
@@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
|
||||
return encrypted_data
|
||||
|
||||
|
||||
def aes_ecb_encrypt(data, key):
    """
    Encrypt with aes in ECB mode. Using PKCS#7 padding

    The data is cut into BLOCK_SIZE_BYTES slices; each slice is passed
    through pkcs7_padding() and encrypted on its own with the expanded key.

    @param {int[]} data        cleartext
    @param {int[]} key         16/24/32-Byte cipher key
    @returns {int[]}           encrypted data
    """
    round_keys = key_expansion(key)
    n_blocks = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    ciphertext = []
    for start in range(0, n_blocks * BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES):
        chunk = pkcs7_padding(data[start: start + BLOCK_SIZE_BYTES])
        ciphertext += aes_encrypt(chunk, round_keys)

    return ciphertext
|
||||
|
||||
|
||||
def key_expansion(data):
|
||||
"""
|
||||
Generate key schedule
|
||||
@@ -303,7 +336,7 @@ def xor(data1, data2):
|
||||
|
||||
|
||||
def rijndael_mul(a, b):
|
||||
if(a == 0 or b == 0):
|
||||
if (a == 0 or b == 0):
|
||||
return 0
|
||||
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
|
||||
|
||||
|
@@ -10,12 +10,21 @@ import traceback
|
||||
|
||||
from .compat import compat_getenv
|
||||
from .utils import (
|
||||
error_to_compat_str,
|
||||
expand_path,
|
||||
is_outdated_version,
|
||||
try_get,
|
||||
write_json_file,
|
||||
)
|
||||
from .version import __version__
|
||||
|
||||
|
||||
class Cache(object):
|
||||
|
||||
_YTDL_DIR = 'youtube-dl'
|
||||
_VERSION_KEY = _YTDL_DIR + '_version'
|
||||
_DEFAULT_VERSION = '2021.12.17'
|
||||
|
||||
def __init__(self, ydl):
|
||||
self._ydl = ydl
|
||||
|
||||
@@ -23,7 +32,7 @@ class Cache(object):
|
||||
res = self._ydl.params.get('cachedir')
|
||||
if res is None:
|
||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||
res = os.path.join(cache_root, 'youtube-dl')
|
||||
res = os.path.join(cache_root, self._YTDL_DIR)
|
||||
return expand_path(res)
|
||||
|
||||
def _get_cache_fn(self, section, key, dtype):
|
||||
@@ -50,13 +59,22 @@ class Cache(object):
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
write_json_file(data, fn)
|
||||
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
self._ydl.report_warning(
|
||||
'Writing cache to %r failed: %s' % (fn, tb))
|
||||
|
||||
def load(self, section, key, dtype='json', default=None):
|
||||
def _validate(self, data, min_ver):
    """Return the payload of a loaded cache entry unless it is too old.

    data -- the object read from the cache file
    min_ver -- minimum acceptable version; a false value is treated as '0'

    Returns the value stored under 'data', or None (after a screen message)
    when the entry's version is reported as outdated against min_ver.
    """
    version = try_get(data, lambda x: x[self._VERSION_KEY])
    if version:
        entry = data
    else:
        # Backward compatibility: an entry without a version key is taken
        # to be the bare payload and given the default version
        entry = {'data': data}
        version = self._DEFAULT_VERSION
    if is_outdated_version(version, min_ver or '0', assume_new=False):
        self._ydl.to_screen(
            'Discarding old cache from version {version} (needs {min_ver})'.format(
                version=version, min_ver=min_ver))
        return None
    return entry['data']
|
||||
|
||||
def load(self, section, key, dtype='json', default=None, min_ver=None):
|
||||
assert dtype in ('json',)
|
||||
|
||||
if not self.enabled:
|
||||
@@ -66,12 +84,12 @@ class Cache(object):
|
||||
try:
|
||||
try:
|
||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||
return json.load(cachef)
|
||||
return self._validate(json.load(cachef), min_ver)
|
||||
except ValueError:
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = str(oe)
|
||||
file_size = error_to_compat_str(oe)
|
||||
self._ydl.report_warning(
|
||||
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||
except IOError:
|
||||
|
1667
youtube_dl/casefold.py
Normal file
1667
youtube_dl/casefold.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -21,6 +21,23 @@ import subprocess
|
||||
import sys
|
||||
import xml.etree.ElementTree
|
||||
|
||||
# deal with critical unicode/str things first
|
||||
try:
|
||||
# Python 2
|
||||
compat_str, compat_basestring, compat_chr = (
|
||||
unicode, basestring, unichr
|
||||
)
|
||||
from .casefold import casefold as compat_casefold
|
||||
except NameError:
|
||||
compat_str, compat_basestring, compat_chr = (
|
||||
str, str, chr
|
||||
)
|
||||
compat_casefold = lambda s: s.casefold()
|
||||
|
||||
try:
|
||||
import collections.abc as compat_collections_abc
|
||||
except ImportError:
|
||||
import collections as compat_collections_abc
|
||||
|
||||
try:
|
||||
import urllib.request as compat_urllib_request
|
||||
@@ -73,6 +90,15 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import Cookie as compat_cookies
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||
def load(self, rawdata):
|
||||
if isinstance(rawdata, compat_str):
|
||||
rawdata = str(rawdata)
|
||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||
else:
|
||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||
|
||||
try:
|
||||
import html.entities as compat_html_entities
|
||||
except ImportError: # Python 2
|
||||
@@ -2360,11 +2386,6 @@ try:
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
except NameError:
|
||||
compat_str = str
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
@@ -2495,22 +2516,11 @@ except ImportError: # Python < 3.4
|
||||
|
||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
try:
|
||||
compat_basestring = basestring # Python 2
|
||||
except NameError:
|
||||
compat_basestring = str
|
||||
|
||||
try:
|
||||
compat_chr = unichr # Python 2
|
||||
except NameError:
|
||||
compat_chr = chr
|
||||
|
||||
try:
|
||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
|
||||
@@ -2877,6 +2887,7 @@ else:
|
||||
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
||||
|
||||
def compat_get_terminal_size(fallback=(80, 24)):
|
||||
from .utils import process_communicate_or_kill
|
||||
columns = compat_getenv('COLUMNS')
|
||||
if columns:
|
||||
columns = int(columns)
|
||||
@@ -2893,7 +2904,7 @@ else:
|
||||
sp = subprocess.Popen(
|
||||
['stty', 'size'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = sp.communicate()
|
||||
out, err = process_communicate_or_kill(sp)
|
||||
_lines, _columns = map(int, out.split())
|
||||
except Exception:
|
||||
_columns, _lines = _terminal_size(*fallback)
|
||||
@@ -2953,6 +2964,24 @@ else:
|
||||
compat_Struct = struct.Struct
|
||||
|
||||
|
||||
# compat_map/filter() returning an iterator, supposedly the
|
||||
# same versioning as for zip below
|
||||
try:
|
||||
from future_builtins import map as compat_map
|
||||
except ImportError:
|
||||
try:
|
||||
from itertools import imap as compat_map
|
||||
except ImportError:
|
||||
compat_map = map
|
||||
|
||||
try:
|
||||
from future_builtins import filter as compat_filter
|
||||
except ImportError:
|
||||
try:
|
||||
from itertools import ifilter as compat_filter
|
||||
except ImportError:
|
||||
compat_filter = filter
|
||||
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
except ImportError: # not 2.6+ or is 3.x
|
||||
@@ -2962,6 +2991,82 @@ except ImportError: # not 2.6+ or is 3.x
|
||||
compat_zip = zip
|
||||
|
||||
|
||||
# method renamed between Py2/3
|
||||
try:
|
||||
from itertools import zip_longest as compat_itertools_zip_longest
|
||||
except ImportError:
|
||||
from itertools import izip_longest as compat_itertools_zip_longest
|
||||
|
||||
|
||||
# new class in collections
|
||||
try:
|
||||
from collections import ChainMap as compat_collections_chain_map
|
||||
# Py3.3's ChainMap is deficient
|
||||
if sys.version_info < (3, 4):
|
||||
raise ImportError
|
||||
except ImportError:
|
||||
# Py <= 3.3
|
||||
class compat_collections_chain_map(compat_collections_abc.MutableMapping):
    """Substitute for collections.ChainMap.

    Lookups search self.maps from first to last and return the first hit;
    writes and deletions only ever touch the first mapping.
    """

    maps = [{}]

    def __init__(self, *maps):
        self.maps = list(maps) or [{}]

    def __getitem__(self, k):
        for m in self.maps:
            if k in m:
                return m[k]
        raise KeyError(k)

    def __setitem__(self, k, v):
        self.maps[0][k] = v

    def __contains__(self, k):
        return any((k in m) for m in self.maps)

    def __delitem(self, k):
        # shared by __delitem__() and pop(): delete from the first map only
        if k in self.maps[0]:
            del self.maps[0][k]
            return
        raise KeyError(k)

    def __delitem__(self, k):
        self.__delitem(k)

    def __iter__(self):
        # Yield each key once, however many maps hold it: chaining the
        # maps directly would repeat shadowed keys and so break len(),
        # keys(), items() and comparisons
        seen = set()
        keys = []
        for m in reversed(self.maps):
            for k in m:
                if k not in seen:
                    seen.add(k)
                    keys.append(k)
        return iter(keys)

    def __len__(self):
        # number of distinct keys (an iterator has no len())
        return len(set().union(*self.maps))

    # to match Py3, don't del directly
    def pop(self, k, *args):
        """Remove k from the first map and return its value.

        If k is not in the first map, return the default given as second
        argument, or raise KeyError if there is none.
        """
        if k in self.maps[0]:
            off = self.maps[0][k]
            self.__delitem(k)
            return off
        elif len(args) > 0:
            return args[0]
        raise KeyError(k)

    def new_child(self, m=None, **kwargs):
        """Return a new chain with m (plus kwargs) in front of these maps."""
        # only a missing m is replaced: an empty mapping passed by the
        # caller must itself become the first map
        if m is None:
            m = {}
        m.update(kwargs)
        return compat_collections_chain_map(m, *self.maps)

    @property
    def parents(self):
        """A new chain made of all maps except the first."""
        return compat_collections_chain_map(*(self.maps[1:]))
|
||||
|
||||
|
||||
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
|
||||
compat_re_Pattern = type(re.compile(''))
|
||||
# and on the type of a match
|
||||
compat_re_Match = type(re.match('a', 'a'))
|
||||
|
||||
|
||||
if sys.version_info < (3, 3):
|
||||
def compat_b64decode(s, *args, **kwargs):
|
||||
if isinstance(s, compat_str):
|
||||
@@ -2996,15 +3101,20 @@ __all__ = [
|
||||
'compat_Struct',
|
||||
'compat_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_casefold',
|
||||
'compat_chr',
|
||||
'compat_collections_abc',
|
||||
'compat_collections_chain_map',
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
'compat_cookies_SimpleCookie',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
'compat_filter',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
'compat_getpass',
|
||||
@@ -3015,12 +3125,16 @@ __all__ = [
|
||||
'compat_input',
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_itertools_zip_longest',
|
||||
'compat_kwargs',
|
||||
'compat_map',
|
||||
'compat_numeric_types',
|
||||
'compat_ord',
|
||||
'compat_os_name',
|
||||
'compat_parse_qs',
|
||||
'compat_print',
|
||||
'compat_re_Match',
|
||||
'compat_re_Pattern',
|
||||
'compat_realpath',
|
||||
'compat_setenv',
|
||||
'compat_shlex_quote',
|
||||
|
@@ -1,22 +1,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params=None):
    """Get the downloader class that can handle the info dict.

    The protocol is determined here and stored in info_dict['protocol'];
    the selection itself then works on a shallow copy of info_dict.
    """
    # a fresh dict per call: the selection may write to params, which must
    # not end up in a default object shared between calls
    if params is None:
        params = {}
    info_dict['protocol'] = determine_protocol(info_dict)
    info_copy = info_dict.copy()
    return _get_suitable_downloader(info_copy, params)
|
||||
|
||||
|
||||
# Some of these require get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .dash import DashSegmentsFD
|
||||
from .f4m import F4mFD
|
||||
from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
from .rtsp import RtspFD
|
||||
from .ism import IsmFD
|
||||
from .niconico import NiconicoDmcFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
'm3u8_native': HlsFD,
|
||||
@@ -26,13 +35,12 @@ PROTOCOL_MAP = {
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
}
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params={}):
|
||||
def _get_suitable_downloader(info_dict, params={}):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
protocol = determine_protocol(info_dict)
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
@@ -42,7 +50,11 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
# Avoid using unwanted args since external_downloader was rejected
|
||||
if params.get('external_downloader_args'):
|
||||
params['external_downloader_args'] = None
|
||||
|
||||
protocol = info_dict['protocol']
|
||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
|
||||
|
@@ -22,6 +22,7 @@ from ..utils import (
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
is_outdated_version,
|
||||
process_communicate_or_kill,
|
||||
)
|
||||
|
||||
|
||||
@@ -104,7 +105,7 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
_, stderr = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
return p.returncode
|
||||
@@ -141,7 +142,7 @@ class CurlFD(ExternalFD):
|
||||
|
||||
# curl writes the progress to stderr so don't capture it.
|
||||
p = subprocess.Popen(cmd)
|
||||
p.communicate()
|
||||
process_communicate_or_kill(p)
|
||||
return p.returncode
|
||||
|
||||
|
||||
@@ -336,14 +337,17 @@ class FFmpegFD(ExternalFD):
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||
except BaseException as e:
|
||||
# subprocess.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
|
||||
process_communicate_or_kill(proc, b'q')
|
||||
else:
|
||||
proc.kill()
|
||||
proc.wait()
|
||||
raise
|
||||
return retval
|
||||
|
||||
|
66
youtube_dl/downloader/niconico.py
Normal file
66
youtube_dl/downloader/niconico.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
try:
|
||||
import threading
|
||||
except ImportError:
|
||||
threading = None
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..downloader import get_suitable_downloader
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
    """ Downloading niconico douga from DMC with heartbeat """

    FD_NAME = 'niconico_dmc'

    def real_download(self, filename, info_dict):
        """Download via the downloader suited to the resolved format,
        posting a heartbeat request at a fixed interval meanwhile.

        Returns the result of the wrapped downloader's real_download().
        """
        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)

        ie = NiconicoIE(self.ydl)
        info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)

        fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)

        if not threading:
            # no timer can be run: download without any heartbeat
            self.to_screen('[%s] Threading for Heartbeat not available' % self.FD_NAME)
            return fd.real_download(filename, info_dict)

        success = download_complete = False
        # one-element list so that heartbeat() can replace the timer
        timer = [None]
        heartbeat_lock = threading.Lock()
        heartbeat_url = heartbeat_info_dict['url']
        heartbeat_data = heartbeat_info_dict['data'].encode()
        heartbeat_interval = heartbeat_info_dict.get('interval', 30)

        request = sanitized_Request(heartbeat_url, heartbeat_data)

        def heartbeat():
            try:
                self.ydl.urlopen(request).read()
            except Exception:
                self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)

            # re-arm under the lock, unless the download has ended
            with heartbeat_lock:
                if not download_complete:
                    timer[0] = threading.Timer(heartbeat_interval, heartbeat)
                    timer[0].start()

        heartbeat_info_dict['ping']()
        self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
        try:
            heartbeat()
            # compared by class name rather than by importing the class
            if type(fd).__name__ == 'HlsFD':
                info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
            success = fd.real_download(filename, info_dict)
        finally:
            with heartbeat_lock:
                # no timer exists if the first heartbeat was interrupted
                # before scheduling one; the interruption must not be
                # replaced by an AttributeError raised here
                if timer[0] is not None:
                    timer[0].cancel()
                download_complete = True
        return success
|
@@ -89,11 +89,13 @@ class RtmpFD(FileDownloader):
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen('[rtmpdump] ' + line)
|
||||
finally:
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.wait()
|
||||
except BaseException: # Including KeyboardInterrupt
|
||||
proc.kill()
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
raise
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url')
|
||||
|
@@ -31,30 +31,34 @@ from ..utils import (
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
IE_DESC = 'Anime Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'id': '9841',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
'series': 'Blue Exorcist - Kyôto Saga',
|
||||
'duration': 1467,
|
||||
'release_date': '20170106',
|
||||
'title': 'Fruits Basket - Episode 1',
|
||||
'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
|
||||
'series': 'Fruits Basket',
|
||||
'duration': 1437,
|
||||
'release_date': '20190405',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'season_number': 2,
|
||||
'episode': 'Début des hostilités',
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
@@ -82,14 +86,14 @@ class ADNIE(InfoExtractor):
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
subtitle_location, video_id, 'Downloading subtitles data',
|
||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
fatal=False, headers={'Origin': 'https://' + self._BASE})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -138,9 +142,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if not username:
|
||||
return
|
||||
try:
|
||||
url = self._API_BASE_URL + 'authentication/login'
|
||||
access_token = (self._download_json(
|
||||
self._API_BASE_URL + 'authentication/login', None,
|
||||
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||
url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'password': password,
|
||||
'rememberMe': False,
|
||||
@@ -153,7 +157,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
self._webpage_read_content(e.cause, url, username),
|
||||
username, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
@@ -211,7 +216,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
error = self._parse_json(
|
||||
self._webpage_read_content(e.cause, links_url, video_id),
|
||||
video_id, fatal=False) or {}
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
@@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/'''
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
_THEPLATFORM_KEY = '43jXaGRQud'
|
||||
_THEPLATFORM_SECRET = 'S10BPXHMlb'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
|
@@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'id': '2800002704436634',
|
||||
'ext': 'mp4',
|
||||
'title': 'CASIMA7.22',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'uploader': 'CASIMA Official Store',
|
||||
'timestamp': 1500717600,
|
||||
'upload_date': '20170722',
|
||||
|
89
youtube_dl/extractor/alsace20tv.py
Normal file
89
youtube_dl/extractor/alsace20tv.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Alsace20TVIE(InfoExtractor):
    """Extractor for video pages on alsace20.tv.

    The last dash-separated token of the page path is captured as the video
    id and used as the ``key`` of the site's player endpoint.
    """
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'duration': 1073,
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _extract_video(self, video_id, url=None):
        """Build the info dict for ``video_id``.

        ``url`` is the optional watch page. When given, it is downloaded
        (non-fatally) to supply the description, the duration and a fallback
        thumbnail; when omitted, those fields are simply left empty.

        Raises KeyError if the player JSON carries no 'titre', since a title
        is mandatory.
        """
        info = self._download_json(
            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
            video_id) or {}
        title = info['titre']

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            formats.extend(
                self._extract_smil_formats(fmt_url, video_id, fatal=False)
                if '/smil:_' in fmt_url
                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
        self._sort_formats(formats)

        # A failed (non-fatal) download yields a falsy value; normalise to ''
        # so the HTML helpers below can be called unconditionally.
        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
        # default=None: a missing og:image is expected (e.g. no webpage at
        # all) and should not produce a warning.
        thumbnail = url_or_none(
            dict_get(info, ('image', 'preview', ))
            or self._og_search_thumbnail(webpage, default=None))
        # The thumbnail path is searched for a 6-digit token that is read as
        # YYMMDD. thumbnail may be None here, so never hand None to the regex.
        upload_date = self._search_regex(
            r'/(\d{6})_', thumbnail or '', 'upload_date', default=None)
        upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            # Duration is optional metadata: its absence must not abort the
            # extraction, hence default=None instead of the fatal default.
            'duration': int_or_none(
                self._og_search_property('video:duration', webpage, default=None)),
            'view_count': int_or_none(info.get('nb_vues')),
        }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self._extract_video(video_id, url)
|
||||
|
||||
|
||||
class Alsace20TVEmbedIE(Alsace20TVIE):
    """Extractor for alsace20.tv embed URLs (``/emb/<id>``).

    Reuses the parent's extraction but passes no watch page URL, so only the
    player JSON is consulted.
    """
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _real_extract(self, url):
        # No page URL is forwarded: the embed address has no metadata page.
        return self._extract_video(self._match_id(url))
|
@@ -9,10 +9,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
||||
'md5': 'c1d41f72c8bcaf222e089434619316e4',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
|
||||
|
||||
video_data = self._parse_json(video_json, video_id)
|
||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -3,8 +3,11 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@@ -14,16 +17,17 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593921600,
|
||||
'duration': 6425,
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
@@ -39,19 +43,40 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id)
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode = ember_data['data']['attributes']
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (ember_data.get('included') or []):
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
|
||||
return {
|
||||
info = [{
|
||||
'id': episode_id,
|
||||
'title': episode['name'],
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
@@ -59,4 +84,10 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
}
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}]
|
||||
self._sort_formats(info)
|
||||
info = info[0]
|
||||
codecs = parse_codecs(info.get('ext', 'mp3'))
|
||||
info.update(codecs)
|
||||
return info
|
||||
|
@@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 7.01.2022
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||
'info_dict': {
|
||||
'display_id': 'maischberger-die-woche',
|
||||
'id': '100',
|
||||
'id': 'maischberger-die-woche-video100',
|
||||
'display_id': 'maischberger-die-woche-video100',
|
||||
'ext': 'mp4',
|
||||
'duration': 3687.0,
|
||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||
@@ -264,16 +264,25 @@ class ARDIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
||||
'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('id')
|
||||
|
||||
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
||||
doc = self._download_xml(player_url, display_id)
|
||||
@@ -323,9 +332,24 @@ class ARDIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
_SUB_FORMATS = (
|
||||
('./dataTimedText', 'ttml'),
|
||||
('./dataTimedTextNoOffset', 'ttml'),
|
||||
('./dataTimedTextVtt', 'vtt'),
|
||||
)
|
||||
|
||||
subtitles = {}
|
||||
for subsel, subext in _SUB_FORMATS:
|
||||
for node in video_node.findall(subsel):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': node.attrib['url'],
|
||||
'ext': subext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': mobj.group('id'),
|
||||
'id': xpath_text(video_node, './videoId', default=display_id),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'display_id': display_id,
|
||||
'title': video_node.find('./title').text,
|
||||
'duration': parse_duration(video_node.find('./duration').text),
|
||||
@@ -335,7 +359,7 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
@@ -365,22 +389,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
if display_id:
|
||||
display_id = display_id.rstrip('/')
|
||||
if not display_id:
|
||||
display_id = video_id
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
video_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
playerPage(client: "ard", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
@@ -410,7 +434,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (mobj.group('client'), video_id),
|
||||
}''' % video_id,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
@@ -435,7 +459,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
|
101
youtube_dl/extractor/arnes.py
Normal file
101
youtube_dl/extractor/arnes.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class ArnesIE(InfoExtractor):
    """Extractor for Arnes Video (video.arnes.si).

    All metadata comes from the public JSON endpoint
    ``/api/public/video/<id>``; the optional ``t`` query parameter of the
    input URL is reported as the start time.
    """
    IE_NAME = 'video.arnes.si'
    IE_DESC = 'Arnes Video'
    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
    _TESTS = [{
        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
        'info_dict': {
            'id': 'a1qrWTOQfVoU',
            'ext': 'mp4',
            'title': 'Linearna neodvisnost, definicija',
            'description': 'Linearna neodvisnost, definicija',
            'license': 'PRIVATE',
            'creator': 'Polona Oblak',
            'timestamp': 1585063725,
            'upload_date': '20200324',
            'channel': 'Polona Oblak',
            'channel_id': 'q6pc04hw24cj',
            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
            'duration': 596.75,
            'view_count': int,
            'tags': ['linearna_algebra'],
            'start_time': 10,
        }
    }, {
        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
        'only_matching': True,
    }]
    _BASE_URL = 'https://video.arnes.si'

    def _real_extract(self, url):
        video_id = self._match_id(url)

        video = self._download_json(
            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
        title = video['title']

        formats = []
        for media in (video.get('media') or []):
            media_url = media.get('url')
            if not media_url:
                continue
            formats.append({
                # Media URLs are joined onto the site root.
                'url': self._BASE_URL + media_url,
                'format_id': remove_start(media.get('format'), 'FORMAT_'),
                'format_note': media.get('formatTranslation'),
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            })
        self._sort_formats(formats)

        channel = video.get('channel') or {}
        channel_id = channel.get('url')
        thumbnail = video.get('thumbnailUrl')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # The thumbnail is optional: concatenating None onto the base URL
            # would raise TypeError and kill an otherwise good extraction.
            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
            'description': video.get('description'),
            'license': video.get('license'),
            'creator': video.get('author'),
            'timestamp': parse_iso8601(video.get('creationTime')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
            # The API value is divided by 1000 to obtain seconds.
            'duration': float_or_none(video.get('duration'), 1000),
            'view_count': int_or_none(video.get('views')),
            'tags': video.get('hashtags'),
            'start_time': int_or_none(compat_parse_qs(
                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
        }
|
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
@@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
class ArteTVCategoryIE(ArteTVBaseIE):
    """Playlist extractor for arte.tv category pages.

    Collects every link on the page that one of the single-video or playlist
    Arte extractors accepts and returns them as one playlist.
    """
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific Arte extractors whenever they match.
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        lang, playlist_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for video in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video = video.group('url')
            if video == url:
                # Skip self-links back to the category page.
                continue
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

        # The title is resolved unconditionally so that it is always bound,
        # even when the page yields no items. The <title> fallback needs the
        # page and a field name, and a missing title must fall through to
        # the generic one rather than crash on None.rsplit().
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(
                     r'<title\b[^>]*>([^<]+)</title>', webpage, 'title',
                     default=None)
                 or '')
        # Keep only the text before the last '|' of the page title.
        title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)

        result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
        if result:
            description = self._og_search_description(webpage, default=None)
            if description:
                result['description'] = description
            return result
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AudiomackIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack'
|
||||
_TESTS = [
|
||||
# hosted on audiomack
|
||||
@@ -29,25 +29,27 @@ class AudiomackIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
# Needs new test URL.
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||
'info_dict': {
|
||||
'id': '258901379',
|
||||
'ext': 'mp3',
|
||||
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||
'uploader': 'ILOVEMAKONNEN',
|
||||
'upload_date': '20160414',
|
||||
}
|
||||
'only_matching': True,
|
||||
# 'info_dict': {
|
||||
# 'id': '258901379',
|
||||
# 'ext': 'mp3',
|
||||
# 'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||
# 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||
# 'uploader': 'ILOVEMAKONNEN',
|
||||
# 'upload_date': '20160414',
|
||||
# }
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# URLs end with [uploader name]/song/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
album_url_tag = self._match_id(url).replace('/song/', '/')
|
||||
|
||||
# Request the extended version of the api for extra fields like artist and title
|
||||
api_response = self._download_json(
|
||||
@@ -73,13 +75,13 @@ class AudiomackIE(InfoExtractor):
|
||||
|
||||
|
||||
class AudiomackAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack:album'
|
||||
_TESTS = [
|
||||
# Standard album playlist
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||
'playlist_count': 15,
|
||||
'playlist_count': 11,
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
@@ -95,24 +97,24 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
||||
'id': '837577',
|
||||
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
||||
'id': '837580',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playliststart': 9,
|
||||
'playlistend': 9,
|
||||
'playliststart': 2,
|
||||
'playlistend': 2,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# URLs end with [uploader name]/album/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
album_url_tag = self._match_id(url).replace('/album/', '/')
|
||||
result = {'_type': 'playlist', 'entries': []}
|
||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||
@@ -134,7 +136,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = api_response[apikey]
|
||||
result[resultkey] = compat_str(api_response[apikey])
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
|
@@ -11,6 +11,8 @@ from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -25,8 +27,10 @@ from ..utils import (
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@@ -36,7 +40,7 @@ from ..utils import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -392,9 +396,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, programme_id, mpd_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
# TODO: let expected_status be passed into _extract_xxx_formats() instead
|
||||
try:
|
||||
fmts = self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
|
||||
and e.exc_info[1].code in (403, 404)):
|
||||
raise
|
||||
fmts = []
|
||||
formats.extend(fmts)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
@@ -761,23 +773,44 @@ class BBCIE(BBCCoUkIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# custom redirection to www.bbc.com
|
||||
# also, video with window.__INITIAL_DATA__
|
||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'p02xzws1',
|
||||
'ext': 'mp4',
|
||||
'title': "Pluto may have 'nitrogen glaciers'",
|
||||
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# bbcthreeConfig
|
||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||
'info_dict': {
|
||||
'id': 'p06556y7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||
'title': 'Things Not To Say to people that live on council estates',
|
||||
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
|
||||
'duration': 360,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
@@ -1150,9 +1183,16 @@ class BBCIE(BBCCoUkIE):
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
if initial_data is None:
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||
'preload state', default={})
|
||||
else:
|
||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
@@ -1164,19 +1204,39 @@ class BBCIE(BBCCoUkIE):
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
self._sort_formats(formats)
|
||||
item_desc = None
|
||||
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
||||
if blocks:
|
||||
summary = []
|
||||
for block in blocks:
|
||||
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||
if text:
|
||||
summary.append(text)
|
||||
if summary:
|
||||
item_desc = '\n\n'.join(summary)
|
||||
item_time = None
|
||||
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
||||
if try_get(meta, lambda x: x['label']) == 'Published':
|
||||
item_time = unified_timestamp(meta.get('timestamp'))
|
||||
break
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': item_title,
|
||||
'thumbnail': item.get('holdingImageUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') != 'media':
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
|
59
youtube_dl/extractor/bigo.py
Normal file
59
youtube_dl/extractor/bigo.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, urlencode_postdata
|
||||
|
||||
|
||||
class BigoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bigo.tv/ja/221338632',
|
||||
'info_dict': {
|
||||
'id': '6576287577575737440',
|
||||
'title': '土よ〜💁♂️ 休憩室/REST room',
|
||||
'thumbnail': r're:https?://.+',
|
||||
'uploader': '✨Shin💫',
|
||||
'uploader_id': '221338632',
|
||||
'is_live': True,
|
||||
},
|
||||
'skip': 'livestream',
|
||||
}, {
|
||||
'url': 'https://www.bigo.tv/th/Tarlerm1304',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://bigo.tv/115976881',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
info_raw = self._download_json(
|
||||
'https://bigo.tv/studio/getInternalStudioInfo',
|
||||
user_id, data=urlencode_postdata({'siteId': user_id}))
|
||||
|
||||
if not isinstance(info_raw, dict):
|
||||
raise ExtractorError('Received invalid JSON data')
|
||||
if info_raw.get('code'):
|
||||
raise ExtractorError(
|
||||
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||
info = info_raw.get('data') or {}
|
||||
|
||||
if not info.get('alive'):
|
||||
raise ExtractorError('This user is offline.', expected=True)
|
||||
|
||||
return {
|
||||
'id': info.get('roomId') or user_id,
|
||||
'title': info.get('roomTopic') or info.get('nick_name') or user_id,
|
||||
'formats': [{
|
||||
'url': info.get('hls_src'),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
}],
|
||||
'thumbnail': info.get('snapshot'),
|
||||
'uploader': info.get('nick_name'),
|
||||
'uploader_id': user_id,
|
||||
'is_live': True,
|
||||
}
|
@@ -233,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader': uploader_mobj.group('name').strip(),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
if not info.get('uploader'):
|
||||
@@ -369,6 +369,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
'filesize': int_or_none(play_data.get('size')),
|
||||
}]
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
song = self._call_api('song/info', au_id)
|
||||
title = song['title']
|
||||
statistic = song.get('statistic') or {}
|
||||
|
@@ -1,86 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
IE_NAME = 'blinkx'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||
'info_dict': {
|
||||
'id': 'Da0Gw3xc',
|
||||
'ext': 'mp4',
|
||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||
'uploader': 'IGN News',
|
||||
'upload_date': '20150217',
|
||||
'timestamp': 1424215740,
|
||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||
'duration': 47.743333,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
|
||||
+ 'video=%s' % video_id)
|
||||
data_json = self._download_webpage(api_url, display_id)
|
||||
data = json.loads(data_json)['api']['results'][0]
|
||||
duration = None
|
||||
thumbnails = []
|
||||
formats = []
|
||||
for m in data['media']:
|
||||
if m['type'] == 'jpg':
|
||||
thumbnails.append({
|
||||
'url': m['link'],
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||
tbr = vbr + abr if vbr and abr else None
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(m.get('w')),
|
||||
'height': int_or_none(m.get('h')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'fullid': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'uploader': data['channel_name'],
|
||||
'timestamp': data['pubdate_epoch'],
|
||||
'description': data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
}
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -12,13 +13,28 @@ from ..utils import (
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://de.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cn.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.bongacams.net/claireashton',
|
||||
'info_dict': {
|
||||
'id': 'claireashton',
|
||||
'ext': 'mp4',
|
||||
'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'age_limit': 18,
|
||||
'uploader_id': 'ClaireAshton',
|
||||
'uploader': 'ClaireAshton',
|
||||
'like_count': int,
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
|
||||
def _real_extract(self, url):
|
||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
||||
compat_urllib_parse_unquote(self._match_id(url))),
|
||||
-zlib.MAX_WBITS), None)['video']['items'][0]
|
||||
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
|
@@ -1,38 +1,113 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .cbs import CBSBaseIE
|
||||
import re
|
||||
|
||||
# from .cbs import CBSBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
|
||||
|
||||
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||
class CBSSportsEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'cbssports:embed'
|
||||
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||
(?:
|
||||
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
|
||||
pcid%3D(?P<pcid>\d+)
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
|
||||
'info_dict': {
|
||||
'id': '1214315075735',
|
||||
'ext': 'mp4',
|
||||
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
|
||||
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
|
||||
'timestamp': 1524111457,
|
||||
'upload_date': '20180419',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
|
||||
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
# def _extract_video_info(self, filter_query, video_id):
|
||||
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid, pcid = re.match(self._VALID_URL, url).groups()
|
||||
query = {'id': uuid} if uuid else {'pcid': pcid}
|
||||
video = self._download_json(
|
||||
'https://www.cbssports.com/api/content/video/',
|
||||
uuid or pcid, query=query)[0]
|
||||
video_id = video['id']
|
||||
title = video['title']
|
||||
metadata = video.get('metaData') or {}
|
||||
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
|
||||
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
metadata['files'][0]['url'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = video.get('image')
|
||||
thumbnails = None
|
||||
if image:
|
||||
image_path = image.get('path')
|
||||
if image_path:
|
||||
thumbnails = [{
|
||||
'url': image_path,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
'filesize': int_or_none(image.get('size')),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': video.get('description'),
|
||||
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
class CBSSportsBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
|
||||
webpage, 'video id')
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
|
||||
webpage, 'embed url')
|
||||
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
|
||||
|
||||
|
||||
class CBSSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = 'cbssports'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
|
||||
'info_dict': {
|
||||
'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cover 3: Stanford Spring Gleaning',
|
||||
'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
|
||||
'timestamp': 1617218398,
|
||||
'upload_date': '20210331',
|
||||
'duration': 502,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
|
||||
IE_NAME = '247sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
|
||||
'info_dict': {
|
||||
'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
|
||||
'ext': 'mp4',
|
||||
'title': '2021 QB Jake Garcia senior highlights through five games',
|
||||
'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
|
||||
'timestamp': 1607114223,
|
||||
'upload_date': '20201204',
|
||||
'duration': 208,
|
||||
},
|
||||
}]
|
||||
|
@@ -133,6 +133,8 @@ class CDAIE(InfoExtractor):
|
||||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||
def decrypt_file(a):
|
||||
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
|
||||
@@ -197,7 +199,7 @@ class CDAIE(InfoExtractor):
|
||||
handler = self._download_webpage
|
||||
|
||||
webpage = handler(
|
||||
self._BASE_URL + href, video_id,
|
||||
urljoin(self._BASE_URL, href), video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
# Manually report warning because empty page is returned when
|
||||
@@ -209,6 +211,4 @@ class CDAIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
@@ -12,35 +12,21 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
'id': '61924494877246241',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace: Život v Grónsku',
|
||||
'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 3350,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
|
||||
'info_dict': {
|
||||
'id': '61924494877028507',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace: Bonus 01 - En',
|
||||
'title': 'Bonus 01 - En - Hyde Park Civilizace',
|
||||
'description': 'English Subtittles',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 81.3,
|
||||
@@ -51,31 +37,111 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# live stream
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||
'url': 'http://www.ceskatelevize.cz/zive/ct1/',
|
||||
'info_dict': {
|
||||
'id': 402,
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r'ČT1 - živé vysílání online',
|
||||
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to Czech Republic',
|
||||
}, {
|
||||
# another
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 402,
|
||||
'ext': 'mp4',
|
||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
# 'skip': 'Georestricted to Czech Republic',
|
||||
}, {
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video with 18+ caution trailer
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||
'info_dict': {
|
||||
'id': '215562210900007-bogotart',
|
||||
'title': 'Bogotart - Queer',
|
||||
'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '61924494877311053',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bogotart - Queer (Varování 18+)',
|
||||
'duration': 11.9,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '61924494877068022',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bogotart - Queer (Queer)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 1558.3,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||
webpage, 'next.js data', **kw),
|
||||
video_id, **kw)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
if playlist_description:
|
||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
type_ = 'IDEC'
|
||||
if re.search(r'(^/porady|/zive)/', parsed_url.path):
|
||||
next_data = self._search_nextjs_data(webpage, playlist_id)
|
||||
if '/zive/' in parsed_url.path:
|
||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
|
||||
else:
|
||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
|
||||
if not idec:
|
||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
|
||||
if idec:
|
||||
type_ = 'bonus'
|
||||
if not idec:
|
||||
raise ExtractorError('Failed to find IDEC id')
|
||||
iframe_hash = self._download_webpage(
|
||||
'https://www.ceskatelevize.cz/v-api/iframe-hash/',
|
||||
playlist_id, note='Getting IFRAME hash')
|
||||
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
|
||||
webpage = self._download_webpage(
|
||||
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
|
||||
playlist_id, note='Downloading player', query=query)
|
||||
|
||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
self.raise_geo_restricted(NOT_AVAILABLE_STRING)
|
||||
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
|
||||
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
|
||||
|
||||
type_ = None
|
||||
episode_id = None
|
||||
@@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
data = {
|
||||
'playlist[0][type]': type_,
|
||||
'playlist[0][id]': episode_id,
|
||||
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||
'requestUrl': parsed_url.path,
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
@@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
@@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
@@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
entries[num]['formats'].extend(formats)
|
||||
continue
|
||||
|
||||
item_id = item.get('id') or item['assetId']
|
||||
item_id = str_or_none(item.get('id') or item['assetId'])
|
||||
title = item['title']
|
||||
|
||||
duration = float_or_none(item.get('duration'))
|
||||
@@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
if playlist_len == 1:
|
||||
final_title = playlist_title or title
|
||||
if is_live:
|
||||
final_title = self._live_title(final_title)
|
||||
else:
|
||||
final_title = '%s (%s)' % (playlist_title, title)
|
||||
|
||||
@@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
for e in entries:
|
||||
self._sort_formats(e['formats'])
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def _get_subtitles(self, episode_id, subs):
|
||||
@@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
yield line
|
||||
|
||||
return '\r\n'.join(_fix_subtitle(subtitles))
|
||||
|
||||
|
||||
class CeskaTelevizePoradyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
# video with 18+ caution trailer
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||
'info_dict': {
|
||||
'id': '215562210900007-bogotart',
|
||||
'title': 'Queer: Bogotart',
|
||||
'description': 'Alternativní průvodce současným queer světem',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '61924494876844842',
|
||||
'ext': 'mp4',
|
||||
'title': 'Queer: Bogotart (Varování 18+)',
|
||||
'duration': 10.2,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '61924494877068022',
|
||||
'ext': 'mp4',
|
||||
'title': 'Queer: Bogotart (Queer)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 1558.3,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = update_url_query(unescapeHTML(self._search_regex(
|
||||
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
|
||||
webpage, 'iframe player url', group='url')), query={
|
||||
'autoStart': 'true',
|
||||
})
|
||||
|
||||
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||
|
@@ -17,7 +17,7 @@ import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar_Cookie,
|
||||
compat_cookies,
|
||||
compat_cookies_SimpleCookie,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
@@ -70,6 +70,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
@@ -1275,6 +1276,7 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
author = e.get('author')
|
||||
info.update({
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
@@ -1282,7 +1284,11 @@ class InfoExtractor(object):
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'uploader': str_or_none(e.get('author')),
|
||||
# author can be an instance of 'Organization' or 'Person' types.
|
||||
# both types can have 'name' property(inherited from 'Thing' type). [1]
|
||||
# however some websites are using 'Text' type instead.
|
||||
# 1. https://schema.org/VideoObject
|
||||
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
@@ -2708,7 +2714,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
|
||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
@@ -2729,9 +2735,14 @@ class InfoExtractor(object):
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
flat_pl = try_get(jwplayer_data, lambda x: x.get('playlist') or True)
|
||||
if flat_pl is None:
|
||||
# not even a dict
|
||||
return []
|
||||
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
if flat_pl is True:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
@@ -2779,6 +2790,13 @@ class InfoExtractor(object):
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ...
|
||||
'genre': clean_html(video_data.get('genre')),
|
||||
'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'release_year': int_or_none(video_data.get('releasedate')),
|
||||
'age_limit': int_or_none(video_data.get('age_restriction')),
|
||||
}
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
|
||||
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
|
||||
@@ -2787,7 +2805,9 @@ class InfoExtractor(object):
|
||||
'url': formats[0]['url'],
|
||||
})
|
||||
else:
|
||||
self._sort_formats(formats)
|
||||
# avoid exception in case of only sttls
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
if len(entries) == 1:
|
||||
@@ -2797,7 +2817,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
urls = []
|
||||
urls = set()
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
@@ -2806,14 +2826,14 @@ class InfoExtractor(object):
|
||||
base_url, self._proto_relative_url(source.get('file')))
|
||||
if not source_url or source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=m3u8_id, fatal=False))
|
||||
elif source_type == 'dash' or ext == 'mpd':
|
||||
elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||
elif ext == 'smil':
|
||||
@@ -2828,20 +2848,23 @@ class InfoExtractor(object):
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
format_id = str_or_none(source.get('label'))
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
if height is None and format_id:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like "1080p", "720p SD", or 1080.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
|
||||
'height', default=None))
|
||||
height = parse_resolution(format_id).get('height')
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'tbr': int_or_none(source.get('bitrate')),
|
||||
'tbr': int_or_none(source.get('bitrate'), scale=1000),
|
||||
'filesize': int_or_none(source.get('filesize')),
|
||||
'ext': ext,
|
||||
}
|
||||
if format_id:
|
||||
a_format['format_id'] = format_id
|
||||
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
@@ -2896,10 +2919,10 @@ class InfoExtractor(object):
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||
req = sanitized_Request(url)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
|
148
youtube_dl/extractor/cpac.py
Normal file
148
youtube_dl/extractor/cpac.py
Normal file
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
# compat_range
|
||||
try:
|
||||
if callable(xrange):
|
||||
range = xrange
|
||||
except (NameError, TypeError):
|
||||
pass
|
||||
|
||||
|
||||
class CPACIE(InfoExtractor):
|
||||
IE_NAME = 'cpac'
|
||||
_VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
|
||||
_TEST = {
|
||||
# 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
|
||||
'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
|
||||
'md5': 'e46ad699caafd7aa6024279f2614e8fa',
|
||||
'info_dict': {
|
||||
'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220215',
|
||||
'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
|
||||
'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
|
||||
'timestamp': 1644901200,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url_lang = 'fr' if '/l-episode?' in url else 'en'
|
||||
|
||||
content = self._download_json(
|
||||
'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
|
||||
video_id)
|
||||
video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str)
|
||||
formats = []
|
||||
if video_url:
|
||||
content = content['page']
|
||||
title = str_or_none(content['details']['title_%s_t' % (url_lang, )])
|
||||
formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
|
||||
for fmt in formats:
|
||||
# prefer language to match URL
|
||||
fmt_lang = fmt.get('language')
|
||||
if fmt_lang == url_lang:
|
||||
fmt['language_preference'] = 10
|
||||
elif not fmt_lang:
|
||||
fmt['language_preference'] = -1
|
||||
else:
|
||||
fmt['language_preference'] = -10
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
category = str_or_none(content['details']['category_%s_t' % (url_lang, )])
|
||||
|
||||
def is_live(v_type):
|
||||
return (v_type == 'live') if v_type is not None else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
|
||||
'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
|
||||
'category': [category] if category else None,
|
||||
'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
|
||||
'is_live': is_live(content['details'].get('type')),
|
||||
}
|
||||
|
||||
|
||||
class CPACPlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'cpac:playlist'
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cpac.ca/program?id=6',
|
||||
'info_dict': {
|
||||
'id': 'id=6',
|
||||
'title': 'Headline Politics',
|
||||
'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
|
||||
'info_dict': {
|
||||
'id': 'key=hudson',
|
||||
'title': 'hudson',
|
||||
},
|
||||
'playlist_count': 22,
|
||||
}, {
|
||||
'url': 'https://www.cpac.ca/search?programId=50',
|
||||
'info_dict': {
|
||||
'id': 'programId=50',
|
||||
'title': '50',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'https://www.cpac.ca/emission?id=6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
|
||||
pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
|
||||
api_url = (
|
||||
'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
|
||||
% (pl_type, video_id, ))
|
||||
content = self._download_json(api_url, video_id)
|
||||
entries = []
|
||||
total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1)
|
||||
for page in range(1, total_pages + 1):
|
||||
if page > 1:
|
||||
api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
|
||||
content = self._download_json(
|
||||
api_url, video_id,
|
||||
note='Downloading continuation - %d' % (page, ),
|
||||
fatal=False)
|
||||
|
||||
for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
|
||||
episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
|
||||
if episode_url:
|
||||
entries.append(episode_url)
|
||||
|
||||
return self.playlist_result(
|
||||
(self.url_result(entry) for entry in entries),
|
||||
playlist_id=video_id,
|
||||
playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1],
|
||||
playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]),
|
||||
)
|
@@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
headers = {}
|
||||
if self._auth_token:
|
||||
headers['X-Auth-Token'] = self._auth_token
|
||||
result = self._download_json(
|
||||
self._API_BASE_URL + path, video_id, headers=headers)
|
||||
self._API_BASE_URL + path, video_id, headers=headers, query=query)
|
||||
self._handle_errors(result)
|
||||
return result['data']
|
||||
|
||||
@@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': '262bb2f257ff301115f1973540de8983',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for encoding in media.get('encodings', []):
|
||||
m3u8_url = encoding.get('master_playlist_url')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
for encoding_format in ('m3u8', 'mpd'):
|
||||
media = self._call_api('media/' + video_id, video_id, query={
|
||||
'encodingsNew': 'true',
|
||||
'encodingsFormat': encoding_format,
|
||||
})
|
||||
for encoding in media.get('encodings', []):
|
||||
playlist_url = encoding.get('master_playlist_url')
|
||||
if encoding_format == 'm3u8':
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
playlist_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif encoding_format == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
playlist_url, video_id, mpd_id='dash', fatal=False))
|
||||
encoding_url = encoding.get('url')
|
||||
file_url = encoding.get('file_url')
|
||||
if not encoding_url and not file_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
f = {
|
||||
'width': int_or_none(encoding.get('width')),
|
||||
'height': int_or_none(encoding.get('height')),
|
||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||
'vcodec': encoding.get('video_codec'),
|
||||
'acodec': encoding.get('audio_codec'),
|
||||
'container': encoding.get('container_type'),
|
||||
}
|
||||
for f_url in (encoding_url, file_url):
|
||||
if not f_url:
|
||||
continue
|
||||
fmt = f.copy()
|
||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||
if rtmp:
|
||||
fmt.update({
|
||||
'url': rtmp.group('url'),
|
||||
'play_path': rtmp.group('playpath'),
|
||||
'app': rtmp.group('app'),
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'url': f_url,
|
||||
'format_id': 'http',
|
||||
})
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = media['title']
|
||||
|
||||
subtitles = {}
|
||||
for closed_caption in media.get('closed_captions', []):
|
||||
sub_url = closed_caption.get('file')
|
||||
@@ -132,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
@@ -140,10 +153,13 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/series/2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://curiositystream.com/collections/36',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
||||
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# From https://gdcvault.com/play/1016624, empty speakerVideo
|
||||
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
|
||||
'info_dict': {
|
||||
'id': '201210-822101_1349794556671DDDD',
|
||||
'ext': 'flv',
|
||||
'title': 'Pre-launch - Preparing to Take the Plunge',
|
||||
},
|
||||
}, {
|
||||
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
|
||||
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_mp4(self, metadata):
|
||||
@@ -84,26 +96,20 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
'format_id': audio.get('code'),
|
||||
})
|
||||
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
'format_id': 'slides',
|
||||
})
|
||||
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
'format_id': 'speaker',
|
||||
})
|
||||
for video_key, format_id, preference in (
|
||||
('slide', 'slides', -2), ('speaker', 'speaker', -1)):
|
||||
video_path = xpath_text(metadata, './%sVideo' % video_key)
|
||||
if not video_path:
|
||||
continue
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': '%s video' % video_key,
|
||||
'quality': preference,
|
||||
'preference': preference,
|
||||
'format_id': format_id,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
|
||||
class EggheadCourseIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io course'
|
||||
IE_NAME = 'egghead:course'
|
||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||
'playlist_count': 29,
|
||||
'info_dict': {
|
||||
'id': '72',
|
||||
'id': '432655',
|
||||
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
||||
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
|
||||
class EggheadLessonIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io lesson'
|
||||
IE_NAME = 'egghead:lesson'
|
||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||
'info_dict': {
|
||||
@@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
|
||||
}, {
|
||||
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
|
||||
'title': 'sexy babe softcore',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'Video not found',
|
||||
}, {
|
||||
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
|
||||
'md5': '1baa9602ede46ce904c431f5418d8916',
|
||||
@@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
video_url = unescapeHTML(self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url'))
|
||||
title = self._html_search_regex(
|
||||
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
(r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
|
||||
webpage, 'title')
|
||||
|
||||
return {
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
@@ -51,6 +51,10 @@ from .anvato import AnvatoIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aliexpress import AliExpressLiveIE
|
||||
from .alsace20tv import (
|
||||
Alsace20TVIE,
|
||||
Alsace20TVEmbedIE,
|
||||
)
|
||||
from .apa import APAIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
@@ -71,7 +75,9 @@ from .arte import (
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
@@ -113,6 +119,7 @@ from .bfmtv import (
|
||||
)
|
||||
from .bibeltv import BibelTVIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bigo import BigoIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
@@ -131,7 +138,6 @@ from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
)
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
@@ -190,7 +196,11 @@ from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .cbssports import (
|
||||
CBSSportsEmbedIE,
|
||||
CBSSportsIE,
|
||||
TwentyFourSevenSportsIE,
|
||||
)
|
||||
from .ccc import (
|
||||
CCCIE,
|
||||
CCCPlaylistIE,
|
||||
@@ -198,10 +208,7 @@ from .ccc import (
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import (
|
||||
CeskaTelevizeIE,
|
||||
CeskaTelevizePoradyIE,
|
||||
)
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
@@ -249,6 +256,10 @@ from .commonprotocols import (
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
from .corus import CorusIE
|
||||
from .cpac import (
|
||||
CPACIE,
|
||||
CPACPlaylistIE,
|
||||
)
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
@@ -465,6 +476,7 @@ from .hotstar import (
|
||||
)
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrfernsehen import HRFernsehenIE
|
||||
from .hrti import (
|
||||
HRTiIE,
|
||||
HRTiPlaylistIE,
|
||||
@@ -543,6 +555,7 @@ from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .krasview import KrasViewIE
|
||||
from .kth import KTHIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
@@ -594,7 +607,11 @@ from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .line import (
|
||||
LineTVIE,
|
||||
LineLiveIE,
|
||||
LineLiveChannelIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
@@ -602,10 +619,6 @@ from .linkedin import (
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .livejournal import LiveJournalIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
LiveLeakEmbedIE,
|
||||
)
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
@@ -631,6 +644,7 @@ from .mangomolo import (
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .maoritv import MaoriTVIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
@@ -784,7 +798,14 @@ from .nick import (
|
||||
NickNightIE,
|
||||
NickRuIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .niconico import (
|
||||
NiconicoIE,
|
||||
NiconicoPlaylistIE,
|
||||
NiconicoUserIE,
|
||||
NicovideoSearchIE,
|
||||
NicovideoSearchDateIE,
|
||||
NicovideoSearchURLIE,
|
||||
)
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
@@ -878,11 +899,20 @@ from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
)
|
||||
from .palcomp3 import (
|
||||
PalcoMP3IE,
|
||||
PalcoMP3ArtistIE,
|
||||
PalcoMP3VideoIE,
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peekvids import (
|
||||
PeekVidsIE,
|
||||
PlayVidsIE,
|
||||
)
|
||||
from .peertube import PeerTubeIE
|
||||
from .people import PeopleIE
|
||||
from .performgroup import PerformGroupIE
|
||||
@@ -911,6 +941,7 @@ from .platzi import (
|
||||
from .playfm import PlayFMIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playstuff import PlayStuffIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
@@ -1234,6 +1265,11 @@ from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .thisvid import (
|
||||
ThisVidIE,
|
||||
ThisVidMemberIE,
|
||||
ThisVidPlaylistIE,
|
||||
)
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import (
|
||||
TikTokIE,
|
||||
@@ -1595,7 +1631,7 @@ from .youtube import (
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
#YoutubeSearchURLIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
|
@@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
expected=True)
|
||||
elif '>You must log in to continue' in webpage:
|
||||
elif any(p in webpage for p in (
|
||||
'>You must log in to continue',
|
||||
'id="login_form"',
|
||||
'id="loginbutton"')):
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
|
@@ -5,29 +5,23 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
|
||||
_TEST = {
|
||||
'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
|
||||
'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
|
||||
'info_dict': {
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'id': '6060988138001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
'timestamp': 1463332814,
|
||||
'upload_date': '20160515',
|
||||
'uploader_id': '6057949432001',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
ooyala_embed_code = self._search_regex(
|
||||
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
|
||||
bc_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)
|
||||
|
@@ -383,6 +383,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# "<figure id=" pattern (#28792)
|
||||
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -399,7 +403,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
|
||||
return self._make_url_result(video_id)
|
||||
|
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunimationIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_NETRC_MACHINE = 'funimation'
|
||||
_TOKEN = None
|
||||
@@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with lang code
|
||||
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
urlencode_postdata,
|
||||
@@ -102,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
|
||||
'format': 'mp4-408',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
|
||||
'url': 'https://www.gdcvault.com/play/1025699',
|
||||
'info_dict': {
|
||||
'id': '0_zagynv0a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tech Toolbox',
|
||||
'upload_date': '20190408',
|
||||
'uploader_id': 'joe@blazestreaming.com',
|
||||
'timestamp': 1554764629,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# HTML5 video
|
||||
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self, webpage_url, display_id):
|
||||
@@ -175,7 +196,18 @@ class GDCVaultIE(InfoExtractor):
|
||||
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename')
|
||||
start_page, 'xml filename', default=None)
|
||||
if not xml_name:
|
||||
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
|
||||
info.update({
|
||||
'title': remove_start(self._search_regex(
|
||||
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
|
||||
'title', default=None) or self._og_search_title(
|
||||
start_page, default=None), 'GDC Vault - '),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
||||
embed_url = '%s/xml/%s' % (xml_root, xml_name)
|
||||
ie_key = 'DigitallySpeaking'
|
||||
|
||||
|
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
@@ -35,6 +36,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -84,7 +86,6 @@ from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .arkena import ArkenaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .kaltura import KalturaIE
|
||||
@@ -126,6 +127,7 @@ from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .vk import VKIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .medialaan import MedialaanIE
|
||||
@@ -1628,31 +1630,6 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20160409',
|
||||
},
|
||||
},
|
||||
# LiveLeak embed
|
||||
{
|
||||
'url': 'http://www.wykop.pl/link/3088787/',
|
||||
'md5': '7619da8c820e835bef21a1efa2a0fc71',
|
||||
'info_dict': {
|
||||
'id': '874_1459135191',
|
||||
'ext': 'mp4',
|
||||
'title': 'Man shows poor quality of new apartment building',
|
||||
'description': 'The wall is like a sand pile.',
|
||||
'uploader': 'Lake8737',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
},
|
||||
# Another LiveLeak embed pattern (#13336)
|
||||
{
|
||||
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
|
||||
'info_dict': {
|
||||
'id': '2eb_1496309988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thief robs place where everyone was armed',
|
||||
'description': 'md5:694d73ee79e535953cf2488562288eee',
|
||||
'uploader': 'brazilwtf',
|
||||
},
|
||||
'add_ie': [LiveLeakIE.ie_key()],
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
@@ -2248,6 +2225,102 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 52,
|
||||
},
|
||||
{
|
||||
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
|
||||
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
|
||||
'info_dict': {
|
||||
'id': '105',
|
||||
'display_id': 'kelis-4th-of-july',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kelis - 4th Of July',
|
||||
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://www.kvs-demo.com/embed/105/',
|
||||
'info_dict': {
|
||||
'id': '105',
|
||||
'display_id': 'kelis-4th-of-july',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kelis - 4th Of July / Embed Player',
|
||||
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# KVS Player (tested also in thisvid.py)
|
||||
'url': 'https://youix.com/video/leningrad-zoj/',
|
||||
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
|
||||
'info_dict': {
|
||||
'id': '18485',
|
||||
'display_id': 'leningrad-zoj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
|
||||
'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://youix.com/embed/18485',
|
||||
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
|
||||
'info_dict': {
|
||||
'id': '18485',
|
||||
'display_id': 'leningrad-zoj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ленинград - ЗОЖ',
|
||||
'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
|
||||
'md5': '94166bdb26b4cb1fb9214319a629fc51',
|
||||
'info_dict': {
|
||||
'id': '21217',
|
||||
'display_id': '40-nochey-2016',
|
||||
'ext': 'mp4',
|
||||
'title': '40 ночей (2016) - BogMedia.org',
|
||||
'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
|
||||
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player (for sites that serve kt_player.js via non-https urls)
|
||||
'url': 'http://www.camhub.world/embed/389508',
|
||||
'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
|
||||
'info_dict': {
|
||||
'id': '389508',
|
||||
'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
|
||||
'ext': 'mp4',
|
||||
'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
|
||||
'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mrdeepfakes.com/video/5/selena-gomez-pov-deep-fakes',
|
||||
'md5': 'fec4ad5ec150f655e0c74c696a4a2ff4',
|
||||
'info_dict': {
|
||||
'id': '5',
|
||||
'display_id': 'selena-gomez-pov-deep-fakes',
|
||||
'ext': 'mp4',
|
||||
'title': 'Selena Gomez POV (Deep Fakes) DeepFake Porn - MrDeepFakes',
|
||||
'description': 'md5:17d1f84b578c9c26875ac5ef9a932354',
|
||||
'height': 720,
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
|
||||
'md5': 'e2f0a4c329f7986280b7328e24036d60',
|
||||
'info_dict': {
|
||||
'id': '284002',
|
||||
'display_id': 'just-out-of-the-shower-joi',
|
||||
'ext': 'mp4',
|
||||
'title': 'Just Out Of The Shower JOI - Shooshtime',
|
||||
'height': 720,
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2352,6 +2425,88 @@ class GenericIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
def _extract_kvs(self, url, webpage, video_id):
    """Build an info dict for a page that embeds the KVS player.

    The player configuration is read from the ``flashvars`` JavaScript
    object found in ``webpage``. Every ``video_url`` / ``video_alt_url*``
    entry whose value contains ``/get_file/`` becomes one format; URLs
    prefixed with ``function/0/`` are de-obfuscated with the page's
    ``license_code`` first.

    :param url: page URL; used as the base for relative media URLs, as
        the ``Referer`` header and as the scheme source for
        protocol-relative thumbnails
    :param webpage: HTML of the page
    :param video_id: id used when reporting JSON parsing problems
    :return: dict with ``id``, ``display_id``, ``title``, ``thumbnail``
        and ``formats``
    """

    def getlicensetoken(license):
        # Turn the license code into the digit string that drives the
        # character swaps in getrealurl().
        modlicense = license.replace('$', '').replace('0', '1')
        center = len(modlicense) // 2
        fronthalf = int(modlicense[:center + 1])
        backhalf = int(modlicense[center:])

        modlicense = compat_str(4 * abs(fronthalf - backhalf))

        def parts():
            for o in range(0, center + 1):
                for i in range(1, 5):
                    yield compat_str((int(license[o + i]) + int(modlicense[o])) % 10)

        return ''.join(parts())

    def getrealurl(video_url, license_code):
        if not video_url.startswith('function/0/'):
            return video_url  # not obfuscated

        url_path, _, url_query = video_url.partition('?')
        # drop the leading 'function' and '0' path components
        urlparts = url_path.split('/')[2:]
        license = getlicensetoken(license_code)
        # only the first 32 characters of this path component are scrambled
        newmagic = urlparts[5][:32]

        def spells(x, o):
            # yield x with the characters at positions o and swap exchanged
            swap = (o + sum(int(n) for n in license[o:])) % 32
            for i in range(0, len(x)):
                yield {swap: x[o], o: x[swap]}.get(i, x[i])

        # undo the swaps from the last position back to the first
        for o in range(len(newmagic) - 1, -1, -1):
            newmagic = ''.join(spells(newmagic, o))

        urlparts[5] = newmagic + urlparts[5][32:]
        return '/'.join(urlparts) + '?' + url_query

    flashvars = self._search_regex(
        r'(?s)<script\b[^>]*>.*?var\s+flashvars\s*=\s*(\{.+?\});.*?</script>',
        webpage, 'flashvars')
    flashvars = self._parse_json(flashvars, video_id, transform_source=js_to_json)

    # extract the part after the last / as the display_id from the
    # canonical URL.
    display_id = self._search_regex(
        r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
        r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
        webpage, 'display_id', fatal=False
    )
    title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')

    # The thumbnail is optional metadata: a missing 'preview_url' must not
    # abort an otherwise successful extraction.
    thumbnail = flashvars.get('preview_url')
    if thumbnail and thumbnail.startswith('//'):
        # protocol-relative URL: reuse the scheme ('https:') of the page URL
        protocol, _, _ = url.partition('/')
        thumbnail = protocol + thumbnail

    url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys()))
    formats = []
    for key in url_keys:
        if '/get_file/' not in flashvars[key]:
            continue
        # prefer the human readable label (e.g. a resolution) when present
        format_id = flashvars.get(key + '_text', key)
        formats.append(merge_dicts(
            parse_resolution(format_id) or parse_resolution(flashvars[key]), {
                'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])),
                'format_id': format_id,
                'ext': 'mp4',
                'http_headers': {'Referer': url},
            }))
        if not formats[-1].get('height'):
            # no resolution could be parsed for this format
            formats[-1]['quality'] = 1

    self._sort_formats(formats)

    return {
        'id': flashvars['video_id'],
        'display_id': display_id,
        'title': title,
        'thumbnail': thumbnail,
        'formats': formats,
    }
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return self.url_result(self.http_scheme() + url)
|
||||
@@ -2560,9 +2715,16 @@ class GenericIE(InfoExtractor):
|
||||
# but actually don't.
|
||||
AGE_LIMIT_MARKERS = [
|
||||
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
|
||||
r'>[^<]*you acknowledge you are at least (\d+) years old',
|
||||
r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
|
||||
]
|
||||
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||
age_limit = 18
|
||||
for marker in AGE_LIMIT_MARKERS:
|
||||
m = re.search(marker, webpage)
|
||||
if not m:
|
||||
continue
|
||||
age_limit = max(
|
||||
age_limit or 0,
|
||||
int_or_none(m.groups() and m.group(1), default=18))
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(
|
||||
@@ -2777,6 +2939,11 @@ class GenericIE(InfoExtractor):
|
||||
if odnoklassniki_url:
|
||||
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
|
||||
|
||||
# Look for sibnet embedded player
|
||||
sibnet_urls = VKIE._extract_sibnet_urls(webpage)
|
||||
if sibnet_urls:
|
||||
return self.playlist_from_matches(sibnet_urls, video_id, video_title)
|
||||
|
||||
# Look for embedded ivi player
|
||||
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
@@ -3168,11 +3335,6 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
||||
|
||||
# Look for LiveLeak embeds
|
||||
liveleak_urls = LiveLeakIE._extract_urls(webpage)
|
||||
if liveleak_urls:
|
||||
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
|
||||
|
||||
# Look for 3Q SDN embeds
|
||||
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
||||
if threeqsdn_url:
|
||||
@@ -3400,12 +3562,29 @@ class GenericIE(InfoExtractor):
|
||||
'url': src,
|
||||
'ext': (mimetype2ext(src_type)
|
||||
or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
|
||||
'http_headers': {
|
||||
'Referer': full_response.geturl(),
|
||||
},
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
# Look for generic KVS player (before ld+json for tests)
|
||||
found = self._search_regex(
|
||||
(r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
|
||||
# kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
|
||||
r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
|
||||
), webpage, 'KVS player', group='ver', default=False)
|
||||
if found:
|
||||
self.report_extraction('%s: KVS Player' % (video_id, ))
|
||||
if found.split('.')[0] not in ('4', '5', '6'):
|
||||
self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
|
||||
return merge_dicts(
|
||||
self._extract_kvs(url, webpage, video_id),
|
||||
info_dict)
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
@@ -3468,7 +3647,7 @@ class GenericIE(InfoExtractor):
|
||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
|
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -116,6 +118,18 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
|
||||
'info_dict': {
|
||||
'id': 'VDKA22600213',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pilot',
|
||||
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
@@ -149,14 +163,30 @@ class GoIE(AdobePassIE):
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
||||
'data', default='{}'),
|
||||
display_id or video_id, fatal=False)
|
||||
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
||||
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
||||
video_id = None
|
||||
if layout:
|
||||
video_id = try_get(
|
||||
layout,
|
||||
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
||||
compat_str)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
|
101
youtube_dl/extractor/hrfernsehen.py
Normal file
101
youtube_dl/extractor/hrfernsehen.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
unescapeHTML
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
    """Extractor for video pages on hr-fernsehen.de and hessenschau.de.

    The page carries a JSON blob in the ``data-new-hr-mediaplayer-loader``
    attribute; formats, air date, subtitles and preview images are all
    taken from that blob.
    """
    IE_NAME = 'hrfernsehen'
    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'

    _TESTS = [{
        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
        'md5': '5c4e0ba94677c516a2f65a84110fc536',
        'info_dict': {
            'id': '130546',
            'ext': 'mp4',
            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
            'subtitles': {'de': [{
                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
            }]},
            'timestamp': 1598470200,
            'upload_date': '20200826',
            'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
            'title': 'hessenschau vom 26.08.2020'
        }
    }, {
        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
        'only_matching': True
    }]

    _GEO_COUNTRIES = ['DE']

    def extract_airdate(self, loader_data):
        """Return the air date from the loader data as a timestamp, or None.

        Reads ``mediaMetadata.agf.airdate``; intermediate values that are
        missing or null are treated as empty.
        """
        media_metadata = loader_data.get('mediaMetadata') or {}
        airdate_str = (media_metadata.get('agf') or {}).get('airdate')

        if airdate_str is None:
            return None

        return unified_timestamp(airdate_str)

    def extract_formats(self, loader_data):
        """Return the sorted format list built from ``videoResolutionLevels``.

        Entries without a URL are skipped. When the URL carries a
        ``<width>x<height>-<fps>p-<tbr>kbit`` marker, those values refine
        the format (the height from the URL replaces ``verticalResolution``).
        """
        stream_formats = []
        for stream_obj in loader_data.get('videoResolutionLevels') or []:
            stream_url = stream_obj.get('url')
            if not stream_url:
                # nothing downloadable in this entry
                continue

            vertical_resolution = stream_obj.get('verticalResolution')
            stream_format = {
                'height': int_or_none(vertical_resolution),
                'url': stream_url,
            }
            if vertical_resolution is not None:
                stream_format['format_id'] = '%sp' % vertical_resolution

            quality_information = re.search(
                r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
                stream_url)
            if quality_information:
                stream_format['width'] = int_or_none(quality_information.group(1))
                stream_format['height'] = int_or_none(quality_information.group(2))
                stream_format['fps'] = int_or_none(quality_information.group(3))
                stream_format['tbr'] = int_or_none(quality_information.group(4))

            stream_formats.append(stream_format)

        self._sort_formats(stream_formats)
        return stream_formats

    def _real_extract(self, url):
        """Download the page at ``url`` and return its info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The title is a mandatory field, so fail right here with a clear
        # message instead of handing a None title to the caller.
        title = self._html_search_meta(
            ['og:title', 'twitter:title', 'name'], webpage,
            display_name='title', fatal=True)
        description = self._html_search_meta(
            ['description'], webpage)

        # the attribute value is HTML-escaped JSON
        loader_str = unescapeHTML(self._search_regex(
            r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
        loader_data = json.loads(loader_str)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': self.extract_formats(loader_data),
            'timestamp': self.extract_airdate(loader_data)
        }

        subtitle_url = loader_data.get('subtitle')
        if subtitle_url:
            info['subtitles'] = {'de': [{'url': subtitle_url}]}

        # De-duplicate the preview images while keeping their original order.
        seen_urls = set()
        thumbnails = []
        for thumbnail_url in (loader_data.get('previewImageUrl') or {}).values():
            if not thumbnail_url or thumbnail_url in seen_urls:
                continue
            seen_urls.add(thumbnail_url)
            thumbnails.append({'url': thumbnail_url})
        if thumbnails:
            info['thumbnails'] = thumbnails

        return info
|
@@ -1,6 +1,9 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
@@ -90,7 +93,11 @@ class InfoQIE(BokeCCBaseIE):
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
try:
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
except ExtractorError:
|
||||
fields = {}
|
||||
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
@@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by britneyspears',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 0,
|
||||
'timestamp': 1453760977,
|
||||
'upload_date': '20160125',
|
||||
'uploader_id': 'britneyspears',
|
||||
@@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Post by instagram',
|
||||
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||
},
|
||||
}, {
|
||||
# IGTV
|
||||
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
|
||||
'info_dict': {
|
||||
'id': 'BkfuX9UB-eK',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fingerboarding Tricks with @cass.fb',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 53.83,
|
||||
'timestamp': 1530032919,
|
||||
'upload_date': '20180626',
|
||||
'uploader_id': 'instagram',
|
||||
'uploader': 'Instagram',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
title = media.get('title')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
duration = float_or_none(media.get('video_duration'))
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
@@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'title': node.get('title') or 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'duration': float_or_none(node.get('video_duration')),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
@@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'title': title or 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
|
@@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
|
||||
'id': '196219',
|
||||
'display_id': 'stories-from-emona-i',
|
||||
'ext': 'flac',
|
||||
'title': 'Maya Filipič - Stories from Emona I',
|
||||
'artist': 'Maya Filipič',
|
||||
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||
'title': 'Stories from Emona I',
|
||||
# 'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1217438117,
|
||||
'upload_date': '20080730',
|
||||
'license': 'by-nc-nd',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'average_rating': int,
|
||||
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, resource, resource_id):
    """Fetch one ``resource`` object (e.g. 'track') from the Jamendo API.

    The request goes to ``/api/<resource>s`` with ``id[]=<resource_id>``
    and an ``X-Jam-Call`` header of the form ``$<sha1(path + rand)>*<rand>~``,
    where ``rand`` is a fresh random number rendered as a string.
    Returns the first element of the JSON response.
    """
    api_path = '/api/%ss' % resource
    nonce = compat_str(random.random())
    digest = hashlib.sha1((api_path + nonce).encode()).hexdigest()
    response = self._download_json(
        'https://www.jamendo.com' + api_path, resource_id,
        query={
            'id[]': resource_id,
        },
        headers={
            'X-Jam-Call': '$%s*%s~' % (digest, nonce)
        })
    return response[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
models = self._parse_json(self._html_search_regex(
|
||||
r"data-bundled-models='([^']+)",
|
||||
webpage, 'bundled models'), track_id)
|
||||
track = models['track']['models'][0]
|
||||
# webpage = self._download_webpage(
|
||||
# 'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
# models = self._parse_json(self._html_search_regex(
|
||||
# r"data-bundled-models='([^']+)",
|
||||
# webpage, 'bundled models'), track_id)
|
||||
# track = models['track']['models'][0]
|
||||
track = self._call_api('track', track_id)
|
||||
title = track_name = track['name']
|
||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
artist = get_model('artist')
|
||||
artist_name = artist.get('name')
|
||||
if artist_name:
|
||||
title = '%s - %s' % (artist_name, title)
|
||||
album = get_model('album')
|
||||
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
# artist = get_model('artist')
|
||||
# artist_name = artist.get('name')
|
||||
# if artist_name:
|
||||
# title = '%s - %s' % (artist_name, title)
|
||||
# album = get_model('album')
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
|
||||
|
||||
urls = []
|
||||
thumbnails = []
|
||||
for _, covers in track.get('cover', {}).items():
|
||||
for covers in (track.get('cover') or {}).values():
|
||||
for cover_id, cover_url in covers.items():
|
||||
if not cover_url or cover_url in urls:
|
||||
continue
|
||||
@@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
|
||||
})
|
||||
|
||||
tags = []
|
||||
for tag in track.get('tags', []):
|
||||
for tag in (track.get('tags') or []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
tags.append(tag_name)
|
||||
|
||||
stats = track.get('stats') or {}
|
||||
license = track.get('licenseCC') or []
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
@@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': track.get('description'),
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
'artist': artist_name,
|
||||
# 'artist': artist_name,
|
||||
'track': track_name,
|
||||
'album': album.get('name'),
|
||||
# 'album': album.get('name'),
|
||||
'formats': formats,
|
||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
||||
'license': '-'.join(license) if license else None,
|
||||
'timestamp': int_or_none(track.get('dateCreated')),
|
||||
'view_count': int_or_none(stats.get('listenedAll')),
|
||||
'like_count': int_or_none(stats.get('favorited')),
|
||||
@@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
class JamendoAlbumIE(JamendoIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
@@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
}
|
||||
}
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
@@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||
album_name = album.get('name')
|
||||
|
||||
entries = []
|
||||
for track in album.get('tracks', []):
|
||||
for track in (album.get('tracks') or []):
|
||||
track_id = track.get('id')
|
||||
if not track_id:
|
||||
continue
|
||||
|
@@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
||||
def _extract_urls(webpage):
|
||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||
finditer = (
|
||||
re.finditer(
|
||||
list(re.finditer(
|
||||
r"""(?xs)
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
@@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
|
||||
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||
""", webpage)
|
||||
or re.finditer(
|
||||
""", webpage))
|
||||
or list(re.finditer(
|
||||
r'''(?xs)
|
||||
(?P<q1>["'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
@@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
|
||||
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
||||
)
|
||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage)
|
||||
or re.finditer(
|
||||
''', webpage))
|
||||
or list(re.finditer(
|
||||
r'''(?xs)
|
||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
|
||||
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||
(?:(?!(?P=q1)).)*
|
||||
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
||||
(?:(?!(?P=q1)).)*
|
||||
(?P=q1)
|
||||
''', webpage)
|
||||
''', webpage))
|
||||
)
|
||||
urls = []
|
||||
for mobj in finditer:
|
||||
@@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||
'view_count': info.get('plays'),
|
||||
'view_count': int_or_none(info.get('plays')),
|
||||
}
|
||||
|
31
youtube_dl/extractor/kth.py
Normal file
31
youtube_dl/extractor/kth.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class KTHIE(InfoExtractor):
    """Extractor for play.kth.se media pages.

    Does no extraction itself: the id taken from the URL is wrapped in a
    ``kaltura:308:<id>`` URL (with the service URL smuggled along) and
    handed over to the 'Kaltura' extractor.
    """
    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
    _TEST = {
        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
        'info_dict': {
            'id': '0_uoop6oz9',
            'ext': 'mp4',
            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3516,
            'timestamp': 1647345358,
            'upload_date': '20220315',
            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
        }
    }

    def _real_extract(self, url):
        """Return a url_result that delegates ``url`` to the Kaltura extractor."""
        # NOTE(review): 308 is presumably the Kaltura partner id - confirm
        kaltura_url = smuggle_url(
            'kaltura:308:%s' % self._match_id(url),
            {'service_url': 'https://api.kaltura.nordu.net'})
        return self.url_result(kaltura_url, 'Kaltura')
|
@@ -120,6 +120,26 @@ class LBRYIE(LBRYBaseIE):
|
||||
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}, {
|
||||
# HLS
|
||||
'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e',
|
||||
'md5': 'fc82f45ea54915b1495dd7cb5cc1289f',
|
||||
'info_dict': {
|
||||
'id': 'e51671357333fe22ae88aad320bde2f6f96b1410',
|
||||
'ext': 'mp4',
|
||||
'title': 'PLANTS I WILL NEVER GROW AGAIN. THE BLACK LIST PLANTS FOR A CANADIAN GARDEN | Gardening in Canada 🍁',
|
||||
'description': 'md5:9c539c6a03fb843956de61a4d5288d5e',
|
||||
'timestamp': 1618254123,
|
||||
'upload_date': '20210412',
|
||||
'release_timestamp': 1618254002,
|
||||
'release_date': '20210412',
|
||||
'tags': list,
|
||||
'duration': 554,
|
||||
'channel': 'Gardening In Canada',
|
||||
'channel_id': 'b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'channel_url': 'https://odysee.com/@gardeningincanada:b8be0e93b423dad221abe29545fbe8ec36e806bc',
|
||||
'formats': 'mincount:3',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||
'only_matching': True,
|
||||
@@ -163,10 +183,18 @@ class LBRYIE(LBRYBaseIE):
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
info = self._parse_stream(result, url)
|
||||
urlh = self._request_webpage(
|
||||
streaming_url, display_id, note='Downloading streaming redirect url info')
|
||||
if determine_ext(urlh.geturl()) == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(info['formats'])
|
||||
else:
|
||||
info['url'] = streaming_url
|
||||
info.update({
|
||||
'id': claim_id,
|
||||
'title': title,
|
||||
'url': streaming_url,
|
||||
})
|
||||
return info
|
||||
|
||||
|
@@ -4,7 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LineTVIE(InfoExtractor):
|
||||
@@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
|
||||
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
||||
'view_count': video_info.get('meta', {}).get('count'),
|
||||
}
|
||||
|
||||
|
||||
class LineLiveBaseIE(InfoExtractor):
    """Shared helpers for the LINE LIVE extractors."""

    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

    def _parse_broadcast_item(self, item):
        """Turn one broadcast item from the API into an info dict.

        ``item['id']`` and ``item['title']`` are required (KeyError if
        missing); every other field is optional and degrades to None or an
        empty list.
        """
        broadcast_id = compat_str(item['id'])
        title = item['title']
        is_live = item.get('isBroadcastingNow')

        # Keep only thumbnails that actually carry a URL.
        thumbnails = [
            {
                'id': thumb_id,
                'url': thumb_url,
            }
            for thumb_id, thumb_url in (item.get('thumbnailURLs') or {}).items()
            if thumb_url
        ]

        channel = item.get('channel') or {}
        channel_id = str_or_none(channel.get('id'))

        if is_live:
            title = self._live_title(title)

        channel_url = None
        if channel_id:
            channel_url = 'https://live.line.me/channels/' + channel_id

        return {
            'id': broadcast_id,
            'title': title,
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': channel_url,
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),
            'is_live': is_live,
        }
|
||||
|
||||
|
||||
class LineLiveIE(LineLiveBaseIE):
    """Extractor for a single LINE LIVE broadcast URL."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
        'info_dict': {
            'id': '16331360',
            'title': '振りコピ講座😙😙😙',
            'ext': 'mp4',
            'timestamp': 1617095132,
            'upload_date': '20210330',
            'channel': '白川ゆめか',
            'channel_id': '4867368',
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }, {
        # archiveStatus == 'DELETED'
        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the broadcast JSON and build the info dict with its HLS formats.

        Raises ExtractorError (expected) when no format is available and the
        API reports an archive status other than 'ARCHIVED'.
        """
        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
        broadcast = self._download_json(
            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
            broadcast_id)
        item = broadcast['item']
        info = self._parse_broadcast_item(item)
        protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
        formats = []
        # Live broadcasts read 'liveHLSURLs', finished ones 'archivedHLSURLs'.
        for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
            if not v:
                continue
            if k == 'abr':
                # The 'abr' entry is a manifest that is expanded into its variants.
                formats.extend(self._extract_m3u8_formats(
                    v, broadcast_id, 'mp4', protocol,
                    m3u8_id='hls', fatal=False))
                continue
            f = {
                'ext': 'mp4',
                'format_id': 'hls-' + k,
                'protocol': protocol,
                'url': v,
            }
            # Non-numeric keys are flagged as having no video stream.
            if not k.isdigit():
                f['vcodec'] = 'none'
            formats.append(f)
        if not formats:
            archive_status = item.get('archiveStatus')
            # archiveStatus may be missing or null; calling .lower() on None
            # would mask the real problem with an AttributeError. In that case
            # fall through and let _sort_formats deal with the empty list.
            if archive_status and archive_status != 'ARCHIVED':
                raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
        self._sort_formats(formats)
        info['formats'] = formats
        return info
|
||||
|
||||
|
||||
class LineLiveChannelIE(LineLiveBaseIE):
    """Extractor that lists a LINE LIVE channel's archived broadcasts as a playlist."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    _TEST = {
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
            'title': 'いくらちゃん',
            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
        },
        'playlist_mincount': 29
    }

    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
        """Yield one URL entry per archived broadcast, following pagination.

        ``archived_broadcasts`` is the first page (a dict with optional 'rows'
        and 'hasNextPage'); later pages are requested with ``lastId`` set to
        the id of the last entry yielded from the current page.
        """
        while True:
            last_id = None
            for row in (archived_broadcasts.get('rows') or []):
                share_url = str_or_none(row.get('shareURL'))
                if not share_url:
                    continue
                info = self._parse_broadcast_item(row)
                info.update({
                    '_type': 'url',
                    'url': share_url,
                    'ie_key': LineLiveIE.ie_key(),
                })
                last_id = info['id']
                yield info
            if not archived_broadcasts.get('hasNextPage'):
                return
            if last_id is None:
                # Nothing usable on this page, so there is no cursor to
                # continue from. Stop instead of failing on an unbound name
                # or re-requesting the same page forever.
                return
            archived_broadcasts = self._download_json(
                self._API_BASE_URL + channel_id + '/archived_broadcasts',
                channel_id, query={
                    'lastId': last_id,
                })

    def _real_extract(self, url):
        """Download the channel JSON and wrap its archived broadcasts in a playlist."""
        channel_id = self._match_id(url)
        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
        return self.playlist_result(
            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
            channel_id, channel.get('title'), channel.get('information'))
|
||||
|
@@ -1,191 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
    """Extractor for liveleak.com ``view?i=`` / ``view?t=`` pages."""

    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'md5': '0813c2430bea7a46bf13acf3406992f4',
        'info_dict': {
            'id': '757_1364311680',
            'ext': 'mp4',
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
        'info_dict': {
            'id': 'f93_1390833151',
            'ext': 'mp4',
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Prochan embed
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
        'md5': '42c6d97d54f1db107958760788c5f48f',
        'info_dict': {
            'id': '4f7_1392687779',
            'ext': 'mp4',
            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
            'uploader': 'CapObveus',
            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
            'age_limit': 18,
        },
        'skip': 'Video is dead',
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
        # Multiple resolutions
        'url': 'http://www.liveleak.com/view?i=801_1409392012',
        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
        'info_dict': {
            'id': '801_1409392012',
            'ext': 'mp4',
            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
            'uploader': 'bony333',
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
        'url': 'http://m.liveleak.com/view?i=763_1473349649',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '763_1473349649',
            'ext': 'mp4',
            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
            'uploader': 'Ziz',
            'upload_date': '20160908',
            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.liveleak.com/view?i=677_1439397581',
        'info_dict': {
            'id': '677_1439397581',
            'title': 'Fuel Depot in China Explosion caught on video',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
        'only_matching': True,
    }, {
        # No original video
        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every LiveLeak ``ll_embed`` iframe found in *webpage*."""
        embed_re = r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"'
        return re.findall(embed_re, webpage)

    def _real_extract(self, url):
        """Extract the page's HTML5 media entries, or defer to an embedded player.

        Returns a playlist of the page's media entries; when the page has
        none, returns a ``url_transparent`` result pointing at the embedded
        prochan/youtube iframe instead.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page).replace('LiveLeak.com -', '').strip()
        description = self._og_search_description(page)
        uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', page, 'uploader', fatal=False)
        age_limit = int_or_none(self._search_regex(
            r'you confirm that you are ([0-9]+) years and over.',
            page, 'age limit', default=None))
        thumbnail = self._og_search_thumbnail(page)

        entries = self._parse_html5_media_entries(url, page, video_id)
        if not entries:
            # No HTML5 media on the page: maybe it is an embed?
            return {
                '_type': 'url_transparent',
                'url': self._search_regex(
                    r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
                    page, 'embed URL'),
                'id': video_id,
                'title': title,
                'description': description,
                'uploader': uploader,
                'age_limit': age_limit,
            }

        several = len(entries) > 1
        for number, entry in enumerate(entries, 1):
            collected = []
            for fmt in entry['formats']:
                if not fmt.get('height'):
                    # Fall back to the "<N>p.mp4" label in the URL.
                    fmt['height'] = int_or_none(self._search_regex(
                        r'([0-9]+)p\.mp4', fmt['url'], 'height label',
                        default=None))
                collected.append(fmt)

                # Dropping the '.mp4.<suffix>' part gives the raw video, which
                # is essentially the same video without the LiveLeak logo at
                # the top (see
                # https://github.com/ytdl-org/youtube-dl/pull/4768)
                raw_url = re.sub(r'\.mp4\.[^.]+', '', fmt['url'])
                if raw_url == fmt['url']:
                    continue
                base_id = fmt.get('format_id')
                raw_id = 'original-' + base_id if base_id else 'original'
                if self._is_valid_url(raw_url, video_id, raw_id):
                    collected.append({
                        'format_id': raw_id,
                        'url': raw_url,
                        'preference': 1,
                    })
            self._sort_formats(collected)
            entry['formats'] = collected

            # One-video pages keep the bare id for backward compatibility;
            # only multi-video pages get a numeric suffix.
            entry['id'] = '%s_%s' % (video_id, number) if several else video_id

            entry.update({
                'title': title,
                'description': description,
                'uploader': uploader,
                'age_limit': age_limit,
                'thumbnail': thumbnail,
            })

        return self.playlist_result(entries, video_id, title)
|
||||
|
||||
|
||||
class LiveLeakEmbedIE(InfoExtractor):
    """Extractor for liveleak.com ``ll_embed`` URLs; resolves them to a LiveLeak view URL."""

    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'

    # See generic.py for actual test cases
    _TESTS = [{
        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
        'only_matching': True,
    }, {
        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the embed URL to a ``view?`` URL and hand it to LiveLeakIE."""
        mobj = re.match(self._VALID_URL, url)
        kind, video_id = mobj.group('kind', 'id')

        if kind != 'f':
            # 'i' and 't' ids can be used in a view URL directly.
            view_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
        else:
            # For 'f' ids the view URL has to be scraped from the embed page.
            embed_page = self._download_webpage(url, video_id)
            view_url = self._search_regex(
                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                embed_page, 'LiveLeak URL', group='url')

        return self.url_result(view_url, ie=LiveLeakIE.ie_key())
|
@@ -1,11 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -20,17 +25,20 @@ class ManyVidsIE(InfoExtractor):
|
||||
'id': '133957',
|
||||
'ext': 'mp4',
|
||||
'title': 'everthing about me (Preview)',
|
||||
'uploader': 'ellyxxix',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# full video
|
||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
|
||||
'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
|
||||
'info_dict': {
|
||||
'id': '935718',
|
||||
'ext': 'mp4',
|
||||
'title': 'MY FACE REVEAL',
|
||||
'description': 'md5:ec5901d41808b3746fed90face161612',
|
||||
'uploader': 'Sarah Calanthe',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
@@ -39,17 +47,50 @@ class ManyVidsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
|
||||
try:
|
||||
webpage = self._download_webpage(real_url, video_id)
|
||||
except Exception:
|
||||
# probably useless fallback
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video URL', group='url')
|
||||
info = self._search_regex(
|
||||
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
|
||||
webpage, 'meta details', default='')
|
||||
info = extract_attributes(info)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True)
|
||||
player = self._search_regex(
|
||||
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
|
||||
webpage, 'player details', default='')
|
||||
player = extract_attributes(player)
|
||||
|
||||
video_urls_and_ids = (
|
||||
(info.get('data-meta-video'), 'video'),
|
||||
(player.get('data-video-transcoded'), 'transcoded'),
|
||||
(player.get('data-video-filepath'), 'filepath'),
|
||||
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
|
||||
)
|
||||
|
||||
def txt_or_none(s, default=None):
|
||||
return (s.strip() or default) if isinstance(s, compat_str) else default
|
||||
|
||||
uploader = txt_or_none(info.get('data-meta-author'))
|
||||
|
||||
def mung_title(s):
|
||||
if uploader:
|
||||
s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
|
||||
return txt_or_none(s)
|
||||
|
||||
title = (
|
||||
mung_title(info.get('data-meta-title'))
|
||||
or self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None)
|
||||
or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True))
|
||||
|
||||
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
|
||||
|
||||
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||
title += ' (Preview)'
|
||||
@@ -62,7 +103,8 @@ class ManyVidsIE(InfoExtractor):
|
||||
# Sets some cookies
|
||||
self._download_webpage(
|
||||
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||
video_id, fatal=False, data=urlencode_postdata({
|
||||
video_id, note='Setting format cookies', fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'mvtoken': mv_token,
|
||||
'vid': video_id,
|
||||
}), headers={
|
||||
@@ -70,23 +112,56 @@ class ManyVidsIE(InfoExtractor):
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
formats = [{'url': video_url}]
|
||||
formats = []
|
||||
for v_url, fmt in video_urls_and_ids:
|
||||
v_url = url_or_none(v_url)
|
||||
if not v_url:
|
||||
continue
|
||||
if determine_ext(v_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': v_url,
|
||||
'format_id': fmt,
|
||||
})
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||
'view count', default=None))
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
if f.get('height') is None:
|
||||
f['height'] = int_or_none(
|
||||
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
|
||||
if '/preview/' in f['url']:
|
||||
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
|
||||
f['preference'] = -10
|
||||
if 'transcoded' in f['format_id']:
|
||||
f['preference'] = f.get('preference', -1) - 1
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def get_likes():
|
||||
likes = self._search_regex(
|
||||
r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
|
||||
webpage, 'likes', default='')
|
||||
likes = extract_attributes(likes)
|
||||
return int_or_none(likes.get('data-likes'))
|
||||
|
||||
def get_views():
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
|
||||
webpage, 'view count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
'description': txt_or_none(info.get('data-meta-description')),
|
||||
'uploader': txt_or_none(info.get('data-meta-author')),
|
||||
'thumbnail': (
|
||||
url_or_none(info.get('data-meta-image'))
|
||||
or url_or_none(player.get('data-video-screenshot'))),
|
||||
'view_count': get_views(),
|
||||
'like_count': get_likes(),
|
||||
}
|
||||
|
31
youtube_dl/extractor/maoritv.py
Normal file
31
youtube_dl/extractor/maoritv.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MaoriTVIE(InfoExtractor):
    """Extractor for maoritelevision.com show pages; defers to the 'BrightcoveNew' extractor."""

    _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
        'md5': '5ade8ef53851b6a132c051b1cd858899',
        'info_dict': {
            'id': '4774724855001',
            'ext': 'mp4',
            'title': 'Kōrero Mai, Series 1 Episode 54',
            'upload_date': '20160226',
            'timestamp': 1456455018,
            'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
            'uploader_id': '1614493167001',
        },
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Read the Brightcove video id from the page and return a URL result for it."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # The numeric id sits in the page's data-main-video-id attribute.
        brightcove_id = self._search_regex(
            r'data-main-video-id=["\'](\d+)', page, 'brightcove id')
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id
        return self.url_result(player_url, 'BrightcoveNew', brightcove_id)
|
@@ -15,33 +15,39 @@ from ..utils import (
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
||||
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
|
||||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||
'info_dict': {
|
||||
'id': '34934644',
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quad Cold',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'MowgliSB',
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': 10619174,
|
||||
'uploader_id': '10619174',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/36787208',
|
||||
'url': 'https://medal.tv/clips/2um24TWdty0NA',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '36787208',
|
||||
'id': '2um24TWdty0NA',
|
||||
'ext': 'mp4',
|
||||
'title': 'u tk me i tk u bigger',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'Mimicc',
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': 5156321,
|
||||
'uploader_id': '5156321',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -24,7 +24,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
player(?:/v\d+)?/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9A-Z]{16,})
|
||||
'''
|
||||
@@ -73,6 +73,10 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/)
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'mediaset:FAFU000000665924',
|
||||
'only_matching': True,
|
||||
|
@@ -78,7 +78,7 @@ class MindsIE(MindsBaseIE):
|
||||
else:
|
||||
return self.url_result(entity['perma_url'])
|
||||
else:
|
||||
assert(entity['subtype'] == 'video')
|
||||
assert (entity['subtype'] == 'video')
|
||||
video_id = entity_id
|
||||
# 1080p and webm formats available only on the sources array
|
||||
video = self._call_api(
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
@@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'title': 'a/ Hot Teens',
|
||||
'categories': list,
|
||||
'upload_date': '20210104',
|
||||
'uploader_id': 'yonbiw',
|
||||
'uploader_id': 'anonymous',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
@@ -125,9 +126,10 @@ class MotherlessIE(InfoExtractor):
|
||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
|
||||
comment_count = webpage.count('class="media-comment-contents"')
|
||||
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
||||
uploader_id = self._html_search_regex(
|
||||
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
|
||||
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
|
||||
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
|
||||
webpage, 'uploader_id')
|
||||
|
||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||
@@ -169,7 +171,18 @@ class MotherlessGroupIE(InfoExtractor):
|
||||
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
|
||||
'any kind!'
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 0,
|
||||
'expected_warnings': [
|
||||
'This group has no videos.',
|
||||
]
|
||||
}, {
|
||||
'url': 'https://motherless.com/g/beautiful_cock',
|
||||
'info_dict': {
|
||||
'id': 'beautiful_cock',
|
||||
'title': 'Beautiful Cock',
|
||||
'description': 'Group for lovely cocks yours, mine, a friends anything human',
|
||||
},
|
||||
'playlist_mincount': 2500,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -208,16 +221,23 @@ class MotherlessGroupIE(InfoExtractor):
|
||||
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, fatal=False)
|
||||
page_count = self._int(self._search_regex(
|
||||
r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
|
||||
webpage, 'page_count'), 'page_count')
|
||||
page_count = str_to_int(self._search_regex(
|
||||
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
|
||||
webpage, 'page_count', default=0))
|
||||
if not page_count:
|
||||
message = self._search_regex(
|
||||
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
|
||||
webpage, 'error_msg', default=None) or 'This group has no videos.'
|
||||
self.report_warning(message, group_id)
|
||||
page_count = 1
|
||||
PAGE_SIZE = 80
|
||||
|
||||
def _get_page(idx):
|
||||
webpage = self._download_webpage(
|
||||
page_url, group_id, query={'page': idx + 1},
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||
)
|
||||
if idx > 0:
|
||||
webpage = self._download_webpage(
|
||||
page_url, group_id, query={'page': idx + 1},
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||
)
|
||||
for entry in self._extract_entries(webpage, url):
|
||||
yield entry
|
||||
|
||||
|
@@ -255,7 +255,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_child_with_type(parent, t):
|
||||
return next(c for c in parent['children'] if c.get('type') == t)
|
||||
for c in parent['children']:
|
||||
if c.get('type') == t:
|
||||
return c
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
@@ -286,7 +288,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
|
||||
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
|
||||
mgid = video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
return mgid
|
||||
|
@@ -35,7 +35,9 @@ class MySpassIE(InfoExtractor):
|
||||
title = xpath_text(metadata, 'title', fatal=True)
|
||||
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||
video_id_int = int(video_id)
|
||||
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
||||
|
||||
grps = re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url)
|
||||
for group in grps.groups() if grps else []:
|
||||
group_int = int(group)
|
||||
if group_int > video_id_int:
|
||||
video_url = video_url.replace(
|
||||
|
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
@@ -20,13 +22,13 @@ class NDRBaseIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = next(group for group in mobj.groups() if group)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self._extract_embed(webpage, display_id)
|
||||
return self._extract_embed(webpage, display_id, url)
|
||||
|
||||
|
||||
class NDRIE(NDRBaseIE):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
@@ -38,13 +40,14 @@ class NDRIE(NDRBaseIE):
|
||||
'title': 'Party, Pötte und Parade',
|
||||
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
||||
'uploader': 'ndrtv',
|
||||
'timestamp': 1431108900,
|
||||
'timestamp': 1431255671,
|
||||
'upload_date': '20150510',
|
||||
'duration': 3498,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# httpVideo, different content id
|
||||
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
||||
@@ -63,6 +66,7 @@ class NDRIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpAudio, same content id
|
||||
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
||||
@@ -74,8 +78,8 @@ class NDRIE(NDRBaseIE):
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'uploader': 'ndrinfo',
|
||||
'timestamp': 1290626100,
|
||||
'upload_date': '20140729',
|
||||
'timestamp': 1631711863,
|
||||
'upload_date': '20210915',
|
||||
'duration': 884,
|
||||
},
|
||||
'params': {
|
||||
@@ -89,9 +93,10 @@ class NDRIE(NDRBaseIE):
|
||||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||
'description': 'md5:700f6de264010585012a72f97b0ac0c9',
|
||||
'uploader': 'ndrtv',
|
||||
'upload_date': '20201113',
|
||||
'upload_date': '20201207',
|
||||
'timestamp': 1614349457,
|
||||
'duration': 1749,
|
||||
'subtitles': {
|
||||
'de': [{
|
||||
@@ -109,19 +114,38 @@ class NDRIE(NDRBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
embed_url = self._html_search_meta(
|
||||
'embedURL', webpage, 'embed URL',
|
||||
default=None) or self._search_regex(
|
||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url')
|
||||
def _extract_embed(self, webpage, display_id, url):
|
||||
embed_url = (
|
||||
self._html_search_meta(
|
||||
'embedURL', webpage, 'embed URL',
|
||||
default=None)
|
||||
or self._search_regex(
|
||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url', default=None)
|
||||
or self._search_regex(
|
||||
r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url', default=''))
|
||||
# some more work needed if we only found sophoraID
|
||||
if re.match(r'^[a-z]+\d+$', embed_url):
|
||||
# get the initial part of the url path, e.g. /panorama/archiv/2022/
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
|
||||
# find tell-tale image with the actual ID
|
||||
ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
|
||||
# or try to use special knowledge!
|
||||
NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
|
||||
embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
|
||||
if not embed_url:
|
||||
raise ExtractorError('Unable to extract embedUrl')
|
||||
|
||||
description = self._search_regex(
|
||||
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(
|
||||
self._search_regex(
|
||||
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
|
||||
webpage, 'upload date', default=None))
|
||||
(r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
|
||||
r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
|
||||
webpage, 'upload date', group='cont', default=None))
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
return merge_dicts({
|
||||
'_type': 'url_transparent',
|
||||
@@ -153,19 +177,19 @@ class NJoyIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpVideo, different content id
|
||||
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
||||
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
||||
'info_dict': {
|
||||
'id': 'dockville882',
|
||||
'id': 'livestream283',
|
||||
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
||||
'ext': 'mp4',
|
||||
'title': '"Ich hab noch nie" mit Felix Jaehn',
|
||||
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
|
||||
'ext': 'mp3',
|
||||
'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
|
||||
'description': 'md5:681698f527b8601e511e7b79edde7d2c',
|
||||
'uploader': 'njoy',
|
||||
'upload_date': '20150822',
|
||||
'duration': 211,
|
||||
'upload_date': '20210830',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -175,18 +199,25 @@ class NJoyIE(NDRBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
def _extract_embed(self, webpage, display_id, url=None):
|
||||
# find tell-tale URL with the actual ID, or ...
|
||||
video_id = self._search_regex(
|
||||
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
|
||||
description = self._search_regex(
|
||||
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
(r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
|
||||
r'<iframe[^>]+id="pp_([\da-z]+)"', ),
|
||||
webpage, 'NDR id', default=None)
|
||||
|
||||
description = (
|
||||
self._html_search_meta('description', webpage)
|
||||
or self._search_regex(
|
||||
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||
webpage, 'description', fatal=False))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'NDREmbedBase',
|
||||
'url': 'ndr:%s' % video_id,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'title': display_id.replace('-', ' ').strip(),
|
||||
}
|
||||
|
||||
|
||||
@@ -291,7 +322,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
|
||||
class NDREmbedIE(NDREmbedBaseIE):
|
||||
IE_NAME = 'ndr:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||
@@ -304,6 +335,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'upload_date': '20150907',
|
||||
'duration': 132,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
||||
'md5': '002085c44bae38802d94ae5802a36e78',
|
||||
@@ -319,6 +351,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
@@ -328,7 +361,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'is_live': False,
|
||||
'uploader': 'ndrinfo',
|
||||
'upload_date': '20140729',
|
||||
'upload_date': '20210915',
|
||||
'duration': 884,
|
||||
},
|
||||
'params': {
|
||||
@@ -349,15 +382,17 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpVideoLive
|
||||
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
||||
'info_dict': {
|
||||
'id': 'livestream217',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
'upload_date': '20150910',
|
||||
'upload_date': '20210409',
|
||||
'uploader': 'ndrtv',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -395,9 +430,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
||||
'is_live': False,
|
||||
'upload_date': '20150807',
|
||||
'upload_date': '20200826',
|
||||
'duration': 1011,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# httpAudio
|
||||
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
||||
@@ -414,6 +450,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpAudioLive, no explicit ext
|
||||
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
||||
@@ -423,7 +460,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
'uploader': 'njoy',
|
||||
'upload_date': '20150810',
|
||||
'upload_date': '20210830',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@@ -1,20 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from hashlib import md5
|
||||
from base64 import b64encode
|
||||
from binascii import hexlify
|
||||
from datetime import datetime
|
||||
from hashlib import md5
|
||||
from random import randint
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_encrypt, pkcs7_padding
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
error_to_compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,32 +47,106 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
result = b64encode(m.digest()).decode('ascii')
|
||||
return result.replace('/', '_').replace('+', '-')
|
||||
|
||||
@classmethod
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
    """Build the form body and HTTP headers for a player-URL API request.

    A compact JSON payload (song id list, requested bitrate and a dict of
    client fields) is combined with the API path and an MD5 hex digest of
    both, run through pkcs7_padding() and aes_ecb_encrypt() with a fixed
    key, and hex-encoded in upper case.

    Returns a tuple (body, headers): body is the string 'params=<HEX>',
    and headers carries the same client fields serialised into the
    Cookie header, with None values written as 'undefined'.
    """
    api_path = '/api/song/enhance/player/url'
    aes_key = b'e82ckenh8dichen8'

    # requestId is the millisecond timestamp plus a zero-padded random number
    timestamp_ms = int(time.time() * 1000)
    nonce = randint(0, 1000)
    client_fields = {
        'osver': None,
        'deviceId': None,
        'appver': '8.0.0',
        'versioncode': '140',
        'mobilename': None,
        'buildver': '1623435496',
        'resolution': '1920x1080',
        '__csrf': '',
        'os': 'pc',
        'channel': None,
        'requestId': '{0}_{1:04}'.format(timestamp_ms, nonce),
    }

    # compact separators: the exact payload text is also fed into the digest
    payload = json.dumps(
        {
            'ids': '[{0}]'.format(song_id),
            'br': bitrate,
            'header': client_fields,
        },
        separators=(',', ':'))

    digest_input = 'nobody{0}use{1}md5forencrypt'.format(api_path, payload)
    digest = md5(digest_input.encode('latin1')).hexdigest()

    plaintext = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
        api_path, payload, digest)
    padded = pkcs7_padding(bytes_to_intlist(plaintext))
    cipher = aes_ecb_encrypt(padded, bytes_to_intlist(aes_key))
    hex_params = hexlify(intlist_to_bytes(cipher)).decode('ascii').upper()

    # the same client fields are sent again as a cookie string
    cookie_parts = []
    for name, value in client_fields.items():
        if value is None:
            value = 'undefined'
        cookie_parts.append('{0}={1}'.format(name, value))

    headers = {
        'User-Agent': std_headers['User-Agent'],
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://music.163.com',
        'Cookie': '; '.join(cookie_parts),
    }
    return 'params={0}'.format(hex_params), headers
|
||||
|
||||
def _call_player_api(self, song_id, bitrate):
|
||||
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
|
||||
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
|
||||
try:
|
||||
msg = 'empty result'
|
||||
result = self._download_json(
|
||||
url, song_id, data=data.encode('ascii'), headers=headers)
|
||||
if result:
|
||||
return result
|
||||
except ExtractorError as e:
|
||||
if type(e.cause) in (ValueError, TypeError):
|
||||
# JSON load failure
|
||||
raise
|
||||
except Exception as e:
|
||||
msg = error_to_compat_str(e)
|
||||
self.report_warning('%s API call (%s) failed: %s' % (
|
||||
song_id, bitrate, msg))
|
||||
return {}
|
||||
|
||||
def extract_formats(self, info):
|
||||
err = 0
|
||||
formats = []
|
||||
song_id = info['id']
|
||||
for song_format in self._FORMATS:
|
||||
details = info.get(song_format)
|
||||
if not details:
|
||||
continue
|
||||
song_file_path = '/%s/%s.%s' % (
|
||||
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
|
||||
|
||||
# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
|
||||
# from NetEase's CDN provider that can be used if m5.music.126.net does not
|
||||
# work, especially for users outside of Mainland China
|
||||
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
|
||||
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
|
||||
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
|
||||
song_url = host + song_file_path
|
||||
bitrate = int_or_none(details.get('bitrate')) or 999000
|
||||
data = self._call_player_api(song_id, bitrate)
|
||||
for song in try_get(data, lambda x: x['data'], list) or []:
|
||||
song_url = try_get(song, lambda x: x['url'])
|
||||
if not song_url:
|
||||
continue
|
||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
'ext': details.get('extension'),
|
||||
'abr': float_or_none(details.get('bitrate'), scale=1000),
|
||||
'abr': float_or_none(song.get('br'), scale=1000),
|
||||
'format_id': song_format,
|
||||
'filesize': details.get('size'),
|
||||
'asr': details.get('sr')
|
||||
'filesize': int_or_none(song.get('size')),
|
||||
'asr': int_or_none(details.get('sr')),
|
||||
})
|
||||
break
|
||||
elif err == 0:
|
||||
err = try_get(song, lambda x: x['code'], int)
|
||||
|
||||
if not formats:
|
||||
msg = 'No media links found'
|
||||
if err != 0 and (err < 200 or err >= 400):
|
||||
raise ExtractorError(
|
||||
'%s (site code %d)' % (msg, err, ), expected=True)
|
||||
else:
|
||||
self.raise_geo_restricted(
|
||||
msg + ': probably this video is not available from your location due to geo restriction.',
|
||||
countries=['CN'])
|
||||
|
||||
return formats
|
||||
|
||||
@classmethod
|
||||
@@ -76,33 +162,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:song'
|
||||
IE_DESC = '网易云音乐'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://music.163.com/#/song?id=32102397',
|
||||
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
|
||||
'md5': '3e909614ce09b1ccef4a3eb205441190',
|
||||
'info_dict': {
|
||||
'id': '32102397',
|
||||
'ext': 'mp3',
|
||||
'title': 'Bad Blood (feat. Kendrick Lamar)',
|
||||
'title': 'Bad Blood',
|
||||
'creator': 'Taylor Swift / Kendrick Lamar',
|
||||
'upload_date': '20150517',
|
||||
'timestamp': 1431878400,
|
||||
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
|
||||
'upload_date': '20150516',
|
||||
'timestamp': 1431792000,
|
||||
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'No lyrics translation.',
|
||||
'url': 'http://music.163.com/#/song?id=29822014',
|
||||
'info_dict': {
|
||||
'id': '29822014',
|
||||
'ext': 'mp3',
|
||||
'title': '听见下雨的声音',
|
||||
'creator': '周杰伦',
|
||||
'upload_date': '20141225',
|
||||
'timestamp': 1419523200,
|
||||
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'No lyrics.',
|
||||
'url': 'http://music.163.com/song?id=17241424',
|
||||
@@ -112,9 +184,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'title': 'Opus 28',
|
||||
'creator': 'Dustin O\'Halloran',
|
||||
'upload_date': '20080211',
|
||||
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
|
||||
'timestamp': 1202745600,
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'Has translated name.',
|
||||
'url': 'http://music.163.com/#/song?id=22735043',
|
||||
@@ -128,7 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'timestamp': 1264608000,
|
||||
'alt_title': '说出愿望吧(Genie)',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||
'md5': '95826c73ea50b1c288b22180ec9e754d',
|
||||
'info_dict': {
|
||||
'id': '95670',
|
||||
'ext': 'mp3',
|
||||
'title': '国际歌',
|
||||
'creator': '马备',
|
||||
'upload_date': '19911130',
|
||||
'timestamp': 691516800,
|
||||
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
||||
},
|
||||
}]
|
||||
|
||||
def _process_lyrics(self, lyrics_info):
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -7,7 +8,7 @@ from ..utils import urljoin
|
||||
|
||||
|
||||
class NhkBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
|
||||
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
|
||||
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
|
||||
_TYPE_REGEX = r'/(?P<type>video|audio)/'
|
||||
|
||||
@@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
|
||||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||
if episode_id.isdigit():
|
||||
if len(episode_id) == 7:
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
is_video = m_type == 'video'
|
||||
@@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class NhkVodIE(NhkBaseIE):
|
||||
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], e.g. 9999a34
|
||||
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
@@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video, alphabetic character in ID #29670
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'qfjay6cg',
|
||||
'ext': 'mp4',
|
||||
'title': 'DESIGN TALKS plus - Fishermen’s Finery',
|
||||
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
|
||||
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
|
||||
'upload_date': '20210615',
|
||||
'timestamp': 1623722008,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -2,25 +2,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
PostProcessingError,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
@@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'md5': 'a5bad06f1347452102953f323c69da34s',
|
||||
'info_dict': {
|
||||
'id': 'sm22312215',
|
||||
'ext': 'mp4',
|
||||
@@ -157,11 +160,34 @@ class NiconicoIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DMC video with heartbeat
|
||||
'url': 'https://www.nicovideo.jp/watch/sm34815188',
|
||||
'md5': '9360c6e1f1519d7759e2fe8e1326ae83',
|
||||
'info_dict': {
|
||||
'id': 'sm34815188',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:aee93e9f3366db72f902f6cd5d389cb7',
|
||||
'description': 'md5:7b9149fc7a00ab053cafaf5c19662704',
|
||||
'thumbnail': r're:https?://.*',
|
||||
'uploader': 'md5:2762e18fa74dbb40aa1ad27c6291ee32',
|
||||
'uploader_id': '67449889',
|
||||
'upload_date': '20190322',
|
||||
'timestamp': int, # timestamp is unstable
|
||||
'duration': 1082.0,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0'
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -191,37 +217,89 @@ class NiconicoIE(InfoExtractor):
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return login_ok
|
||||
|
||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||
def yesno(boolean):
|
||||
return 'yes' if boolean else 'no'
|
||||
def _get_heartbeat_info(self, info_dict):
|
||||
|
||||
session_api_data = api_data['video']['dmcInfo']['session_api']
|
||||
session_api_endpoint = session_api_data['urls'][0]
|
||||
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||
|
||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||
api_data = (
|
||||
info_dict.get('_api_data')
|
||||
or self._parse_json(
|
||||
self._html_search_regex(
|
||||
'data-api-data="([^"]+)"',
|
||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||
'API data', default='{}'),
|
||||
video_id))
|
||||
|
||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||
|
||||
def ping():
|
||||
status = try_get(
|
||||
self._download_json(
|
||||
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
|
||||
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
|
||||
note='Acquiring permission for downloading video',
|
||||
headers=self._API_HEADERS),
|
||||
lambda x: x['meta']['status'])
|
||||
if status != 200:
|
||||
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
|
||||
|
||||
yesno = lambda x: 'yes' if x else 'no'
|
||||
|
||||
# m3u8 (encryption)
|
||||
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
|
||||
protocol = 'm3u8'
|
||||
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
|
||||
session_api_http_parameters = {
|
||||
'parameters': {
|
||||
'hls_parameters': {
|
||||
'encryption': {
|
||||
encryption: {
|
||||
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
|
||||
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
|
||||
}
|
||||
},
|
||||
'transfer_preset': '',
|
||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||
'segment_duration': 6000,
|
||||
}
|
||||
}
|
||||
}
|
||||
# http
|
||||
else:
|
||||
protocol = 'http'
|
||||
session_api_http_parameters = {
|
||||
'parameters': {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
session_response = self._download_json(
|
||||
session_api_endpoint['url'], video_id,
|
||||
query={'_format': 'json'},
|
||||
headers={'Content-Type': 'application/json'},
|
||||
note='Downloading JSON metadata for %s' % format_id,
|
||||
note='Downloading JSON metadata for %s' % info_dict['format_id'],
|
||||
data=json.dumps({
|
||||
'session': {
|
||||
'client_info': {
|
||||
'player_id': session_api_data['player_id'],
|
||||
'player_id': session_api_data.get('playerId'),
|
||||
},
|
||||
'content_auth': {
|
||||
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
|
||||
'content_key_timeout': session_api_data['content_key_timeout'],
|
||||
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||
'service_id': 'nicovideo',
|
||||
'service_user_id': session_api_data['service_user_id']
|
||||
'service_user_id': session_api_data.get('serviceUserId')
|
||||
},
|
||||
'content_id': session_api_data['content_id'],
|
||||
'content_id': session_api_data.get('contentId'),
|
||||
'content_src_id_sets': [{
|
||||
'content_src_ids': [{
|
||||
'src_id_to_mux': {
|
||||
'audio_src_ids': [audio_quality['id']],
|
||||
'video_src_ids': [video_quality['id']],
|
||||
'audio_src_ids': [audio_src_id],
|
||||
'video_src_ids': [video_src_id],
|
||||
}
|
||||
}]
|
||||
}],
|
||||
@@ -229,52 +307,81 @@ class NiconicoIE(InfoExtractor):
|
||||
'content_uri': '',
|
||||
'keep_method': {
|
||||
'heartbeat': {
|
||||
'lifetime': session_api_data['heartbeat_lifetime']
|
||||
'lifetime': session_api_data.get('heartbeatLifetime')
|
||||
}
|
||||
},
|
||||
'priority': session_api_data['priority'],
|
||||
'priority': session_api_data.get('priority'),
|
||||
'protocol': {
|
||||
'name': 'http',
|
||||
'parameters': {
|
||||
'http_parameters': {
|
||||
'parameters': {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
||||
}
|
||||
}
|
||||
}
|
||||
'http_parameters': session_api_http_parameters
|
||||
}
|
||||
},
|
||||
'recipe_id': session_api_data['recipe_id'],
|
||||
'recipe_id': session_api_data.get('recipeId'),
|
||||
'session_operation_auth': {
|
||||
'session_operation_auth_by_signature': {
|
||||
'signature': session_api_data['signature'],
|
||||
'token': session_api_data['token'],
|
||||
'signature': session_api_data.get('signature'),
|
||||
'token': session_api_data.get('token'),
|
||||
}
|
||||
},
|
||||
'timing_constraint': 'unlimited'
|
||||
}
|
||||
}).encode())
|
||||
|
||||
resolution = video_quality.get('resolution', {})
|
||||
info_dict['url'] = session_response['data']['session']['content_uri']
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# get heartbeat info
|
||||
heartbeat_info_dict = {
|
||||
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||
'data': json.dumps(session_response['data']),
|
||||
# interval: scale=3000 turns the lifetime in milliseconds into one third of it in seconds (not half), leaving a buffer.
|
||||
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
||||
'ping': ping
|
||||
}
|
||||
|
||||
return info_dict, heartbeat_info_dict
|
||||
|
||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||
def parse_format_id(id_code):
|
||||
mobj = re.match(r'''(?x)
|
||||
(?:archive_)?
|
||||
(?:(?P<codec>[^_]+)_)?
|
||||
(?:(?P<br>[\d]+)kbps_)?
|
||||
(?:(?P<res>[\d+]+)p_)?
|
||||
''', '%s_' % id_code)
|
||||
return mobj.groupdict() if mobj else {}
|
||||
|
||||
protocol = 'niconico_dmc'
|
||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||
vdict = parse_format_id(video_quality['id'])
|
||||
adict = parse_format_id(audio_quality['id'])
|
||||
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
||||
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
||||
|
||||
return {
|
||||
'url': session_response['data']['session']['content_uri'],
|
||||
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
||||
'format_id': format_id,
|
||||
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
|
||||
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
|
||||
'height': resolution.get('height'),
|
||||
'width': resolution.get('width'),
|
||||
'vcodec': vdict.get('codec'),
|
||||
'acodec': adict.get('codec'),
|
||||
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
||||
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
||||
'width': int_or_none(resolution.get('width')),
|
||||
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
|
||||
'protocol': protocol,
|
||||
'http_headers': {
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Get video webpage. We are not actually interested in it for normal
|
||||
# cases, but need the cookies in order to be able to download the
|
||||
# info webpage
|
||||
# Get video webpage for API data.
|
||||
webpage, handle = self._download_webpage_handle(
|
||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
if video_id.startswith('so'):
|
||||
@@ -284,86 +391,136 @@ class NiconicoIE(InfoExtractor):
|
||||
'data-api-data="([^"]+)"', webpage,
|
||||
'API data', default='{}'), video_id)
|
||||
|
||||
def _format_id_from_url(video_url):
|
||||
return 'economy' if video_real_url.endswith('low') else 'normal'
|
||||
def get_video_info_web(items):
|
||||
return dict_get(api_data['video'], items)
|
||||
|
||||
try:
|
||||
video_real_url = api_data['video']['smileInfo']['url']
|
||||
except KeyError: # Flash videos
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
# Get video info
|
||||
video_info_xml = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||
video_id, note='Downloading video info page')
|
||||
|
||||
flv_info = compat_parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
elif 'closed' in flv_info:
|
||||
raise ExtractorError('Niconico videos now require logging in',
|
||||
expected=True)
|
||||
elif 'error' in flv_info:
|
||||
raise ExtractorError('%s reports error: %s' % (
|
||||
self.IE_NAME, flv_info['error'][0]), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find video URL')
|
||||
def get_video_info_xml(items):
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
for item in items:
|
||||
ret = xpath_text(video_info_xml, './/' + item)
|
||||
if ret:
|
||||
return ret
|
||||
|
||||
video_info_xml = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||
video_id, note='Downloading video info page')
|
||||
if get_video_info_xml('error'):
|
||||
error_code = get_video_info_xml('code')
|
||||
|
||||
def get_video_info(items):
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
for item in items:
|
||||
ret = xpath_text(video_info_xml, './/' + item)
|
||||
if ret:
|
||||
return ret
|
||||
if error_code == 'DELETED':
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
elif error_code == 'NOT_FOUND':
|
||||
raise ExtractorError('The video is not found.',
|
||||
expected=True)
|
||||
elif error_code == 'COMMUNITY':
|
||||
self.to_screen('%s: The video is community members only.' % video_id)
|
||||
else:
|
||||
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
|
||||
|
||||
video_real_url = flv_info['url'][0]
|
||||
# Start extracting video formats
|
||||
formats = []
|
||||
|
||||
extension = get_video_info('movie_type')
|
||||
if not extension:
|
||||
extension = determine_ext(video_real_url)
|
||||
# Get HTML5 videos info
|
||||
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
||||
if not quality_info:
|
||||
raise ExtractorError('The video can\'t be downloaded', expected=True)
|
||||
|
||||
formats = [{
|
||||
'url': video_real_url,
|
||||
'ext': extension,
|
||||
'format_id': _format_id_from_url(video_real_url),
|
||||
}]
|
||||
else:
|
||||
formats = []
|
||||
for audio_quality in quality_info.get('audios') or {}:
|
||||
for video_quality in quality_info.get('videos') or {}:
|
||||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||
continue
|
||||
formats.append(self._extract_format_for_quality(
|
||||
api_data, video_id, audio_quality, video_quality))
|
||||
|
||||
dmc_info = api_data['video'].get('dmcInfo')
|
||||
if dmc_info: # "New" HTML5 videos
|
||||
quality_info = dmc_info['quality']
|
||||
for audio_quality in quality_info['audios']:
|
||||
for video_quality in quality_info['videos']:
|
||||
if not audio_quality['available'] or not video_quality['available']:
|
||||
continue
|
||||
formats.append(self._extract_format_for_quality(
|
||||
api_data, video_id, audio_quality, video_quality))
|
||||
# Get flv/swf info
|
||||
timestamp = None
|
||||
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
||||
if video_real_url:
|
||||
is_economy = video_real_url.endswith('low')
|
||||
|
||||
self._sort_formats(formats)
|
||||
else: # "Old" HTML5 videos
|
||||
formats = [{
|
||||
if is_economy:
|
||||
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||
|
||||
# Invoking ffprobe to determine resolution
|
||||
pp = FFmpegPostProcessor(self._downloader)
|
||||
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||
|
||||
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||
|
||||
try:
|
||||
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||
except PostProcessingError as err:
|
||||
raise ExtractorError(err.msg, expected=True)
|
||||
|
||||
v_stream = a_stream = {}
|
||||
|
||||
# Some complex swf files don't have a video stream (e.g. nm4809023)
|
||||
for stream in metadata['streams']:
|
||||
if stream['codec_type'] == 'video':
|
||||
v_stream = stream
|
||||
elif stream['codec_type'] == 'audio':
|
||||
a_stream = stream
|
||||
|
||||
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||
filesize = int(
|
||||
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||
or metadata['format']['size']
|
||||
)
|
||||
extension = (
|
||||
get_video_info_xml('movie_type')
|
||||
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||
)
|
||||
|
||||
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
||||
timestamp = (
|
||||
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||
)
|
||||
metadata_timestamp = (
|
||||
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||
or timestamp if extension != 'mp4' else 0
|
||||
)
|
||||
|
||||
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||
|
||||
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||
|
||||
# If movie file size is unstable, old server movie is not source movie.
|
||||
if filesize > 1:
|
||||
formats.append({
|
||||
'url': video_real_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': _format_id_from_url(video_real_url),
|
||||
}]
|
||||
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||
'ext': extension,
|
||||
'container': extension,
|
||||
'vcodec': v_stream.get('codec_name'),
|
||||
'acodec': a_stream.get('codec_name'),
|
||||
# Some complex swf files don't have total bit rate metadata (e.g. nm6049209)
|
||||
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||
'height': int_or_none(v_stream.get('height')),
|
||||
'width': int_or_none(v_stream.get('width')),
|
||||
'source_preference': 5 if not is_economy else -2,
|
||||
'quality': 5 if is_source and not is_economy else None,
|
||||
'filesize': filesize
|
||||
})
|
||||
|
||||
def get_video_info(items):
|
||||
return dict_get(api_data['video'], items)
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Start extracting information
|
||||
title = get_video_info('title')
|
||||
if not title:
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
title = (
|
||||
get_video_info_xml('title') # prefer to get the untranslated original title
|
||||
or get_video_info_web(['originalTitle', 'title'])
|
||||
or self._og_search_title(webpage, default=None)
|
||||
or self._html_search_regex(
|
||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||
webpage, 'video title')
|
||||
webpage, 'video title'))
|
||||
|
||||
watch_api_data_string = self._html_search_regex(
|
||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||
@@ -372,14 +529,15 @@ class NiconicoIE(InfoExtractor):
|
||||
video_detail = watch_api_data.get('videoDetail', {})
|
||||
|
||||
thumbnail = (
|
||||
get_video_info(['thumbnail_url', 'thumbnailURL'])
|
||||
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
|
||||
or dict_get( # choose highest from 720p to 240p
|
||||
get_video_info_web('thumbnail'),
|
||||
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
|
||||
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
||||
or video_detail.get('thumbnail'))
|
||||
|
||||
description = get_video_info('description')
|
||||
description = get_video_info_web('description')
|
||||
|
||||
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
|
||||
or unified_timestamp(get_video_info('postedDateTime')))
|
||||
if not timestamp:
|
||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||
if match:
|
||||
@@ -388,19 +546,25 @@ class NiconicoIE(InfoExtractor):
|
||||
timestamp = parse_iso8601(
|
||||
video_detail['postedAt'].replace('/', '-'),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
||||
|
||||
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
|
||||
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
||||
if not view_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||
webpage, 'view count', default=None)
|
||||
if match:
|
||||
view_count = int_or_none(match.replace(',', ''))
|
||||
view_count = view_count or video_detail.get('viewCount')
|
||||
view_count = (
|
||||
view_count
|
||||
or video_detail.get('viewCount')
|
||||
or try_get(api_data, lambda x: x['video']['count']['view']))
|
||||
|
||||
comment_count = (
|
||||
int_or_none(get_video_info_web('comment_num'))
|
||||
or video_detail.get('commentCount')
|
||||
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
||||
|
||||
comment_count = (int_or_none(get_video_info('comment_num'))
|
||||
or video_detail.get('commentCount')
|
||||
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
||||
if not comment_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||
@@ -409,22 +573,41 @@ class NiconicoIE(InfoExtractor):
|
||||
comment_count = int_or_none(match.replace(',', ''))
|
||||
|
||||
duration = (parse_duration(
|
||||
get_video_info('length')
|
||||
get_video_info_web('length')
|
||||
or self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', default=None))
|
||||
or video_detail.get('length')
|
||||
or get_video_info('duration'))
|
||||
or get_video_info_web('duration'))
|
||||
|
||||
webpage_url = get_video_info('watch_url') or url
|
||||
webpage_url = get_video_info_web('watch_url') or url
|
||||
|
||||
# for channel movie and community movie
|
||||
channel_id = try_get(
|
||||
api_data,
|
||||
(lambda x: x['channel']['globalId'],
|
||||
lambda x: x['community']['globalId']))
|
||||
channel = try_get(
|
||||
api_data,
|
||||
(lambda x: x['channel']['name'],
|
||||
lambda x: x['community']['name']))
|
||||
|
||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||
# in the JSON, which will cause None to be returned instead of {}.
|
||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
|
||||
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
|
||||
uploader_id = str_or_none(
|
||||
get_video_info_web(['ch_id', 'user_id'])
|
||||
or owner.get('id')
|
||||
or channel_id
|
||||
)
|
||||
uploader = (
|
||||
get_video_info_web(['ch_name', 'user_nickname'])
|
||||
or owner.get('nickname')
|
||||
or channel
|
||||
)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'_api_data': api_data,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
@@ -432,6 +615,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'duration': duration,
|
||||
@@ -440,7 +625,7 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
|
||||
class NiconicoPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||
@@ -456,60 +641,185 @@ class NiconicoPlaylistIE(InfoExtractor):
|
||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _call_api(self, list_id, resource, query):
    """Download the mylist JSON for ``list_id`` and return its ``data.mylist`` object.

    ``resource`` is only used to label the progress note; ``query`` is sent
    as the request's query parameters.
    """
    api_url = 'https://nvapi.nicovideo.jp/v2/mylists/' + list_id
    progress_note = 'Downloading %s JSON metatdata' % resource
    response = self._download_json(
        api_url, list_id, progress_note,
        query=query, headers={'X-Frontend-Id': 6})
    return response['data']['mylist']
|
||||
|
||||
def _parse_owner(self, item):
|
||||
owner = item.get('owner') or {}
|
||||
if owner:
|
||||
return {
|
||||
'uploader': owner.get('name'),
|
||||
'uploader_id': owner.get('id'),
|
||||
}
|
||||
return {}
|
||||
|
||||
def _fetch_page(self, list_id, page):
    """Yield one ``url``-type entry for each video on a page of the mylist.

    ``page`` is zero-based; the API request is made for ``page + 1``.
    Items that carry no video id are skipped.
    """
    api_page = page + 1
    mylist_page = self._call_api(
        list_id, 'page %d' % api_page,
        {'page': api_page, 'pageSize': self._PAGE_SIZE})
    for entry in mylist_page['items']:
        video = entry.get('video') or {}
        video_id = video.get('id')
        if video_id:
            counters = video.get('count') or {}
            result = {
                '_type': 'url',
                'id': video_id,
                'title': video.get('title'),
                'url': 'https://www.nicovideo.jp/watch/' + video_id,
                'description': video.get('shortDescription'),
                'duration': int_or_none(video.get('duration')),
                'view_count': int_or_none(counters.get('view')),
                'comment_count': int_or_none(counters.get('comment')),
                'ie_key': NiconicoIE.ie_key(),
            }
            # Owner details, when present, are merged on top of the entry.
            result.update(self._parse_owner(video))
            yield result
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
mylist = self._call_api(list_id, 'list', {
|
||||
'pageSize': 1,
|
||||
})
|
||||
entries = InAdvancePagedList(
|
||||
functools.partial(self._fetch_page, list_id),
|
||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||
self._PAGE_SIZE)
|
||||
result = self.playlist_result(
|
||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||
result.update(self._parse_owner(mylist))
|
||||
return result
|
||||
|
||||
def get_page_data(pagenum, pagesize):
|
||||
return self._download_json(
|
||||
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||
query={'page': 1 + pagenum, 'pageSize': pagesize},
|
||||
headers=self._API_HEADERS).get('data').get('mylist')
|
||||
|
||||
data = get_page_data(0, 1)
|
||||
title = data.get('name')
|
||||
description = data.get('description')
|
||||
uploader = data.get('owner').get('name')
|
||||
uploader_id = data.get('owner').get('id')
|
||||
|
||||
def pagefunc(pagenum):
|
||||
data = get_page_data(pagenum, 25)
|
||||
return ({
|
||||
'_type': 'url',
|
||||
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
|
||||
} for item in data.get('items'))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': list_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'entries': OnDemandPagedList(pagefunc, 25),
|
||||
}
|
||||
|
||||
|
||||
class NicovideoSearchBaseIE(InfoExtractor):
    """Shared result-page scraping for the Niconico search extractors."""

    _MAX_RESULTS = float('inf')

    def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
        """Yield a URL result for every video id found on the search pages.

        If ``query`` contains a ``page`` key, only that page is fetched;
        otherwise pages are requested from 1 upwards until a page contains
        no video ids. ``note`` is %-formatted with a ``page`` mapping to
        build the progress message of each request.
        """
        # Work on a private copy: the page number is written into the query
        # before every request and must not leak into the caller's dict.
        query = dict(query or {})
        pages = [query['page']] if 'page' in query else itertools.count(1)
        for page_num in pages:
            query['page'] = str(page_num)
            webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
            results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.+?)(?=["\'])', webpage)
            for item in results:
                yield self.url_result('http://www.nicovideo.jp/watch/%s' % item, 'Niconico', item)
            if not results:
                # A page without any video id marks the end of the results.
                break

    def _get_n_results(self, query, n):
        """Return a playlist of at most ``n`` results for the search ``query``."""
        entries = self._entries(self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
        if n < self._MAX_RESULTS:
            entries = itertools.islice(entries, 0, n)
        return self.playlist_result(entries, query, query)
|
||||
|
||||
|
||||
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Keyword search extractor registered under the ``nicosearch`` key."""

    IE_DESC = 'Nico video search'
    IE_NAME = 'nicovideo:search'
    _SEARCH_KEY = 'nicosearch'

    def _search_results(self, query):
        """Return the entry generator for the search page of ``query``."""
        search_url = self._proto_relative_url('//www.nicovideo.jp/search/%s' % query)
        return self._entries(search_url, query)
|
||||
|
||||
|
||||
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
    """Extractor for Niconico search result URLs."""

    IE_NAME = '%s_url' % NicovideoSearchIE.IE_NAME
    IE_DESC = 'Nico video search URLs'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
    _TESTS = [{
        'url': 'http://www.nicovideo.jp/search/sm9',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_mincount': 40,
    }, {
        'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_count': 31,
    }]

    def _real_extract(self, url):
        """Build a playlist whose id and title are the search term in ``url``."""
        search_term = self._match_id(url)
        found = self._entries(url, search_term)
        return self.playlist_result(found, search_term, search_term)
|
||||
|
||||
|
||||
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Search extractor that walks results by date range, newest first."""

    IE_DESC = 'Nico video search, newest first'
    IE_NAME = '%s:date' % NicovideoSearchIE.IE_NAME
    _SEARCH_KEY = 'nicosearchdate'

    _TESTS = [{
        'url': 'nicosearchdateall:a',
        'info_dict': {
            'id': 'a',
            'title': 'a'
        },
        'playlist_mincount': 1610,
    }]

    _START_DATE = datetime.date(2007, 1, 1)
    _RESULTS_PER_PAGE = 32
    _MAX_PAGES = 50

    def _entries(self, url, item_id, start_date=None, end_date=None):
        """Yield all results between ``start_date`` and ``end_date`` (inclusive).

        ``start_date`` defaults to ``_START_DATE`` and ``end_date`` to today.
        Page ``_MAX_PAGES`` of the range is probed first; if it is completely
        full, the range is split in two and each half is processed
        recursively, newer half first.
        """
        start_date = start_date or self._START_DATE
        end_date = end_date or datetime.datetime.now().date()

        # If the last page has a full page of videos, we need to break down the query interval further
        last_page_len = len(list(self._get_entries_for_date(
            url, item_id, start_date, end_date, self._MAX_PAGES,
            note='Checking number of videos from {0} to {1}'.format(start_date, end_date))))
        if last_page_len == self._RESULTS_PER_PAGE and start_date != end_date:
            midpoint = start_date + ((end_date - start_date) // 2)
            # Both ends of a range are inclusive (a single day is queried with
            # start == end), so the newer half must begin the day after the
            # midpoint. Reusing the midpoint in both halves would yield that
            # day twice and, for two adjacent days (midpoint == start_date),
            # would recurse on the very same range forever.
            newer_half = self._entries(
                url, item_id, midpoint + datetime.timedelta(days=1), end_date)
            older_half = self._entries(url, item_id, start_date, midpoint)
            for entry in itertools.chain(newer_half, older_half):
                yield entry
        else:
            self.to_screen('{0}: Downloading results from {1} to {2}'.format(item_id, start_date, end_date))
            for entry in self._get_entries_for_date(
                    url, item_id, start_date, end_date, note='    Downloading page %(page)s'):
                yield entry

    def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
        """Yield the results for one date range via the base-class page scraper.

        ``end_date`` defaults to ``start_date`` (a single day). When
        ``page_num`` is given, only that page is requested. ``note`` is only
        forwarded when supplied, so the base class's default progress note
        applies otherwise.
        """
        query = {
            'start': compat_str(start_date),
            'end': compat_str(end_date or start_date),
            # NOTE(review): presumably "sort by upload date, descending",
            # matching IE_DESC -- confirm against the site's parameters.
            'sort': 'f',
            'order': 'd',
        }
        if page_num:
            query['page'] = compat_str(page_num)

        kwargs = {'query': query}
        if note is not None:
            # The base class %-formats the note, so None must not be passed on.
            kwargs['note'] = note
        for entry in super(NicovideoSearchDateIE, self)._entries(url, item_id, **kwargs):
            yield entry
|
||||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
    """Extractor that lists all videos uploaded by a Niconico user."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }
    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _entries(self, list_id):
        """Yield a URL result for every video of user ``list_id``.

        Pages of ``_PAGE_SIZE`` items are requested until the number of
        yielded items reaches the ``totalCount`` reported by the first page.
        If no usable ``totalCount`` is reported, paging continues until an
        empty page is returned.
        """
        total_count = 1
        count = page_num = 0
        while total_count is None or count < total_count:
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
                headers=self._API_HEADERS,
                # Report the 1-based page number that is actually requested.
                note='Downloading JSON metadata%s' % (' page %d' % (page_num + 1) if page_num else ''))
            if not page_num:
                # May be None when the field is missing or not numeric.
                total_count = int_or_none(json_parsed['data'].get('totalCount'))
            items = json_parsed['data']['items']
            if not items:
                # An empty page means there is nothing more to fetch; without
                # this the loop would never end if fewer items are served
                # than totalCount announces.
                break
            for entry in items:
                count += 1
                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
            page_num += 1

    def _real_extract(self, url):
        """Return a playlist of the user's videos, identified by the user id."""
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
@@ -58,10 +58,9 @@ class NRKBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
||||
return self._download_json(
|
||||
urljoin('http://psapi.nrk.no/', path),
|
||||
urljoin('https://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query,
|
||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||
fatal=fatal, query=query)
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
|
@@ -1,71 +1,113 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class NuvidIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://m.nuvid.com/video/1310741/',
|
||||
'md5': 'eab207b7ac4fccfb4e23c86201f11277',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nuvid.com/video/6513023/italian-babe',
|
||||
'md5': '772d2f8288f3d3c5c45f7a41761c7844',
|
||||
'info_dict': {
|
||||
'id': '1310741',
|
||||
'id': '6513023',
|
||||
'ext': 'mp4',
|
||||
'title': 'Horny babes show their awesome bodeis and',
|
||||
'duration': 129,
|
||||
'title': 'italian babe',
|
||||
'format_id': '360p',
|
||||
'duration': 321.0,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'thumbnails': list,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.nuvid.com/video/6523263',
|
||||
'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
|
||||
'info_dict': {
|
||||
'id': '6523263',
|
||||
'ext': 'mp4',
|
||||
'title': 'Slut brunette college student anal dorm',
|
||||
'format_id': '720p',
|
||||
'duration': 421.0,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'thumbnails': list,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://m.nuvid.com/video/6415801/',
|
||||
'md5': '638d5ececb138d5753593f751ae3f697',
|
||||
'info_dict': {
|
||||
'id': '6415801',
|
||||
'ext': 'mp4',
|
||||
'title': 'My best friend wanted to fuck my wife for a long time',
|
||||
'format_id': '720p',
|
||||
'duration': 1882,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'thumbnails': list,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
page_url = 'http://m.nuvid.com/video/%s' % video_id
|
||||
qualities = {
|
||||
'lq': '360p',
|
||||
'hq': '720p',
|
||||
}
|
||||
|
||||
json_url = 'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'.format(**locals())
|
||||
video_data = self._download_json(
|
||||
json_url, video_id, headers={
|
||||
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
|
||||
}) or {}
|
||||
|
||||
# nice to have, not required
|
||||
webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading video page')
|
||||
# When dwnld_speed exists and has a value larger than the MP4 file's
|
||||
# bitrate, Nuvid returns the MP4 URL
|
||||
# Its unit is 100 bytes/millisecond; see mobile-nuvid-min.js for the algorithm
|
||||
self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
|
||||
mp4_webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading video page for MP4 format')
|
||||
'http://m.nuvid.com/video/%s' % (video_id, ),
|
||||
video_id, 'Downloading video page', fatal=False) or ''
|
||||
|
||||
title = (
|
||||
try_get(video_data, lambda x: x['title'], compat_str)
|
||||
or self._html_search_regex(
|
||||
(r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
|
||||
r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
|
||||
r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
|
||||
webpage, 'title', group='title')).strip()
|
||||
|
||||
html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
|
||||
video_url = self._html_search_regex(html5_video_re, webpage, video_id)
|
||||
mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
if mp4_video_url != video_url:
|
||||
formats.append({
|
||||
'url': mp4_video_url,
|
||||
})
|
||||
'url': source,
|
||||
'format_id': qualities.get(quality),
|
||||
'height': int_or_none(qualities.get(quality)[:-1]),
|
||||
} for quality, source in video_data.get('files').items() if source]
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<span title="([^"]+)">',
|
||||
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
|
||||
r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))
|
||||
thumbnails = [
|
||||
{
|
||||
'url': thumb_url,
|
||||
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
|
||||
{'url': thumb_url, }
|
||||
for thumb_url in (
|
||||
url_or_none(src) for src in re.findall(
|
||||
r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>',
|
||||
webpage))
|
||||
]
|
||||
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
[r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
|
||||
r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': url_or_none(video_data.get('poster')),
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
get_exe_version,
|
||||
is_outdated_version,
|
||||
process_communicate_or_kill,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
@@ -226,7 +227,7 @@ class PhantomJSwrapper(object):
|
||||
self.exe, '--ssl-protocol=any',
|
||||
self._TMP_FILES['script'].name
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = p.communicate()
|
||||
out, err = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
raise ExtractorError(
|
||||
'Executing JS failed\n:' + encodeArgument(err))
|
||||
|
@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src, video_id, f4m_id=format_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(sd.get('created_date'))
|
||||
|
||||
thumbnails = []
|
||||
preview = sd.get('preview_image_url')
|
||||
if preview:
|
||||
thumbnails.append({
|
||||
'id': 'preview',
|
||||
'url': preview,
|
||||
'preference': 0,
|
||||
})
|
||||
image = sd.get('image_full_url')
|
||||
if not image and len(data_jsb) == 1:
|
||||
image = self._og_search_thumbnail(webpage)
|
||||
if image:
|
||||
thumbnails.append({
|
||||
'id': 'full',
|
||||
'url': image,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
entries.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
@@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
|
||||
'description': sd.get('description'),
|
||||
'duration': int_or_none(sd.get('duration_in_seconds')),
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': sd.get('image_full_url'),
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -182,7 +204,7 @@ class ORFRadioIE(InfoExtractor):
|
||||
duration = end - start if end and start else None
|
||||
entries.append({
|
||||
'id': loop_stream_id.replace('.mp3', ''),
|
||||
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||
'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||
'title': title,
|
||||
'description': clean_html(data.get('subtitle')),
|
||||
'duration': duration,
|
||||
|
148
youtube_dl/extractor/palcomp3.py
Normal file
148
youtube_dl/extractor/palcomp3.py
Normal file
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class PalcoMP3BaseIE(InfoExtractor):
    """Common GraphQL plumbing for the PalcoMP3 extractors."""

    # Outer query: artist slug, then the artist-level field selection.
    _GQL_QUERY_TMPL = '''{
artist(slug: "%s") {
%s
}
}'''
    # Artist-level selection; filled with _MUSIC_FIELDS in _real_initialize,
    # which leaves a single "%s" placeholder for the music slug.
    _ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
%s
}'''
    _MUSIC_FIELDS = '''duration
hls
mp3File
musicID
plays
title'''

    def _call_api(self, artist_slug, artist_fields):
        """Run the GraphQL query for ``artist_slug`` and return its ``data`` object."""
        gql_query = self._GQL_QUERY_TMPL % (artist_slug, artist_fields)
        response = self._download_json(
            'https://www.palcomp3.com.br/graphql/', artist_slug,
            query={'query': gql_query})
        return response['data']

    def _parse_music(self, music):
        """Convert one ``music`` object from the API into an info dict."""
        info = {
            'id': compat_str(music['musicID']),
            'title': music['title'],
        }

        # Each available source becomes one format, HLS first.
        sources = (
            (music.get('hls'), {'protocol': 'm3u8_native', 'ext': 'mp4'}),
            (music.get('mp3File'), {}),
        )
        formats = []
        for media_url, extra_fields in sources:
            if media_url:
                fmt = {'url': media_url}
                fmt.update(extra_fields)
                formats.append(fmt)

        info['formats'] = formats
        info['duration'] = int_or_none(music.get('duration'))
        info['view_count'] = int_or_none(music.get('plays'))
        return info

    def _real_initialize(self):
        """Insert the music field list into the artist-level template."""
        self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS

    def _real_extract(self, url):
        """Look up the music addressed by ``url`` and return its parsed info."""
        mobj = re.match(self._VALID_URL, url)
        artist_slug, music_slug = mobj.groups()
        data = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL % music_slug)
        return self._parse_music(data['artist']['music'])
|
||||
|
||||
|
||||
class PalcoMP3IE(PalcoMP3BaseIE):
    """Extractor for single PalcoMP3 song pages."""

    IE_NAME = 'PalcoMP3:song'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
        'md5': '99fd6405b2d8fd589670f6db1ba3b358',
        'info_dict': {
            'id': '3162927',
            'ext': 'mp3',
            'title': 'Nossas Composições - CUIDA BEM DELA',
            'duration': 210,
            'view_count': int,
        }
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the video extractor accepts; otherwise use the default check."""
        if PalcoMP3VideoIE.suitable(url):
            return False
        return super(PalcoMP3IE, cls).suitable(url)
|
||||
|
||||
|
||||
class PalcoMP3ArtistIE(PalcoMP3BaseIE):
    """Extractor for a PalcoMP3 artist page, returned as a playlist of tracks."""

    IE_NAME = 'PalcoMP3:artist'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.palcomp3.com.br/condedoforro/',
        'info_dict': {
            'id': '358396',
            'title': 'Conde do Forró',
        },
        'playlist_mincount': 188,
    }]
    # Field selection for the artist query; the single %s placeholder sits
    # inside ``musics { nodes { ... } }``.
    _ARTIST_FIELDS_TMPL = '''artistID
musics {
nodes {
%s
}
}
name'''

    @classmethod
    def suitable(cls, url):
        """Reject URLs matching the song pattern; otherwise use the default check."""
        if re.match(PalcoMP3IE._VALID_URL, url):
            return False
        return super(PalcoMP3ArtistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result built from the artist's ``musics.nodes`` list."""
        artist_slug = self._match_id(url)
        artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']

        def entries():
            # A missing or non-list ``nodes`` value yields an empty playlist.
            for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
                yield self._parse_music(music)

        return self.playlist_result(
            entries(), str_or_none(artist.get('artistID')), artist.get('name'))
|
||||
|
||||
|
||||
class PalcoMP3VideoIE(PalcoMP3BaseIE):
    """Extractor for PalcoMP3 song pages addressed with the ``#clipe`` fragment."""

    IE_NAME = 'PalcoMP3:video'
    _VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
    _TESTS = [{
        'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '_pD1nR2qqPg',
            'ext': 'mp4',
            'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
            'description': 'md5:7043342c09a224598e93546e98e49282',
            'upload_date': '20161107',
            'uploader_id': 'maiaramaraisaoficial',
            'uploader': 'Maiara e Maraisa',
        }
    }]
    # Only the YouTube ID is requested for each music node.
    _MUSIC_FIELDS = 'youtubeID'

    def _parse_music(self, music):
        """Delegate to the Youtube extractor using the node's ``youtubeID``."""
        yt_id = music['youtubeID']
        result = self.url_result(yt_id, 'Youtube', yt_id)
        return result
|
193
youtube_dl/extractor/peekvids.py
Normal file
193
youtube_dl/extractor/peekvids.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PeekVidsIE(InfoExtractor):
    """Extractor for peekvids.com video and embed pages."""

    _VALID_URL = r'''(?x)
https?://(?:www\.)?peekvids\.com/
(?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
(?P<id>[^/?&#]*)
'''
    _TESTS = [{
        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
        'info_dict': {
            'id': '1262717',
            'display_id': 'BSyLMbN0YCd',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1642579329,
            'upload_date': '20220119',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }]
    # Host used for the source-list request; subclasses may override it.
    _DOMAIN = 'www.peekvids.com'

    def _get_detail(self, html):
        """Return the contents of the page's detail block element."""
        return get_element_by_class('detail-video-block', html)

    def _real_extract(self, url):
        """Extract formats and metadata for the video page at ``url``.

        Raises ``ExtractorError`` (expected) when the site answers with its
        rate-limit page.
        """
        video_id = self._match_id(url)
        # A 429 response is accepted so that the rate-limit page can be
        # detected and reported with a helpful message below.
        webpage = self._download_webpage(url, video_id, expected_status=429)
        if '>Rate Limit Exceeded' in webpage:
            raise ExtractorError(
                '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'),
                expected=True)

        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

        # The ID from the URL becomes the display ID; the ID used from here
        # on is read from the <video> element's data-id attribute.
        display_id = video_id
        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
        srcs = self._download_json(
            'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id,
            note='Downloading list of source files')

        # Keys of the form data-src<digits> carry one URL each; the digits
        # are used as both format ID and height.
        formats = []
        for src_key, src_value in srcs.items():
            f_match = re.match(r'^data-src(\d{3,})$', src_key)
            f_url = url_or_none(src_value)
            if not (f_match and f_url):
                continue
            f_id = f_match.group(1)
            formats.append({
                'url': f_url,
                'format_id': f_id,
                'height': int_or_none(f_id),
            })
        if not formats:
            # Fall back to every valid URL in the response.  The loop
            # variable is deliberately not called ``url``: on Python 2 a
            # list comprehension variable leaks and would clobber the
            # ``url`` parameter that is still needed further down.
            formats = [
                {'url': src_url}
                for src_url in map(url_or_none, srcs.values())
                if src_url]
        self._sort_formats(formats)

        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        info.pop('url', None)
        # may not have found the thumbnail if it was in a list in the ld+json
        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
        detail = self._get_detail(webpage) or ''
        # Take the text of the detail block up to the ld+json description
        # (or the first <ul>); ``or ''`` guards against a None description.
        info['description'] = self._html_search_regex(
            r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description') or ''), ),
            detail, 'description', default=None) or None
        # Drop the last comma/hyphen-separated segment of the title.
        info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)

        def cat_tags(name, html):
            """Return the whitespace-separated items listed after ``name:``."""
            text = self._html_search_regex(
                r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ),
                html, name, default='')
            return [x for x in re.split(r'\s+', text) if x]

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'age_limit': 18,
            'formats': formats,
            'categories': cat_tags('Categories', detail),
            'tags': cat_tags('Tags', detail),
            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
        }, info)
|
||||
|
||||
|
||||
class PlayVidsIE(PeekVidsIE):
    """Extractor for playvids.com; reuses the PeekVids extraction logic."""

    _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
    _TESTS = [{
        'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'md5': '2f12e50213dd65f142175da633c4564c',
        'info_dict': {
            'id': '1978030',
            'display_id': 'U3pBrYhsjXM',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1640435839,
            'upload_date': '20211225',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
        'md5': 'e783986e596cafbf46411a174ab42ba6',
        'info_dict': {
            'id': '762385',
            'display_id': 'bKmGLe3IwjZ',
            'ext': 'mp4',
            'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
            'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
            'timestamp': 1516958544,
            'upload_date': '20180126',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 480,
            'uploader': 'Brazzers',
            # 'age_limit' used to be listed twice in this dict; the
            # redundant second entry has been dropped.
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/v/47iUho33toY',
        'md5': 'b056b5049d34b648c1e86497cf4febce',
        'info_dict': {
            'id': '700621',
            'display_id': '47iUho33toY',
            'ext': 'mp4',
            'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
            'description': None,
            'timestamp': 1507052209,
            'upload_date': '20171003',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 332,
            'uploader': 'Cacerenele',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
        'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
        'info_dict': {
            'id': '1523518',
            'display_id': 'z3_7iwWCmqt',
            'ext': 'mp4',
            'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
            'description': None,
            'timestamp': 1607470323,
            'upload_date': '20201208',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 593,
            'uploader': 'yorours',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }]
    # Host used by the inherited extraction code for the source-list request.
    _DOMAIN = 'www.playvids.com'

    def _get_detail(self, html):
        """Return the contents of this site's detail block element."""
        return get_element_by_class('detail-block', html)
|
@@ -569,15 +569,15 @@ class PeerTubeIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
fatal=False)
|
||||
description = video.get('description')
|
||||
if len(description) >= 250:
|
||||
# description is shortened
|
||||
full_description = self._call_api(
|
||||
host, video_id, 'description', note='Downloading description JSON',
|
||||
fatal=False)
|
||||
|
||||
description = None
|
||||
if isinstance(full_description, dict):
|
||||
description = str_or_none(full_description.get('description'))
|
||||
if not description:
|
||||
description = video.get('description')
|
||||
if isinstance(full_description, dict):
|
||||
description = str_or_none(full_description.get('description')) or description
|
||||
|
||||
subtitles = self.extract_subtitles(host, video_id)
|
||||
|
||||
|
@@ -12,6 +12,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class PeriscopeBaseIE(InfoExtractor):
|
||||
_M3U8_HEADERS = {
|
||||
'Referer': 'https://www.periscope.tv/'
|
||||
}
|
||||
|
||||
def _call_api(self, method, query, item_id):
|
||||
return self._download_json(
|
||||
'https://api.periscope.tv/api/v2/%s' % method,
|
||||
@@ -54,9 +58,11 @@ class PeriscopeBaseIE(InfoExtractor):
|
||||
m3u8_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native'
|
||||
if state in ('ended', 'timed_out') else 'm3u8',
|
||||
m3u8_id=format_id, fatal=fatal)
|
||||
m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
|
||||
if len(m3u8_formats) == 1:
|
||||
self._add_width_and_height(m3u8_formats[0], width, height)
|
||||
for f in m3u8_formats:
|
||||
f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
|
||||
return m3u8_formats
|
||||
|
||||
|
||||
|
@@ -9,8 +9,9 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
xpath_text,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,10 +28,11 @@ class PhoenixIE(ZDFBaseIE):
|
||||
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||
'duration': 1691,
|
||||
'timestamp': 1613906100,
|
||||
'timestamp': 1613902500,
|
||||
'upload_date': '20210221',
|
||||
'uploader': 'Phoenix',
|
||||
'channel': 'corona nachgehakt',
|
||||
'series': 'corona nachgehakt',
|
||||
'episode': 'Wohin führt der Protest in der Pandemie?',
|
||||
},
|
||||
}, {
|
||||
# Youtube embed
|
||||
@@ -79,50 +81,53 @@ class PhoenixIE(ZDFBaseIE):
|
||||
|
||||
video_id = compat_str(video.get('basename') or video.get('content'))
|
||||
|
||||
details = self._download_xml(
|
||||
details = self._download_json(
|
||||
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
|
||||
video_id, 'Downloading details XML', query={
|
||||
video_id, 'Downloading details JSON', query={
|
||||
'ak': 'web',
|
||||
'ptmd': 'true',
|
||||
'id': video_id,
|
||||
'profile': 'player2',
|
||||
})
|
||||
|
||||
title = title or xpath_text(
|
||||
details, './/information/title', 'title', fatal=True)
|
||||
content_id = xpath_text(
|
||||
details, './/video/details/basename', 'content id', fatal=True)
|
||||
title = title or details['title']
|
||||
content_id = details['tracking']['nielsen']['content']['assetid']
|
||||
|
||||
info = self._extract_ptmd(
|
||||
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
|
||||
content_id, None, url)
|
||||
|
||||
timestamp = unified_timestamp(xpath_text(details, './/details/airtime'))
|
||||
duration = int_or_none(try_get(
|
||||
details, lambda x: x['tracking']['nielsen']['content']['length']))
|
||||
timestamp = unified_timestamp(details.get('editorialDate'))
|
||||
series = try_get(
|
||||
details, lambda x: x['tracking']['nielsen']['content']['program'],
|
||||
compat_str)
|
||||
episode = title if details.get('contentType') == 'episode' else None
|
||||
|
||||
thumbnails = []
|
||||
for node in details.findall('.//teaserimages/teaserimage'):
|
||||
thumbnail_url = node.text
|
||||
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
|
||||
for thumbnail_key, thumbnail_url in teaser_images.items():
|
||||
thumbnail_url = urljoin(url, thumbnail_url)
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
thumbnail_key = node.get('key')
|
||||
if thumbnail_key:
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': xpath_text(details, './/information/detail'),
|
||||
'duration': int_or_none(xpath_text(details, './/details/lengthSec')),
|
||||
'description': details.get('leadParagraph'),
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'uploader': xpath_text(details, './/details/channel'),
|
||||
'uploader_id': xpath_text(details, './/details/originChannelId'),
|
||||
'channel': xpath_text(details, './/details/originChannelTitle'),
|
||||
'uploader': details.get('tvService'),
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
})
|
||||
|
@@ -1,22 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
|
||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||
channel_id)
|
||||
data = self._download_json(
|
||||
'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
|
||||
'query': '''{
|
||||
channel(name: "%s") {
|
||||
adult
|
||||
id
|
||||
online
|
||||
stream_name
|
||||
title
|
||||
}
|
||||
getLoadBalancerUrl(channel_name: "%s") {
|
||||
url
|
||||
}
|
||||
}''' % (channel_id, channel_id),
|
||||
})['data']
|
||||
metadata = data['channel']
|
||||
|
||||
if metadata.get('online') is False:
|
||||
if metadata.get('online') == 0:
|
||||
raise ExtractorError('Stream is offline', expected=True)
|
||||
title = metadata['title']
|
||||
|
||||
cdn_data = self._download_json(
|
||||
'https://picarto.tv/process/channel', channel_id,
|
||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||
note='Downloading load balancing info')
|
||||
data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
|
||||
channel_id, 'Downloading load balancing info')
|
||||
|
||||
token = mobj.group('token') or 'public'
|
||||
params = {
|
||||
'con': int(time.time() * 1000),
|
||||
'token': token,
|
||||
}
|
||||
|
||||
prefered_edge = cdn_data.get('preferedEdge')
|
||||
formats = []
|
||||
|
||||
for edge in cdn_data['edges']:
|
||||
edge_ep = edge.get('ep')
|
||||
if not edge_ep or not isinstance(edge_ep, compat_str):
|
||||
for source in (cdn_data.get('source') or []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
edge_id = edge.get('id')
|
||||
for tech in cdn_data['techs']:
|
||||
tech_label = tech.get('label')
|
||||
tech_type = tech.get('type')
|
||||
preference = 0
|
||||
if edge_id == prefered_edge:
|
||||
preference += 1
|
||||
format_id = []
|
||||
if edge_id:
|
||||
format_id.append(edge_id)
|
||||
if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
|
||||
format_id.append('hls')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
update_url_query(
|
||||
'https://%s/hls/%s/index.m3u8'
|
||||
% (edge_ep, channel_id), params),
|
||||
channel_id, 'mp4', preference=preference,
|
||||
m3u8_id='-'.join(format_id), fatal=False))
|
||||
continue
|
||||
elif tech_type == 'video/mp4' or tech_label == 'MP4':
|
||||
format_id.append('mp4')
|
||||
formats.append({
|
||||
'url': update_url_query(
|
||||
'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
|
||||
params),
|
||||
'format_id': '-'.join(format_id),
|
||||
'preference': preference,
|
||||
})
|
||||
else:
|
||||
# rtmp format does not seem to work
|
||||
continue
|
||||
source_type = source.get('type')
|
||||
if source_type == 'html5/application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'html5/video/mp4':
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
mature = metadata.get('adult')
|
||||
@@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(metadata.get('title') or channel_id),
|
||||
'title': self._live_title(title.strip()),
|
||||
'is_live': True,
|
||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||
'channel': channel_id,
|
||||
'channel_id': metadata.get('id'),
|
||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
|
65
youtube_dl/extractor/playstuff.py
Normal file
65
youtube_dl/extractor/playstuff.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class PlayStuffIE(InfoExtractor):
    """Extractor for play.stuff.co.nz detail pages, delegating to BrightcoveNew."""

    _VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
        'md5': 'c82d3669e5247c64bc382577843e5bd0',
        'info_dict': {
            'id': '6250584958001',
            'ext': 'mp4',
            'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
            'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
            'uploader_id': '6005208634001',
            'timestamp': 1619491027,
            'upload_date': '20210427',
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        # geo restricted, bypassable
        'url': 'https://play.stuff.co.nz/details/_6155660351001',
        'only_matching': True,
    }]
    # Filled with (account ID, player ID, asset ID).
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a playlist with one Brightcove entry per item that has an asset ID."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        state_json = self._search_regex(
            r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state')
        state = self._parse_json(state_json, video_id)

        def configuration(key, fallback):
            # String value from state['configurations'], else the fallback.
            return try_get(
                state, lambda x: x['configurations'][key], compat_str) or fallback

        account_id = configuration('accountId', '6005208634001')
        player_id = configuration('playerId', 'default')

        entries = []
        for video in state['items'].values():
            if not isinstance(video, dict):
                continue
            asset_id = try_get(
                video, lambda x: x['content']['attributes']['assetId'],
                compat_str)
            if not asset_id:
                continue
            brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
                account_id, player_id, asset_id)
            entries.append(self.url_result(
                smuggle_url(brightcove_url, {'geo_countries': ['NZ']}),
                'BrightcoveNew', video_id))

        return self.playlist_result(entries, video_id)
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user