mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-24 09:08:36 +09:00
Compare commits
260 Commits
2020.11.29
...
c3399cac19
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c3399cac19 | ||
|
|
9237aaa77f | ||
|
|
766fcdd0fa | ||
|
|
f6ea29e24b | ||
|
|
8a3797a4ab | ||
|
|
745db8899d | ||
|
|
83db801cbf | ||
|
|
964a8eb754 | ||
|
|
ac61f2e058 | ||
|
|
8487e8b98a | ||
|
|
9c484c0019 | ||
|
|
0e96b4b5ce | ||
|
|
a563c97c5c | ||
|
|
e88c9ef62a | ||
|
|
0889eb33e0 | ||
|
|
0021a2b9a1 | ||
|
|
19ec468635 | ||
|
|
491ee7efe4 | ||
|
|
8522bcd97c | ||
|
|
ac71fd5919 | ||
|
|
8e953dcbb1 | ||
|
|
f4afb9a6a8 | ||
|
|
d5b8cf093c | ||
|
|
5c6e84c0ff | ||
|
|
1aaee908b9 | ||
|
|
b2d9fd9c9f | ||
|
|
bc2f83b95e | ||
|
|
85de33b04e | ||
|
|
7dfd966848 | ||
|
|
a25d03d7cb | ||
|
|
cabfd4b1f0 | ||
|
|
7b643d4cd0 | ||
|
|
1f1d01d498 | ||
|
|
21a42e2588 | ||
|
|
2df93a0c4a | ||
|
|
75972e200d | ||
|
|
d0d838638c | ||
|
|
8c17afc471 | ||
|
|
40d66e07df | ||
|
|
ab89a8678b | ||
|
|
4d7d056909 | ||
|
|
c35bc82606 | ||
|
|
2f56caf083 | ||
|
|
4066945919 | ||
|
|
2a84694b1e | ||
|
|
4046ffe1e1 | ||
|
|
d1d0612160 | ||
|
|
7b0f04ed1f | ||
|
|
2e21b06ea2 | ||
|
|
a6f75e6e89 | ||
|
|
bd18824c2a | ||
|
|
bdd044e67b | ||
|
|
f7e95fb2a0 | ||
|
|
9dd674e1d2 | ||
|
|
9c1e164e0c | ||
|
|
c706fbe9fe | ||
|
|
ebdcf70b0d | ||
|
|
5966095e65 | ||
|
|
9ee984fc76 | ||
|
|
53528e1d23 | ||
|
|
c931c4b8dd | ||
|
|
7acd042bbb | ||
|
|
bcfe485e01 | ||
|
|
479cc6d5a1 | ||
|
|
38286ee729 | ||
|
|
1a95953867 | ||
|
|
71febd1c52 | ||
|
|
f1bc56c99b | ||
|
|
64e419bd73 | ||
|
|
782ea947b4 | ||
|
|
f27224d57b | ||
|
|
c007188598 | ||
|
|
af93ecfd88 | ||
|
|
794771a164 | ||
|
|
6f2eaaf73d | ||
|
|
4c7a4dbc4d | ||
|
|
f86b299d0e | ||
|
|
e474996541 | ||
|
|
aed617e311 | ||
|
|
0fa67c1d68 | ||
|
|
365b3cc72d | ||
|
|
a272fe21a8 | ||
|
|
cec1c2f211 | ||
|
|
12053450dc | ||
|
|
46cffb0c47 | ||
|
|
c32a059f52 | ||
|
|
6911312e53 | ||
|
|
f22b5a6b96 | ||
|
|
58e55198c1 | ||
|
|
d61ed9f2f1 | ||
|
|
8bc4c6350e | ||
|
|
cfa4ffa23b | ||
|
|
4f1dc1463d | ||
|
|
17e0f41d34 | ||
|
|
b57b27ff8f | ||
|
|
bbe8cc6662 | ||
|
|
98106accb6 | ||
|
|
af1312bfc3 | ||
|
|
4c7d7215cd | ||
|
|
0370d9eb3d | ||
|
|
1434651d20 | ||
|
|
2c312ab84a | ||
|
|
0ee78d62d5 | ||
|
|
7f3c90ab25 | ||
|
|
1d3cd29730 | ||
|
|
4ef1fc9707 | ||
|
|
f9e6aa1dcf | ||
|
|
f83db9064b | ||
|
|
2da9a86399 | ||
|
|
ecaa535cf4 | ||
|
|
79dd92b1fe | ||
|
|
bd3844c9c2 | ||
|
|
7bf5e3a84a | ||
|
|
08a17dae5b | ||
|
|
924ea66ade | ||
|
|
5b72f5b74f | ||
|
|
bfa345744d | ||
|
|
f966461476 | ||
|
|
b8aea53682 | ||
|
|
c0d9eb7043 | ||
|
|
3ba6aabd25 | ||
|
|
a8b31505ed | ||
|
|
90a271e914 | ||
|
|
172d270607 | ||
|
|
22feed08a1 | ||
|
|
942b8ca3be | ||
|
|
3729c52f9d | ||
|
|
71679eaee8 | ||
|
|
76fe4ba3b2 | ||
|
|
164a4a5756 | ||
|
|
455951985b | ||
|
|
c29500e412 | ||
|
|
1bc1520adc | ||
|
|
022e05dc1f | ||
|
|
b34c9551aa | ||
|
|
84f19c026f | ||
|
|
6bde5492b6 | ||
|
|
6086df4d6a | ||
|
|
c98052c5da | ||
|
|
ab62bc5838 | ||
|
|
bc87ba8424 | ||
|
|
b79df1b68d | ||
|
|
2797c7be45 | ||
|
|
755f186e21 | ||
|
|
2240a1dc4d | ||
|
|
03d3af9768 | ||
|
|
5ce9527e16 | ||
|
|
c527f5ada0 | ||
|
|
ace52668f0 | ||
|
|
9c33eb027e | ||
|
|
679b711395 | ||
|
|
1727541315 | ||
|
|
45b0a0d11b | ||
|
|
e665fcd4da | ||
|
|
aae737d4af | ||
|
|
92a6de861e | ||
|
|
5ff881aee6 | ||
|
|
eae19a4473 | ||
|
|
f70c263ce5 | ||
|
|
92d135921f | ||
|
|
d8008dee4f | ||
|
|
bb38a12157 | ||
|
|
bcc8ef0a5a | ||
|
|
3cb1a5dc73 | ||
|
|
ed40c02c9b | ||
|
|
b5fdceb4d1 | ||
|
|
1e6f7f3b45 | ||
|
|
469ff02f4e | ||
|
|
56f1c5ad38 | ||
|
|
3a6b8f4edf | ||
|
|
3ae02d4a62 | ||
|
|
59fea63dc2 | ||
|
|
22933e02d4 | ||
|
|
98a62d7cbd | ||
|
|
d6c5fdef34 | ||
|
|
4b9051cf39 | ||
|
|
00f5068908 | ||
|
|
432c6b0f48 | ||
|
|
ad5e5788ff | ||
|
|
dc65041c22 | ||
|
|
9f88b07945 | ||
|
|
225646c4ca | ||
|
|
5c6e9f0b6c | ||
|
|
6c370bc149 | ||
|
|
37fd242342 | ||
|
|
aee1f87168 | ||
|
|
b69bb1ed11 | ||
|
|
772cefef8c | ||
|
|
842654b6d0 | ||
|
|
df5e50954b | ||
|
|
a4a2fa8754 | ||
|
|
9da0504a09 | ||
|
|
470cf496f5 | ||
|
|
e029da9add | ||
|
|
e00b8f60d4 | ||
|
|
644c3ef886 | ||
|
|
9d8d0f8b4a | ||
|
|
5a1fbbf8b7 | ||
|
|
e2bdf8bf4f | ||
|
|
c368dc98e0 | ||
|
|
e7eff914cd | ||
|
|
07333d0062 | ||
|
|
5bd7ad2e81 | ||
|
|
3ded751985 | ||
|
|
6956db3606 | ||
|
|
17b01228f8 | ||
|
|
4f1ecca58d | ||
|
|
2717036489 | ||
|
|
d9482c0083 | ||
|
|
791b743765 | ||
|
|
fa604d9083 | ||
|
|
2bf0634d16 | ||
|
|
dccf4932e1 | ||
|
|
91dd25fe1e | ||
|
|
06bf2ac20f | ||
|
|
6ad0d8781e | ||
|
|
f2c704e112 | ||
|
|
5e822c2526 | ||
|
|
cc017e07ca | ||
|
|
082da36416 | ||
|
|
6bf95b15ee | ||
|
|
4c93b2fd15 | ||
|
|
1b26bfd425 | ||
|
|
13ec444a98 | ||
|
|
51579d87e4 | ||
|
|
e147619669 | ||
|
|
e7f93fbd85 | ||
|
|
58f7ada235 | ||
|
|
c67b33888f | ||
|
|
e8c0af04b7 | ||
|
|
5d769860c3 | ||
|
|
02b04785ee | ||
|
|
41c92b8d02 | ||
|
|
fe0c28f956 | ||
|
|
957c65b9ea | ||
|
|
5e95e18ce9 | ||
|
|
e91df0c550 | ||
|
|
c5636e9bca | ||
|
|
2e47264235 | ||
|
|
1c78cb118c | ||
|
|
beab2f88c9 | ||
|
|
0025447369 | ||
|
|
da4304609d | ||
|
|
ea89680aea | ||
|
|
664dd8ba85 | ||
|
|
64554c12e1 | ||
|
|
4ded9c0f00 | ||
|
|
c0820dd52a | ||
|
|
2bb70750a9 | ||
|
|
09d923f2eb | ||
|
|
37d979ad33 | ||
|
|
95ac4de229 | ||
|
|
d3e142b3fa | ||
|
|
132aece1ed | ||
|
|
3e4e338133 | ||
|
|
be19ae11fd | ||
|
|
59d63d8d4a | ||
|
|
cfeba5d17f | ||
|
|
6da0e5e7a2 | ||
|
|
d6ce649f15 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.29**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.11.29
|
||||
[debug] youtube-dl version 2021.01.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.29**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
||||
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.29**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.29**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.11.29
|
||||
[debug] youtube-dl version 2021.01.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.29. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.29**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
||||
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -7,8 +7,10 @@
|
||||
---
|
||||
|
||||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
- [ ] Read [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site)
|
||||
- [ ] Read [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) and adjusted the code to meet them
|
||||
- [ ] Covered the code with tests (note that PRs without tests will be REJECTED)
|
||||
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
|
||||
74
.github/workflows/ci.yml
vendored
Normal file
74
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
name: CI
|
||||
on: [push]
|
||||
jobs:
|
||||
tests:
|
||||
name: Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
# TODO: python 2.6
|
||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
||||
python-impl: [cpython]
|
||||
ytdl-test-set: [core, download]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# python 3.2 is only available on windows via setup-python
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: bat
|
||||
# jython
|
||||
- os: ubuntu-latest
|
||||
python-impl: jython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: sh
|
||||
- os: ubuntu-latest
|
||||
python-impl: jython
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: sh
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
if: ${{ matrix.python-impl == 'cpython' }}
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Set up Java 8
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 8
|
||||
- name: Install Jython
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
run: |
|
||||
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Run tests
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||
env:
|
||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
flake8:
|
||||
name: Linter
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install flake8
|
||||
run: pip install flake8
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
||||
50
.travis.yml
50
.travis.yml
@@ -1,50 +0,0 @@
|
||||
language: python
|
||||
python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
jobs:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- python: 3.8
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
- name: flake8
|
||||
python: 3.8
|
||||
dist: xenial
|
||||
install: pip install flake8
|
||||
script: flake8 .
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
- env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
script: ./devscripts/run_tests.sh
|
||||
275
ChangeLog
275
ChangeLog
@@ -1,3 +1,278 @@
|
||||
version 2021.01.03
|
||||
|
||||
Extractors
|
||||
* [nrk] Improve series metadata extraction (#27473)
|
||||
+ [nrk] Extract subtitles
|
||||
* [nrk] Fix age limit extraction
|
||||
* [nrk] Improve video id extraction
|
||||
+ [nrk] Add support for podcasts (#27634, #27635)
|
||||
* [nrk] Generalize and delegate all item extractors to nrk
|
||||
+ [nrk] Add support for mp3 formats
|
||||
* [nrktv] Switch to playback endpoint
|
||||
* [vvvvid] Fix season metadata extraction (#18130)
|
||||
* [stitcher] Fix extraction (#20811, #27606)
|
||||
* [acast] Fix extraction (#21444, #27612, #27613)
|
||||
+ [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200)
|
||||
+ [sky] Add support for Sports News articles and Brighcove videos (#13054)
|
||||
+ [vvvvid] Extract akamai formats
|
||||
* [vvvvid] Skip unplayable episodes (#27599)
|
||||
* [yandexvideo] Fix extraction for Python 3.4
|
||||
|
||||
|
||||
version 2020.12.31
|
||||
|
||||
Core
|
||||
* [utils] Accept only supported protocols in url_or_none
|
||||
* [YoutubeDL] Allow format filtering using audio language (#16209)
|
||||
|
||||
Extractors
|
||||
+ [redditr] Extract all thumbnails (#27503)
|
||||
* [vvvvid] Improve info extraction
|
||||
+ [vvvvid] Add support for playlists (#18130, #27574)
|
||||
+ [yandexdisk] Extract info from webpage
|
||||
* [yandexdisk] Fix extraction (#17861, #27131)
|
||||
* [yandexvideo] Use old API call as fallback
|
||||
* [yandexvideo] Fix extraction (#25000)
|
||||
- [nbc] Remove CSNNE extractor
|
||||
* [nbc] Fix NBCSport VPlayer URL extraction (#16640)
|
||||
+ [aenetworks] Add support for biography.com (#3863)
|
||||
* [uktvplay] Match new video URLs (#17909)
|
||||
* [sevenplay] Detect API errors
|
||||
* [tenplay] Fix format extraction (#26653)
|
||||
* [brightcove] Raise error for DRM protected videos (#23467, #27568)
|
||||
|
||||
|
||||
version 2020.12.29
|
||||
|
||||
Extractors
|
||||
* [youtube] Improve yt initial data extraction (#27524)
|
||||
* [youtube:tab] Improve URL matching #27559)
|
||||
* [youtube:tab] Restore retry on browse requests (#27313, #27564)
|
||||
* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904,
|
||||
#25418, #26070, #26350, #26738, #27563)
|
||||
- [brightcove] Remove sonyliv specific code
|
||||
* [piksel] Improve format extraction
|
||||
+ [zype] Add support for uplynk videos
|
||||
+ [toggle] Add support for live.mewatch.sg (#27555)
|
||||
+ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826)
|
||||
* [teachable] Improve embed detection (#26923)
|
||||
* [mitele] Fix free video extraction (#24624, #25827, #26757)
|
||||
* [telecinco] Fix extraction
|
||||
* [youtube] Update invidious.snopyta.org (#22667)
|
||||
* [amcnetworks] Improve auth only video detection (#27548)
|
||||
+ [generic] Add support for VHX Embeds (#27546)
|
||||
|
||||
|
||||
version 2020.12.26
|
||||
|
||||
Extractors
|
||||
* [instagram] Fix comment count extraction
|
||||
+ [instagram] Add support for reel URLs (#26234, #26250)
|
||||
* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821,
|
||||
#27538)
|
||||
* [instagram] Improve thumbnail extraction
|
||||
* [instagram] Fix extraction when authenticated (#22880, #26377, #26981,
|
||||
#27422)
|
||||
* [spankbang:playlist] Fix extraction (#24087)
|
||||
+ [spankbang] Add support for playlist videos
|
||||
* [pornhub] Improve like and dislike count extraction (#27356)
|
||||
* [pornhub] Fix lq formats extraction (#27386, #27393)
|
||||
+ [bongacams] Add support for bongacams.com (#27440)
|
||||
* [youtube:tab] Extend URL regular expression (#27501)
|
||||
* [theweatherchannel] Fix extraction (#25930, #26051)
|
||||
+ [sprout] Add support for Universal Kids (#22518)
|
||||
* [theplatform] Allow passing geo bypass countries from other extractors
|
||||
+ [wistia] Add support for playlists (#27533)
|
||||
+ [ctv] Add support for ctv.ca (#27525)
|
||||
* [9c9media] Improve info extraction
|
||||
* [youtube] Fix automatic captions extraction (#27162, #27388)
|
||||
* [sonyliv] Fix title for movies
|
||||
* [sonyliv] Fix extraction (#25667)
|
||||
* [streetvoice] Fix extraction (#27455, #27492)
|
||||
+ [facebook] Add support for watchparty pages (#27507)
|
||||
* [cbslocal] Fix video extraction
|
||||
+ [brightcove] Add another method to extract policyKey
|
||||
* [mewatch] Relax URL regular expression (#27506)
|
||||
|
||||
|
||||
version 2020.12.22
|
||||
|
||||
Core
|
||||
* [common] Remove unwanted query params from unsigned akamai manifest URLs
|
||||
|
||||
Extractors
|
||||
- [tastytrade] Remove extractor (#25716)
|
||||
* [niconico] Fix playlist extraction (#27428)
|
||||
- [everyonesmixtape] Remove extractor
|
||||
- [kanalplay] Remove extractor
|
||||
* [arkena] Fix extraction
|
||||
* [nba] Rewrite extractor
|
||||
* [turner] Improve info extraction
|
||||
* [youtube] Improve xsrf token extraction (#27442)
|
||||
* [generic] Improve RSS age limit extraction
|
||||
* [generic] Fix RSS itunes thumbnail extraction (#27405)
|
||||
+ [redditr] Extract duration (#27426)
|
||||
- [zaq1] Remove extractor
|
||||
+ [asiancrush] Add support for retrocrush.tv
|
||||
* [asiancrush] Fix extraction
|
||||
- [noco] Remove extractor (#10864)
|
||||
* [nfl] Fix extraction (#22245)
|
||||
* [skysports] Relax URL regular expression (#27435)
|
||||
+ [tv5unis] Add support for tv5unis.ca (#22399, #24890)
|
||||
+ [videomore] Add support for more.tv (#27088)
|
||||
+ [yandexmusic] Add support for music.yandex.com (#27425)
|
||||
+ [nhk:program] Add support for audio programs and program clips
|
||||
+ [nhk] Add support for NHK video programs (#27230)
|
||||
|
||||
|
||||
version 2020.12.14
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve JSON-LD interaction statistic extraction (#23306)
|
||||
* [downloader/hls] Delegate manifests with media initialization to ffmpeg
|
||||
+ [extractor/common] Document duration meta field for playlists
|
||||
|
||||
Extractors
|
||||
* [mdr] Bypass geo restriction
|
||||
* [mdr] Improve extraction (#24346, #26873)
|
||||
* [yandexmusic:album] Improve album title extraction (#27418)
|
||||
* [eporner] Fix view count extraction and make optional (#23306)
|
||||
+ [eporner] Extend URL regular expression
|
||||
* [eporner] Fix hash extraction and extend _VALID_URL (#27396)
|
||||
* [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400)
|
||||
* [twitcasting] Fix format extraction and improve info extraction (#24868)
|
||||
* [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402)
|
||||
* [itv] Clean description from HTML tags (#27399)
|
||||
* [vlive] Sort live formats (#27404)
|
||||
* [hotstart] Fix and improve extraction
|
||||
* Fix format extraction (#26690)
|
||||
+ Extract thumbnail URL (#16079, #20412)
|
||||
+ Add support for country specific playlist URLs (#23496)
|
||||
* Select the last id in video URL (#26412)
|
||||
+ [youtube] Add some invidious instances (#27373)
|
||||
|
||||
|
||||
version 2020.12.12
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Improve thumbnail filename deducing (#26010, #27244)
|
||||
|
||||
Extractors
|
||||
+ [ruutu] Extract more metadata
|
||||
+ [ruutu] Detect non-free videos (#21154)
|
||||
* [ruutu] Authenticate format URLs (#21031, #26782)
|
||||
+ [ruutu] Add support for static.nelonenmedia.fi (#25412)
|
||||
+ [ruutu] Extend URL regular expression (#24839)
|
||||
+ [facebook] Add support archived live video URLs (#15859)
|
||||
* [wdr] Improve overall extraction
|
||||
+ [wdr] Extend subtitles extraction (#22672, #22723)
|
||||
+ [facebook] Add support for videos attached to Relay based story pages
|
||||
(#10795)
|
||||
+ [wdr:page] Add support for kinder.wdr.de (#27350)
|
||||
+ [facebook] Add another regular expression for handleServerJS
|
||||
* [facebook] Fix embed page extraction
|
||||
+ [facebook] Add support for Relay post pages (#26935)
|
||||
+ [facebook] Add support for watch videos (#22795, #27062)
|
||||
+ [facebook] Add support for group posts with multiple videos (#19131)
|
||||
* [itv] Fix series metadata extraction (#26897)
|
||||
- [itv] Remove old extraction method (#23177)
|
||||
* [facebook] Redirect mobile URLs to desktop URLs (#24831, #25624)
|
||||
+ [facebook] Add support for Relay based pages (#26823)
|
||||
* [facebook] Try to reduce unnecessary tahoe requests
|
||||
- [facebook] Remove hardcoded Chrome User-Agent (#18974, #25411, #26958,
|
||||
#27329)
|
||||
- [smotri] Remove extractor (#27358)
|
||||
- [beampro] Remove extractor (#17290, #22871, #23020, #23061, #26099)
|
||||
|
||||
|
||||
version 2020.12.09
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix inline HTML5 media tags processing (#27345)
|
||||
|
||||
Extractors
|
||||
* [youtube:tab] Improve identity token extraction (#27197)
|
||||
* [youtube:tab] Make click tracking params on continuation optional
|
||||
* [youtube:tab] Delegate inline playlists to tab-based playlists (27298)
|
||||
+ [tubitv] Extract release year (#27317)
|
||||
* [amcnetworks] Fix free content extraction (#20354)
|
||||
+ [lbry:channel] Add support for channels (#25584)
|
||||
+ [lbry] Add support for short and embed URLs
|
||||
* [lbry] Fix channel metadata extraction
|
||||
+ [telequebec] Add support for video.telequebec.tv (#27339)
|
||||
* [telequebec] Fix extraction (#25733, #26883)
|
||||
+ [youtube:tab] Capture and output alerts (#27340)
|
||||
* [tvplay:home] Fix extraction (#21153)
|
||||
* [americastestkitchen] Fix Extraction and add support
|
||||
for Cook's Country and Cook's Illustrated (#17234, #27322)
|
||||
+ [slideslive] Add support for yoda service videos and extract subtitles
|
||||
(#27323)
|
||||
|
||||
|
||||
version 2020.12.07
|
||||
|
||||
Core
|
||||
* [extractor/common] Extract timestamp from Last-Modified header
|
||||
+ [extractor/common] Add support for dl8-* media tags (#27283)
|
||||
* [extractor/common] Fix media type extraction for HTML5 media tags
|
||||
in start/end form
|
||||
|
||||
Extractors
|
||||
* [aenetworks] Fix extraction (#23363, #23390, #26795, #26985)
|
||||
* Fix Fastly format extraction
|
||||
+ Add support for play and watch subdomains
|
||||
+ Extract series metadata
|
||||
* [youtube] Improve youtu.be extraction in non-existing playlists (#27324)
|
||||
+ [generic] Extract RSS video description, timestamp and itunes metadata
|
||||
(#27177)
|
||||
* [nrk] Reduce the number of instalments and episodes requests
|
||||
* [nrk] Improve extraction
|
||||
* Improve format extraction for old akamai formats
|
||||
+ Add is_live value to entry info dict
|
||||
* Request instalments only when available
|
||||
* Fix skole extraction
|
||||
+ [peertube] Extract fps
|
||||
+ [peertube] Recognize audio-only formats (#27295)
|
||||
|
||||
|
||||
version 2020.12.05
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve Akamai HTTP format extraction
|
||||
* Allow m3u8 manifest without an additional audio format
|
||||
* Fix extraction for qualities starting with a number
|
||||
|
||||
Extractors
|
||||
* [teachable:course] Improve extraction (#24507, #27286)
|
||||
* [nrk] Improve error extraction
|
||||
* [nrktv:series] Improve extraction (#21926)
|
||||
* [nrktv:season] Improve extraction
|
||||
* [nrk] Improve format extraction and geo-restriction detection (#24221)
|
||||
* [pornhub] Handle HTTP errors gracefully (#26414)
|
||||
* [nrktv] Relax URL regular expression (#27299, #26185)
|
||||
+ [zdf] Extract webm formats (#26659)
|
||||
+ [gamespot] Extract DASH and HTTP formats
|
||||
+ [tver] Add support for tver.jp (#26662, #27284)
|
||||
+ [pornhub] Add support for pornhub.org (#27276)
|
||||
|
||||
|
||||
version 2020.12.02
|
||||
|
||||
Extractors
|
||||
+ [tva] Add support for qub.ca (#27235)
|
||||
+ [toggle] Detect DRM protected videos (#16479, #20805)
|
||||
+ [toggle] Add support for new MeWatch URLs (#27256)
|
||||
* [youtube:tab] Extract channels only from channels tab (#27266)
|
||||
+ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030,
|
||||
#18806, #23148, #24461, #26171, #26800, #27263)
|
||||
* [cspan] Pass Referer header with format's video URL (#26032, #25729)
|
||||
* [youtube] Improve age-gated videos extraction (#27259)
|
||||
+ [mediaset] Add support for movie URLs (#27240)
|
||||
* [yandexmusic] Refactor
|
||||
+ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284)
|
||||
* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762)
|
||||
|
||||
|
||||
version 2020.11.29
|
||||
|
||||
Core
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
[](https://travis-ci.org/ytdl-org/youtube-dl)
|
||||
[](https://github.com/ytdl-org/youtube-dl/actions?query=workflow%3ACI)
|
||||
|
||||
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
@@ -371,7 +372,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
@@ -677,6 +678,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
- `language`: Language code
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
@@ -879,7 +881,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
|
||||
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
|
||||
$HOME/jython/bin/jython -m pip install nose
|
||||
17
devscripts/run_tests.bat
Normal file
17
devscripts/run_tests.bat
Normal file
@@ -0,0 +1,17 @@
|
||||
@echo off
|
||||
|
||||
rem Keep this list in sync with the `offlinetest` target in Makefile
|
||||
set DOWNLOAD_TESTS="age_restriction^|download^|iqiyi_sdk_interpreter^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature"
|
||||
|
||||
if "%YTDL_TEST_SET%" == "core" (
|
||||
set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
|
||||
set multiprocess_args=""
|
||||
) else if "%YTDL_TEST_SET%" == "download" (
|
||||
set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
|
||||
set multiprocess_args="--processes=4 --process-timeout=540"
|
||||
) else (
|
||||
echo YTDL_TEST_SET is not set or invalid
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
|
||||
@@ -35,6 +35,8 @@
|
||||
- **adobetv:video**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||
- **aenetworks:collection**
|
||||
- **aenetworks:show**
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
@@ -55,6 +57,7 @@
|
||||
- **appletrailers**
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ArcPublishing**
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
@@ -102,6 +105,7 @@
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BiliBiliPlayer**
|
||||
- **BioBioChileTV**
|
||||
- **Biography**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
@@ -110,6 +114,7 @@
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BongaCams**
|
||||
- **BostonGlobe**
|
||||
- **Box**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
@@ -144,6 +149,7 @@
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSLocalArticle**
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
@@ -193,9 +199,9 @@
|
||||
- **CrooksAndLiars**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **cu.ntv.co.jp**: Nippon Television Network
|
||||
- **Culturebox**
|
||||
@@ -266,7 +272,6 @@
|
||||
- **ESPNArticle**
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
@@ -308,11 +313,11 @@
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **FujiTVFODPlus7**
|
||||
- **Funimation**
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameSpot**
|
||||
@@ -345,6 +350,7 @@
|
||||
- **hgtv.com:show**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
@@ -397,7 +403,6 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **KarriereVideos**
|
||||
@@ -421,7 +426,8 @@
|
||||
- **la7.it**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **lbry.tv**
|
||||
- **lbry**
|
||||
- **lbry:channel**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
@@ -486,6 +492,7 @@
|
||||
- **META**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **mewatch**
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **MiaoPai**
|
||||
@@ -496,8 +503,6 @@
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
- **Mixer:live**
|
||||
- **Mixer:vod**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MNetTV**
|
||||
@@ -538,6 +543,11 @@
|
||||
- **NationalGeographicTV**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **nba:watch**
|
||||
- **nba:watch:collection**
|
||||
- **NBAChannel**
|
||||
- **NBAEmbed**
|
||||
- **NBAWatchEmbed**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **nbcolympics**
|
||||
@@ -567,8 +577,10 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **nfl.com**
|
||||
- **nfl.com** (Currently broken)
|
||||
- **nfl.com:article** (Currently broken)
|
||||
- **NhkVod**
|
||||
- **NhkVodProgram**
|
||||
- **nhl.com**
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
@@ -582,7 +594,6 @@
|
||||
- **njoy:embed**
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **NonkTube**
|
||||
- **Noovo**
|
||||
- **Normalboots**
|
||||
@@ -600,6 +611,7 @@
|
||||
- **Npr**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKRadioPodkast**
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
@@ -709,6 +721,7 @@
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**
|
||||
- **Qub**
|
||||
- **Quickline**
|
||||
- **QuicklineLive**
|
||||
- **R7**
|
||||
@@ -802,19 +815,16 @@
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **sky.it**
|
||||
- **sky:news**
|
||||
- **sky:sports**
|
||||
- **sky:sports:news**
|
||||
- **skyacademy.it**
|
||||
- **SkylineWebcams**
|
||||
- **SkyNews**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **SlidesLive**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **SonyLIV**
|
||||
@@ -872,7 +882,6 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
@@ -895,6 +904,7 @@
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleQuebecSquat**
|
||||
- **TeleQuebecVideo**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TennisTV**
|
||||
@@ -945,12 +955,15 @@
|
||||
- **TV2DKBornholmPlay**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **tv5unis**
|
||||
- **tv5unis:video**
|
||||
- **tv8.it**
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
- **TVer**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVN24**
|
||||
@@ -1079,6 +1092,7 @@
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VVVVIDShow**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Wakanim**
|
||||
@@ -1101,6 +1115,7 @@
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **Wistia**
|
||||
- **WistiaPlaylist**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **WorldStarHipHop**
|
||||
- **WSJ**: Wall Street Journal
|
||||
@@ -1132,6 +1147,8 @@
|
||||
- **yahoo:japannews**: Yahoo! Japan News
|
||||
- **YandexDisk**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
|
||||
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YandexVideo**
|
||||
@@ -1158,9 +1175,9 @@
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:tab**: YouTube.com tab
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **YoutubeYtBe**
|
||||
- **YoutubeYtUser**
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **Zattoo**
|
||||
- **ZattooLive**
|
||||
- **ZDF**
|
||||
|
||||
@@ -98,6 +98,55 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||
|
||||
def test_search_json_ld_realworld(self):
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/23306
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._search_json_ld(r'''<script type="application/ld+json">
|
||||
{
|
||||
"@context": "http://schema.org/",
|
||||
"@type": "VideoObject",
|
||||
"name": "1 On 1 With Kleio",
|
||||
"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
|
||||
"duration": "PT0H12M23S",
|
||||
"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
|
||||
"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
|
||||
"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
|
||||
"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
|
||||
"width": "1920",
|
||||
"height": "1080",
|
||||
"encodingFormat": "mp4",
|
||||
"bitrate": "6617kbps",
|
||||
"isFamilyFriendly": "False",
|
||||
"description": "Kleio Valentien",
|
||||
"uploadDate": "2015-12-05T21:24:35+01:00",
|
||||
"interactionStatistic": {
|
||||
"@type": "InteractionCounter",
|
||||
"interactionType": { "@type": "http://schema.org/WatchAction" },
|
||||
"userInteractionCount": 1120958
|
||||
}, "aggregateRating": {
|
||||
"@type": "AggregateRating",
|
||||
"ratingValue": "88",
|
||||
"ratingCount": "630",
|
||||
"bestRating": "100",
|
||||
"worstRating": "0"
|
||||
}, "actor": [{
|
||||
"@type": "Person",
|
||||
"name": "Kleio Valentien",
|
||||
"url": "https://www.eporner.com/pornstar/kleio-valentien/"
|
||||
}]}
|
||||
</script>''', None),
|
||||
{
|
||||
'title': '1 On 1 With Kleio',
|
||||
'description': 'Kleio Valentien',
|
||||
'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
|
||||
'timestamp': 1449347075,
|
||||
'duration': 743.0,
|
||||
'view_count': 1120958,
|
||||
'width': 1920,
|
||||
'height': 1080,
|
||||
})
|
||||
|
||||
def test_download_json(self):
|
||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||
self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
|
||||
@@ -108,6 +157,18 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||
|
||||
def test_parse_html5_media_entries(self):
|
||||
# inline video tag
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._parse_html5_media_entries(
|
||||
'https://127.0.0.1/video.html',
|
||||
r'<html><video src="/vid.mp4" /></html>', None)[0],
|
||||
{
|
||||
'formats': [{
|
||||
'url': 'https://127.0.0.1/vid.mp4',
|
||||
}],
|
||||
})
|
||||
|
||||
# from https://www.r18.com/
|
||||
# with kpbs in label
|
||||
expect_dict(
|
||||
|
||||
@@ -36,7 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
@@ -57,8 +57,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
# def test_youtube_user_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||
|
||||
@@ -21,6 +21,7 @@ from youtube_dl.utils import (
|
||||
encode_base_n,
|
||||
caesar,
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
@@ -554,6 +555,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
self.assertEqual(url_or_none('s3://foo.de'), None)
|
||||
self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
|
||||
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
||||
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
||||
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
@@ -1465,6 +1471,10 @@ Line 1
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||
|
||||
def test_clean_podcast_url(self):
|
||||
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -1083,7 +1083,7 @@ class YoutubeDL(object):
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
@@ -2410,7 +2410,7 @@ class YoutubeDL(object):
|
||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
||||
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
||||
t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||
t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
|
||||
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
||||
|
||||
@@ -42,11 +42,13 @@ class HlsFD(FragmentFD):
|
||||
# no segments will definitely be appended to the end of the playlist.
|
||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# # event media playlists [4]
|
||||
r'#EXT-X-MAP:', # media initialization [5]
|
||||
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||
@@ -170,8 +172,12 @@ class HlsFD(FragmentFD):
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if not test:
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
|
||||
@@ -2,21 +2,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
class ACastBaseIE(InfoExtractor):
|
||||
def _extract_episode(self, episode, show_info):
|
||||
title = episode['title']
|
||||
info = {
|
||||
'id': episode['id'],
|
||||
'display_id': episode.get('episodeUrl'),
|
||||
'url': clean_podcast_url(episode['url']),
|
||||
'title': title,
|
||||
'description': clean_html(episode.get('description') or episode.get('summary')),
|
||||
'thumbnail': episode.get('image'),
|
||||
'timestamp': parse_iso8601(episode.get('publishDate')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'filesize': int_or_none(episode.get('contentLength')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(episode.get('episode')),
|
||||
}
|
||||
info.update(show_info)
|
||||
return info
|
||||
|
||||
def _extract_show_info(self, show):
|
||||
return {
|
||||
'creator': show.get('author'),
|
||||
'series': show.get('title'),
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
return self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
|
||||
|
||||
|
||||
class ACastIE(ACastBaseIE):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -28,15 +55,15 @@ class ACastIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
||||
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766.602563,
|
||||
'duration': 2766,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
@@ -45,7 +72,7 @@ class ACastIE(InfoExtractor):
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
@@ -54,40 +81,14 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
||||
display_id)
|
||||
media_url = s['url']
|
||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
||||
episode_url = s.get('episodeUrl')
|
||||
if episode_url:
|
||||
display_id = episode_url
|
||||
else:
|
||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e.get('name') or s['title']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
'season_number': int_or_none(e.get('seasonNumber')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
}
|
||||
episode = self._call_api(
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
class ACastChannelIE(ACastBaseIE):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -102,34 +103,24 @@ class ACastChannelIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
},
|
||||
'playlist_mincount': 35,
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
|
||||
def _fetch_page(self, channel_slug, page):
|
||||
casts = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_slug = self._match_id(url)
|
||||
channel_data = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, channel_slug), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, compat_str(
|
||||
channel_data['id']), channel_data['name'], channel_data.get('description'))
|
||||
show_slug = self._match_id(url)
|
||||
show = self._call_api(show_slug, show_slug)
|
||||
show_info = self._extract_show_info(show)
|
||||
entries = []
|
||||
for episode in (show.get('episodes') or []):
|
||||
entries.append(self._extract_episode(episode, show_info))
|
||||
return self.playlist_result(
|
||||
entries, show.get('id'), show.get('title'), show.get('description'))
|
||||
|
||||
@@ -5,20 +5,32 @@ import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksBaseIE(ThePlatformIE):
|
||||
_BASE_URL_REGEX = r'''(?x)https?://
|
||||
(?:(?:www|play|watch)\.)?
|
||||
(?P<domain>
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/'''
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
'mylifetime.com': ('LIFETIME', 'lifetime'),
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
'biography.com': (None, 'biography'),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
query = {'mbr': 'true'}
|
||||
@@ -31,7 +43,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'assetTypes': 'high_video_s3'
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3',
|
||||
'switch': 'hls_ingest_fastly'
|
||||
'switch': 'hls_high_fastly',
|
||||
}]
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@@ -44,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e, GeoRestrictedError):
|
||||
raise
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
@@ -57,24 +71,45 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
|
||||
shows/[^/]+/season-\d+/episode-\d+|
|
||||
(?:
|
||||
(?:movie|special)s/[^/]+|
|
||||
(?:shows/[^/]+/)?videos
|
||||
)/[^/?#&]+
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'info_dict': {
|
||||
@@ -91,22 +126,23 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
||||
'info_dict': {
|
||||
'id': '71889446852',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
||||
'info_dict': {
|
||||
'id': 'SERIES4317',
|
||||
'title': 'Atlanta Plastic',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'only_matching': True
|
||||
'info_dict': {
|
||||
'id': '600587331957',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inlawful Entry',
|
||||
'description': 'md5:57c12115a2b384d883fe64ca50529e08',
|
||||
'timestamp': 1452634428,
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
@@ -117,78 +153,125 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
if url_parts_len == 1:
|
||||
entries = []
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
else:
|
||||
# single season
|
||||
url_parts_len = 2
|
||||
if url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
|
||||
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
||||
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||
webpage, 'video url', group='url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
return info
|
||||
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
def _call_api(self, resource, slug, brand, fields):
|
||||
return self._download_json(
|
||||
'https://yoga.appsvcs.aetnd.com/graphql',
|
||||
slug, query={'brand': brand}, data=urlencode_postdata({
|
||||
'query': '''{
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields),
|
||||
}))['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = re.match(self._VALID_URL, url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = 'http://watch.%s' % domain
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
doc = self._get_doc(item)
|
||||
canonical = doc.get('canonical')
|
||||
if not canonical:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
|
||||
|
||||
description = None
|
||||
if self._PLAYLIST_DESCRIPTION_KEY:
|
||||
description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist.get('id'),
|
||||
playlist.get(self._PLAYLIST_TITLE_KEY), description)
|
||||
|
||||
|
||||
class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||
IE_NAME = 'aenetworks:collection'
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
|
||||
'info_dict': {
|
||||
'id': '282',
|
||||
'title': 'America The Story of Us',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||
'only_matching': True
|
||||
}]
|
||||
_RESOURCE = 'list'
|
||||
_ITEMS_KEY = 'items'
|
||||
_PLAYLIST_TITLE_KEY = 'display_title'
|
||||
_PLAYLIST_DESCRIPTION_KEY = None
|
||||
_FIELDS = '''id
|
||||
display_title
|
||||
items {
|
||||
... on ListVideoItem {
|
||||
doc {
|
||||
canonical
|
||||
id
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _get_doc(self, item):
|
||||
return item.get('doc') or {}
|
||||
|
||||
|
||||
class AENetworksShowIE(AENetworksListBaseIE):
|
||||
IE_NAME = 'aenetworks:show'
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||
'info_dict': {
|
||||
'id': 'SH012427480000',
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
'playlist_mincount': 168,
|
||||
}]
|
||||
_RESOURCE = 'series'
|
||||
_ITEMS_KEY = 'episodes'
|
||||
_PLAYLIST_TITLE_KEY = 'title'
|
||||
_PLAYLIST_DESCRIPTION_KEY = 'description'
|
||||
_FIELDS = '''description
|
||||
id
|
||||
title
|
||||
episodes {
|
||||
canonical
|
||||
id
|
||||
}'''
|
||||
|
||||
def _get_doc(self, item):
|
||||
return item
|
||||
|
||||
|
||||
class HistoryTopicIE(AENetworksBaseIE):
|
||||
@@ -204,6 +287,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -212,36 +296,47 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'add_ie': ['ThePlatform'],
|
||||
}]
|
||||
|
||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(
|
||||
update_url_query(theplatform_url, query),
|
||||
{
|
||||
'sig': {
|
||||
'key': self._THEPLATFORM_KEY,
|
||||
'secret': self._THEPLATFORM_SECRET,
|
||||
},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://www.history.com/videos/' + display_id,
|
||||
AENetworksIE.ie_key())
|
||||
|
||||
|
||||
class HistoryPlayerIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:player'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_aetn_info(domain, 'id', video_id, url)
|
||||
|
||||
|
||||
class BiographyIE(AENetworksBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
|
||||
'info_dict': {
|
||||
'id': '30322987',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vincent Van Gogh - Full Episode',
|
||||
'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
|
||||
'timestamp': 1311970571,
|
||||
'upload_date': '20110729',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/history/videos',
|
||||
video_id, query={'filter[id]': video_id})['results'][0]
|
||||
title = result['title']
|
||||
info = self._extract_aen_smil(result['publicUrl'], video_id)
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': result.get('description'),
|
||||
'duration': int_or_none(result.get('duration')),
|
||||
'timestamp': int_or_none(result.get('added'), 1000),
|
||||
})
|
||||
return info
|
||||
player_url = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
|
||||
webpage, 'player URL')
|
||||
return self.url_result(player_url, HistoryPlayerIE.ie_key())
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -11,25 +13,22 @@ from ..utils import (
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
|
||||
'info_dict': {
|
||||
'id': 's3MX01Nl4vPH',
|
||||
'id': '4Lq1dzOnZGt0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maron - Season 4 - Step 1',
|
||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||
'age_limit': 17,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462468831,
|
||||
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
|
||||
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
|
||||
'upload_date': '20201120',
|
||||
'timestamp': 1605904350,
|
||||
'uploader': 'AMCN',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires TV provider accounts',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
@@ -55,32 +54,34 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_REQUESTOR_ID_MAP = {
|
||||
'amc': 'AMC',
|
||||
'bbcamerica': 'BBCA',
|
||||
'ifc': 'IFC',
|
||||
'sundancetv': 'SUNDANCE',
|
||||
'wetv': 'WETV',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
properties = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
|
||||
display_id)['data']['properties']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(
|
||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'link\.theplatform\.com/s/([^?]+)',
|
||||
media_url, 'theplatform_path'), display_id)
|
||||
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
auth_required = self._search_regex(
|
||||
r'window\.authRequired\s*=\s*(true|false);',
|
||||
webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(
|
||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'requestor id')
|
||||
video_category = properties.get('videoCategory')
|
||||
if video_category and video_category.endswith('-Auth'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
|
||||
@@ -1,33 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
'info_dict': {
|
||||
'id': '5b400b9ee338f922cb06450c',
|
||||
'title': 'Weeknight Japanese Suppers',
|
||||
'title': 'Japanese Suppers',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'release_date': '20180414',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
'episode': 'Weeknight Japanese Suppers',
|
||||
'episode': 'Japanese Suppers',
|
||||
'episode_number': 15,
|
||||
},
|
||||
'params': {
|
||||
@@ -36,47 +36,31 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
resource_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
is_episode = resource_type == 'episode'
|
||||
if is_episode:
|
||||
resource_type = 'episodes'
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id, js_to_json)
|
||||
|
||||
ep_data = try_get(
|
||||
video_data,
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
|
||||
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
'description') or ep_meta.get('description'))
|
||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
||||
|
||||
season_number = int_or_none(ep_meta.get('season_number'))
|
||||
episode = ep_meta.get('title')
|
||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
||||
resource = self._download_json(
|
||||
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||
video = resource['video'] if is_episode else resource
|
||||
episode = resource if is_episode else resource.get('episode') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'release_date': release_date,
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'description': clean_html(video.get('description')),
|
||||
'release_date': unified_strdate(video.get('publishDate')),
|
||||
'series': try_get(episode, lambda x: x['show']['title']),
|
||||
'episode': episode.get('title'),
|
||||
}
|
||||
|
||||
@@ -116,7 +116,76 @@ class AnvatoIE(InfoExtractor):
|
||||
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
||||
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
||||
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z',
|
||||
'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B',
|
||||
'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj',
|
||||
'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l',
|
||||
'04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P',
|
||||
'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A',
|
||||
'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V',
|
||||
'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z',
|
||||
'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9',
|
||||
'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e',
|
||||
'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D',
|
||||
'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d',
|
||||
'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ',
|
||||
'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V',
|
||||
'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe',
|
||||
'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP',
|
||||
'3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV',
|
||||
'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v',
|
||||
'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q',
|
||||
'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV',
|
||||
'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r',
|
||||
'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR',
|
||||
'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0',
|
||||
'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl',
|
||||
'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923',
|
||||
'7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P',
|
||||
'3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa',
|
||||
'3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V',
|
||||
'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5',
|
||||
'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ',
|
||||
'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye',
|
||||
'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o',
|
||||
'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e',
|
||||
'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z',
|
||||
'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R',
|
||||
'7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29',
|
||||
'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q',
|
||||
'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp',
|
||||
'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze',
|
||||
'5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ',
|
||||
'70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa',
|
||||
'26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ',
|
||||
'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL',
|
||||
'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo',
|
||||
'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV',
|
||||
'3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa',
|
||||
'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y',
|
||||
'7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P',
|
||||
'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO',
|
||||
'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr',
|
||||
'5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy',
|
||||
'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn',
|
||||
'3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj',
|
||||
'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29',
|
||||
'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V',
|
||||
'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5',
|
||||
'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy',
|
||||
'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e',
|
||||
'5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y',
|
||||
'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0',
|
||||
'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy',
|
||||
'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV',
|
||||
'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K',
|
||||
'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23',
|
||||
'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR',
|
||||
'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R',
|
||||
'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ',
|
||||
'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L',
|
||||
'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR',
|
||||
}
|
||||
|
||||
_MCP_TO_ACCESS_KEY_TABLE = {
|
||||
@@ -189,19 +258,17 @@ class AnvatoIE(InfoExtractor):
|
||||
|
||||
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
|
||||
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
|
||||
payload = {
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
||||
'anvts': server_time,
|
||||
},
|
||||
api = {
|
||||
'anvrid': anvrid,
|
||||
'anvts': server_time,
|
||||
}
|
||||
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
data=json.dumps(payload).encode('utf-8'))
|
||||
data=json.dumps({'api': api}).encode('utf-8'))
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id):
|
||||
video_data = self._get_video_json(access_key, video_id)
|
||||
@@ -259,7 +326,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'description': video_data.get('def_description'),
|
||||
'tags': video_data.get('def_tags', '').split(','),
|
||||
'categories': video_data.get('categories'),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'),
|
||||
'timestamp': int_or_none(video_data.get(
|
||||
'ts_published') or video_data.get('ts_added')),
|
||||
'uploader': video_data.get('mcp_id'),
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
options = self._parse_json(self._search_regex(
|
||||
r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
|
||||
|
||||
formats = []
|
||||
for sources in player['multiSRC']:
|
||||
for sources in (options.get('multiSRC') or []):
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
@@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
info['title'] = get_element_by_id('videoTitle', webpage) or \
|
||||
self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'duration': int_or_none(options.get('duration')),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
61
youtube_dl/extractor/applepodcasts.py
Normal file
61
youtube_dl/extractor/applepodcasts.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593921600,
|
||||
'duration': 6425,
|
||||
'series': 'The Tim Dillon Show',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id)
|
||||
episode = ember_data['data']['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': episode['name'],
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
'description': description.get('standard') or description.get('short'),
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
}
|
||||
174
youtube_dl/extractor/arcpublishing.py
Normal file
174
youtube_dl/extractor/arcpublishing.py
Normal file
@@ -0,0 +1,174 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
|
||||
_TESTS = [{
|
||||
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
|
||||
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
|
||||
'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.actionnewsjax.com/video/live-stream/
|
||||
'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
|
||||
'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
|
||||
'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
|
||||
'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
|
||||
'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
|
||||
'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
|
||||
'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
|
||||
'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
|
||||
'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
|
||||
'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_POWA_DEFAULTS = [
|
||||
(['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
|
||||
([
|
||||
'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
|
||||
'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
|
||||
'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
|
||||
], 'video-api-cdn.%s.arcpublishing.com/api'),
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
entries = []
|
||||
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
|
||||
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
|
||||
powa = extract_attributes(powa_el) or {}
|
||||
org = powa.get('data-org')
|
||||
uuid = powa.get('data-uuid')
|
||||
if org and uuid:
|
||||
entries.append('arcpublishing:%s:%s' % (org, uuid))
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
org, uuid = re.match(self._VALID_URL, url).groups()
|
||||
for orgs, tmpl in self._POWA_DEFAULTS:
|
||||
if org in orgs:
|
||||
base_api_tmpl = tmpl
|
||||
break
|
||||
else:
|
||||
base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
|
||||
if org == 'wapo':
|
||||
org = 'washpost'
|
||||
video = self._download_json(
|
||||
'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
|
||||
uuid, query={'uuid': uuid})[0]
|
||||
title = video['headlines']['basic']
|
||||
is_live = video.get('status') == 'live'
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for s in video.get('streams', []):
|
||||
s_url = s.get('url')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
stream_type = s.get('stream_type')
|
||||
if stream_type == 'smil':
|
||||
smil_formats = self._extract_smil_formats(
|
||||
s_url, uuid, fatal=False)
|
||||
for f in smil_formats:
|
||||
if f['url'].endswith('/cfx/st'):
|
||||
f['app'] = 'cfx/st'
|
||||
if not f['play_path'].startswith('mp4:'):
|
||||
f['play_path'] = 'mp4:' + f['play_path']
|
||||
if isinstance(f['tbr'], float):
|
||||
f['vbr'] = f['tbr'] * 1000
|
||||
del f['tbr']
|
||||
f['format_id'] = 'rtmp-%d' % f['vbr']
|
||||
formats.extend(smil_formats)
|
||||
elif stream_type in ('ts', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
|
||||
continue
|
||||
for f in m3u8_formats:
|
||||
if f.get('acodec') == 'none':
|
||||
f['preference'] = -40
|
||||
elif f.get('vcodec') == 'none':
|
||||
f['preference'] = -50
|
||||
height = f.get('height')
|
||||
if not height:
|
||||
continue
|
||||
vbr = self._search_regex(
|
||||
r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
|
||||
if vbr:
|
||||
f['vbr'] = int(vbr)
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
'filesize': int_or_none(s.get('filesize')),
|
||||
'url': s_url,
|
||||
'preference': -1,
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))
|
||||
|
||||
subtitles = {}
|
||||
for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': uuid,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
|
||||
'description': try_get(video, lambda x: x['subheadlines']['basic']),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video.get('duration'), 100),
|
||||
'timestamp': parse_iso8601(video.get('created_date')),
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
}
|
||||
@@ -6,13 +6,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,22 +18,27 @@ class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
video\.arkena\.com/play2/embed/player\?|
|
||||
video\.(?:arkena|qbrick)\.com/play2/embed/player\?|
|
||||
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
|
||||
'md5': '97f117754e5f3c020f5f26da4a44ebaf',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'id': 'd8ab4607-00090107-aab86310',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
'title': 'EM_HT20_117_roslund_v2.mp4',
|
||||
'timestamp': 1608285912,
|
||||
'upload_date': '20201218',
|
||||
'duration': 1429.162667,
|
||||
'subtitles': {
|
||||
'sv': 'count:3',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||
'only_matching': True,
|
||||
@@ -72,62 +75,89 @@ class ArkenaIE(InfoExtractor):
|
||||
if not video_id or not account_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||
media = self._download_json(
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
})
|
||||
metadata = media.get('metadata') or {}
|
||||
title = metadata['title']
|
||||
|
||||
media_info = playlist['MediaInfo']
|
||||
title = media_info['Title']
|
||||
media_files = playlist['MediaFiles']
|
||||
|
||||
is_live = False
|
||||
duration = None
|
||||
formats = []
|
||||
for kind_case, kind_formats in media_files.items():
|
||||
kind = kind_case.lower()
|
||||
for f in kind_formats:
|
||||
f_url = f.get('Url')
|
||||
if not f_url:
|
||||
continue
|
||||
is_live = f.get('Live') == 'true'
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f_url, video_id, f4m_id=kind, fatal=False))
|
||||
elif kind == 'dash' or 'mpd' in exts:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
f_url, video_id, mpd_id=kind, fatal=False))
|
||||
elif kind == 'silverlight':
|
||||
# TODO: process when ism is supported (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8118)
|
||||
continue
|
||||
else:
|
||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||
'tbr': tbr,
|
||||
})
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
for resource in media['asset']['resources']:
|
||||
for rendition in (resource.get('renditions') or []):
|
||||
rendition_type = rendition.get('type')
|
||||
for i, link in enumerate(rendition.get('links') or []):
|
||||
href = link.get('href')
|
||||
if not href:
|
||||
continue
|
||||
if rendition_type == 'image':
|
||||
thumbnails.append({
|
||||
'filesize': int_or_none(rendition.get('size')),
|
||||
'height': int_or_none(rendition.get('height')),
|
||||
'id': rendition.get('id'),
|
||||
'url': href,
|
||||
'width': int_or_none(rendition.get('width')),
|
||||
})
|
||||
elif rendition_type == 'subtitle':
|
||||
subtitles.setdefault(rendition.get('language') or 'en', []).append({
|
||||
'url': href,
|
||||
})
|
||||
elif rendition_type == 'video':
|
||||
f = {
|
||||
'filesize': int_or_none(rendition.get('size')),
|
||||
'format_id': rendition.get('id'),
|
||||
'url': href,
|
||||
}
|
||||
video = try_get(rendition, lambda x: x['videos'][i], dict)
|
||||
if video:
|
||||
if not duration:
|
||||
duration = float_or_none(video.get('duration'))
|
||||
f.update({
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'vcodec': video.get('codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
})
|
||||
audio = try_get(video, lambda x: x['audios'][0], dict)
|
||||
if audio:
|
||||
f.update({
|
||||
'acodec': audio.get('codec'),
|
||||
'asr': int_or_none(audio.get('sampleRate')),
|
||||
})
|
||||
formats.append(f)
|
||||
elif rendition_type == 'index':
|
||||
mime_type = link.get('mimeType')
|
||||
if mime_type == 'application/smil+xml':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
href, video_id, fatal=False))
|
||||
elif mime_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif mime_type == 'application/hds+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media_info.get('Description')
|
||||
video_id = media_info.get('VideoId') or video_id
|
||||
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||
thumbnails = [{
|
||||
'url': thumbnail['Url'],
|
||||
'width': int_or_none(thumbnail.get('Size')),
|
||||
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'description': metadata.get('description'),
|
||||
'timestamp': parse_iso8601(media.get('created')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'duration': duration,
|
||||
'tags': media.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -1,27 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import extract_attributes
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
||||
class AsianCrushBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
|
||||
_KALTURA_KEYS = [
|
||||
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
|
||||
'widescreen_thumbnail_url', 'screencap_widescreen',
|
||||
]
|
||||
_API_SUFFIX = {'retrocrush.tv': '-ott'}
|
||||
|
||||
def _call_api(self, host, endpoint, video_id, query, resource):
|
||||
return self._download_json(
|
||||
'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query,
|
||||
headers=self.geo_verification_headers())['objects']
|
||||
|
||||
def _download_object_data(self, host, object_id, resource):
|
||||
return self._call_api(
|
||||
host, 'search', object_id, {'id': object_id}, resource)[0]
|
||||
|
||||
def _get_object_description(self, obj):
|
||||
return strip_or_none(obj.get('long_description') or obj.get('short_description'))
|
||||
|
||||
def _parse_video_data(self, video):
|
||||
title = video['name']
|
||||
|
||||
entry_id, partner_id = [None] * 2
|
||||
for k in self._KALTURA_KEYS:
|
||||
k_url = video.get(k)
|
||||
if k_url:
|
||||
mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
|
||||
if mobj:
|
||||
partner_id, entry_id = mobj.groups()
|
||||
break
|
||||
|
||||
meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
|
||||
categories = list(filter(None, [c.get('name') for c in meta_categories]))
|
||||
|
||||
show_info = video.get('show_info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': entry_id,
|
||||
'title': title,
|
||||
'description': self._get_object_description(video),
|
||||
'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
|
||||
'categories': categories,
|
||||
'series': show_info.get('show_name'),
|
||||
'season_number': int_or_none(show_info.get('season_num')),
|
||||
'season_id': show_info.get('season_id'),
|
||||
'episode_number': int_or_none(show_info.get('episode_num')),
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
||||
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
'age_limit': 13,
|
||||
'categories': 'count:5',
|
||||
'duration': 5812,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
@@ -41,67 +105,35 @@ class AsianCrushIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
entry_id, partner_id, title = [None] * 3
|
||||
|
||||
vars = self._parse_json(
|
||||
self._search_regex(
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False)
|
||||
if vars:
|
||||
entry_id = vars.get('entry_id')
|
||||
partner_id = vars.get('partner_id')
|
||||
title = vars.get('vid_label')
|
||||
default='{}'), video_id, fatal=False) or {}
|
||||
video_id = embed_vars.get('entry_id') or video_id
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._search_regex(
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
|
||||
'kaltura id', group='id')
|
||||
|
||||
if not partner_id:
|
||||
partner_id = self._search_regex(
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
video = self._download_object_data(host, video_id, 'video')
|
||||
return self._parse_video_data(video)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
'title': 'Scholar Who Walks the Night',
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
'id': '6447',
|
||||
'title': 'Fruity Samurai',
|
||||
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
@@ -111,35 +143,58 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 1000000000
|
||||
|
||||
def _fetch_page(self, domain, parent_id, page):
|
||||
videos = self._call_api(
|
||||
domain, 'getreferencedobjects', parent_id, {
|
||||
'max': self._PAGE_SIZE,
|
||||
'object_type': 'video',
|
||||
'parent_id': parent_id,
|
||||
'start': page * self._PAGE_SIZE,
|
||||
}, 'page %d' % (page + 1))
|
||||
for video in videos:
|
||||
yield self._parse_video_data(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
host, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
else:
|
||||
show = self._download_object_data(host, playlist_id, 'show')
|
||||
title = show.get('name')
|
||||
description = self._get_object_description(show)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, host, playlist_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
_NETRC_MACHINE = 'bbc'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
_MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
|
||||
_MEDIA_SETS = [
|
||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
'iptv-all',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
_EMP_PLAYLIST_NS,
|
||||
)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
def _findall_ns(self, element, xpath):
|
||||
elements = []
|
||||
for ns in self._NAMESPACES:
|
||||
elements.extend(element.findall(xpath % ns))
|
||||
return elements
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
||||
if error is None:
|
||||
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
||||
if error is not None:
|
||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||
return self._findall_ns(media_selection, './{%s}media')
|
||||
error = media_selection.get('result')
|
||||
if error:
|
||||
raise BBCCoUkIE.MediaSelectionError(error)
|
||||
return media_selection.get('media') or []
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return self._findall_ns(media, './{%s}connection')
|
||||
return media.get('connection') or []
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, compat_etree_Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
subtitles['en'] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
]
|
||||
break
|
||||
return subtitles
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
|
||||
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
last_exception = None
|
||||
for mediaselector_url in self._MEDIASELECTOR_URLS:
|
||||
for media_set in self._MEDIA_SETS:
|
||||
try:
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||
last_exception = e
|
||||
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
media_selection = self._download_json(
|
||||
url, programme_id, 'Downloading media selection JSON',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if kind in ('video', 'audio'):
|
||||
bitrate = int_or_none(media.get('bitrate'))
|
||||
encoding = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
supplier = connection.get('supplier')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
format_id = supplier or conn_kind or protocol
|
||||
if service:
|
||||
format_id = '%s_%s' % (service, format_id)
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not service and not supplier and bitrate:
|
||||
if not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
error = self._search_regex(
|
||||
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||
r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
||||
# even not geo restricted at all
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||
# notukerror for non UK (?) users (e.g.
|
||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
|
||||
# Provides fewer formats, but works everywhere for everybody (hopefully)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
|
||||
_MEDIA_SETS = [
|
||||
'mobile-tablet-main',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
|
||||
@@ -1,194 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BeamProBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://mixer.com/api/v1'
|
||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
||||
|
||||
def _extract_channel_info(self, chan):
|
||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||
return {
|
||||
'uploader': chan.get('token') or try_get(
|
||||
chan, lambda x: x['user']['username'], compat_str),
|
||||
'uploader_id': compat_str(user_id) if user_id else None,
|
||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||
}
|
||||
|
||||
|
||||
class BeamProLiveIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:live'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://mixer.com/niterhayven',
|
||||
'info_dict': {
|
||||
'id': '261562',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
|
||||
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
|
||||
'thumbnail': r're:https://.*\.jpg$',
|
||||
'timestamp': 1483477281,
|
||||
'upload_date': '20170103',
|
||||
'uploader': 'niterhayven',
|
||||
'uploader_id': '373396',
|
||||
'age_limit': 18,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'niterhayven is offline',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
|
||||
chan = self._download_json(
|
||||
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
||||
|
||||
if chan.get('online') is False:
|
||||
raise ExtractorError(
|
||||
'{0} is offline'.format(channel_name), expected=True)
|
||||
|
||||
channel_id = chan['id']
|
||||
|
||||
def manifest_url(kind):
|
||||
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
||||
fatal=False)
|
||||
formats.extend(self._extract_smil_formats(
|
||||
manifest_url('smil'), channel_name, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': compat_str(chan.get('id') or channel_name),
|
||||
'title': self._live_title(chan.get('name') or channel_name),
|
||||
'description': clean_html(chan.get('description')),
|
||||
'thumbnail': try_get(
|
||||
chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||
'is_live': True,
|
||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(chan))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class BeamProVodIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:vod'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
||||
'info_dict': {
|
||||
'id': '2259830',
|
||||
'ext': 'mp4',
|
||||
'title': 'willow8714\'s Channel',
|
||||
'duration': 6828.15,
|
||||
'thumbnail': r're:https://.*source\.png$',
|
||||
'timestamp': 1494046474,
|
||||
'upload_date': '20170506',
|
||||
'uploader': 'willow8714',
|
||||
'uploader_id': '6085379',
|
||||
'age_limit': 13,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_format(vod, vod_type):
|
||||
if not vod.get('baseUrl'):
|
||||
return []
|
||||
|
||||
if vod_type == 'hls':
|
||||
filename, protocol = 'manifest.m3u8', 'm3u8_native'
|
||||
elif vod_type == 'raw':
|
||||
filename, protocol = 'source.mp4', 'https'
|
||||
else:
|
||||
assert False
|
||||
|
||||
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
|
||||
|
||||
format_id = [vod_type]
|
||||
if isinstance(data.get('Height'), compat_str):
|
||||
format_id.append('%sp' % data['Height'])
|
||||
|
||||
return [{
|
||||
'url': urljoin(vod['baseUrl'], filename),
|
||||
'format_id': '-'.join(format_id),
|
||||
'ext': 'mp4',
|
||||
'protocol': protocol,
|
||||
'width': int_or_none(data.get('Width')),
|
||||
'height': int_or_none(data.get('Height')),
|
||||
'fps': int_or_none(data.get('Fps')),
|
||||
'tbr': int_or_none(data.get('Bitrate'), 1000),
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
vod_id = self._match_id(url)
|
||||
|
||||
vod_info = self._download_json(
|
||||
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
|
||||
|
||||
state = vod_info.get('state')
|
||||
if state != 'AVAILABLE':
|
||||
raise ExtractorError(
|
||||
'VOD %s is not available (state: %s)' % (vod_id, state),
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
thumbnail_url = None
|
||||
|
||||
for vod in vod_info['vods']:
|
||||
vod_type = vod.get('format')
|
||||
if vod_type in ('hls', 'raw'):
|
||||
formats.extend(self._extract_format(vod, vod_type))
|
||||
elif vod_type == 'thumbnail':
|
||||
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': vod_id,
|
||||
'title': vod_info.get('name') or vod_id,
|
||||
'duration': float_or_none(vod_info.get('duration')),
|
||||
'thumbnail': thumbnail_url,
|
||||
'timestamp': parse_iso8601(vod_info.get('createdAt')),
|
||||
'view_count': int_or_none(vod_info.get('viewsTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
|
||||
|
||||
return info
|
||||
60
youtube_dl/extractor/bongacams.py
Normal file
60
youtube_dl/extractor/bongacams.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://de.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cn.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
channel_id = mobj.group('id')
|
||||
|
||||
amf = self._download_json(
|
||||
'https://%s/tools/amf.php' % host, channel_id,
|
||||
data=urlencode_postdata((
|
||||
('method', 'getRoomData'),
|
||||
('args[]', channel_id),
|
||||
('args[]', 'false'),
|
||||
)), headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
|
||||
server_url = amf['localData']['videoServerUrl']
|
||||
|
||||
uploader_id = try_get(
|
||||
amf, lambda x: x['performerData']['username'], compat_str) or channel_id
|
||||
uploader = try_get(
|
||||
amf, lambda x: x['performerData']['displayName'], compat_str)
|
||||
like_count = int_or_none(try_get(
|
||||
amf, lambda x: x['performerData']['loversCount']))
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
|
||||
channel_id, 'mp4', m3u8_id='hls', live=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(uploader or uploader_id),
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'age_limit': 18,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
@@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
num_drm_sources = 0
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
sources = json_data.get('sources') or []
|
||||
for source in sources:
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
if container == 'WVM' or source.get('key_systems'):
|
||||
num_drm_sources += 1
|
||||
continue
|
||||
elif ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if not formats:
|
||||
errors = json_data.get('errors')
|
||||
if errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if sources and num_drm_sources == len(sources):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -600,24 +601,27 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
config, lambda x: x['video_cloud']['policy_key'])
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
webpage = self._download_webpage(
|
||||
base_url + 'index.min.js', video_id)
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
||||
@@ -11,7 +11,47 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mcp_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||
|
||||
|
||||
class CBSLocalArticleIE(AnvatoIE):
|
||||
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE):
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
|
||||
@@ -336,8 +336,8 @@ class InfoExtractor(object):
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "id", "title", "description", "uploader",
|
||||
"uploader_id", "uploader_url" attributes with the same semantics as videos
|
||||
(see above).
|
||||
"uploader_id", "uploader_url", "duration" attributes with the same semantics
|
||||
as videos (see above).
|
||||
|
||||
|
||||
_type "multi_video" indicates that there are multiple videos that
|
||||
@@ -1237,8 +1237,16 @@ class InfoExtractor(object):
|
||||
'ViewAction': 'view',
|
||||
}
|
||||
|
||||
def extract_interaction_type(e):
|
||||
interaction_type = e.get('interactionType')
|
||||
if isinstance(interaction_type, dict):
|
||||
interaction_type = interaction_type.get('@type')
|
||||
return str_or_none(interaction_type)
|
||||
|
||||
def extract_interaction_statistic(e):
|
||||
interaction_statistic = e.get('interactionStatistic')
|
||||
if isinstance(interaction_statistic, dict):
|
||||
interaction_statistic = [interaction_statistic]
|
||||
if not isinstance(interaction_statistic, list):
|
||||
return
|
||||
for is_e in interaction_statistic:
|
||||
@@ -1246,8 +1254,8 @@ class InfoExtractor(object):
|
||||
continue
|
||||
if is_e.get('@type') != 'InteractionCounter':
|
||||
continue
|
||||
interaction_type = is_e.get('interactionType')
|
||||
if not isinstance(interaction_type, compat_str):
|
||||
interaction_type = extract_interaction_type(is_e)
|
||||
if not interaction_type:
|
||||
continue
|
||||
# For interaction count some sites provide string instead of
|
||||
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||
@@ -2513,16 +2521,18 @@ class InfoExtractor(object):
|
||||
# amp-video and amp-audio are very similar to their HTML5 counterparts
|
||||
# so we wll include them right here (see
|
||||
# https://www.ampproject.org/docs/reference/components/amp-video)
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
|
||||
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
||||
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
|
||||
media_tags = [(media_tag, media_tag_name, media_type, '')
|
||||
for media_tag, media_tag_name, media_type
|
||||
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
||||
media_tags.extend(re.findall(
|
||||
# We only allow video|audio followed by a whitespace or '>'.
|
||||
# Allowing more characters may end up in significant slow down (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
|
||||
# http://www.porntrex.com/maps/videositemap.xml).
|
||||
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
|
||||
for media_tag, _, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
@@ -2595,6 +2605,13 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
signed = 'hdnea=' in manifest_url
|
||||
if not signed:
|
||||
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
|
||||
manifest_url = re.sub(
|
||||
r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
|
||||
'', manifest_url).strip('?')
|
||||
|
||||
formats = []
|
||||
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
@@ -2614,33 +2631,32 @@ class InfoExtractor(object):
|
||||
hls_host = hosts.get('hls')
|
||||
if hls_host:
|
||||
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and 'hdnea=' not in manifest_url:
|
||||
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
|
||||
if http_host and m3u8_formats and not signed:
|
||||
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
|
||||
if len(m3u8_formats) in (qualities_length, qualities_length + 1):
|
||||
i = 0
|
||||
http_formats = []
|
||||
for f in formats:
|
||||
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
|
||||
for f in m3u8_formats:
|
||||
if f['vcodec'] != 'none':
|
||||
for protocol in ('http', 'https'):
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_url = re.sub(
|
||||
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
|
||||
REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||
'url': http_url,
|
||||
'protocol': protocol,
|
||||
})
|
||||
http_formats.append(http_f)
|
||||
formats.append(http_f)
|
||||
i += 1
|
||||
formats.extend(http_formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@ from ..utils import (
|
||||
find_xpath_attr,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
|
||||
bc_attr['data-bcid'])
|
||||
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
||||
|
||||
def add_referer(formats):
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {})['Referer'] = url
|
||||
|
||||
# As of 01.12.2020 this path looks to cover all cases making the rest
|
||||
# of the code unnecessary
|
||||
jwsetup = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if jwsetup:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||
base_url=url)
|
||||
add_referer(info['formats'])
|
||||
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||
return merge_dicts(info, ld_info)
|
||||
|
||||
# Obsolete
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||
@@ -165,6 +187,7 @@ class CSpanIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||
add_referer(formats)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
|
||||
52
youtube_dl/extractor/ctv.py
Normal file
52
youtube_dl/extractor/ctv.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
|
||||
'info_dict': {
|
||||
'id': '2102249',
|
||||
'ext': 'flv',
|
||||
'title': 'Wednesday, December 23, 2020',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
|
||||
'timestamp': 1608732000,
|
||||
'upload_date': '20201223',
|
||||
'series': 'Your Morning',
|
||||
'season': '2020-2021',
|
||||
'season_number': 5,
|
||||
'episode_number': 88,
|
||||
'tags': ['Your Morning'],
|
||||
'categories': ['Talk Show'],
|
||||
'duration': 7467.126,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://www.ctv.ca/space-graphql/graphql', display_id, query={
|
||||
'query': '''{
|
||||
resolvedPath(path: "/%s") {
|
||||
lastSegment {
|
||||
content {
|
||||
... on AxisContent {
|
||||
axisId
|
||||
videoPlayerDestCode
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['resolvedPath']['lastSegment']['content']
|
||||
video_id = content['axisId']
|
||||
return self.url_result(
|
||||
'9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id),
|
||||
'NineCNineMedia', video_id)
|
||||
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
@@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor):
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor):
|
||||
video_id = self._match_id(urlh.geturl())
|
||||
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||
@@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor):
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
|
||||
webpage, 'view count', fatal=False))
|
||||
r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
|
||||
webpage, 'view count', default=None))
|
||||
|
||||
return merge_dicts(json_ld, {
|
||||
'id': video_id,
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class EveryonesMixtapeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
|
||||
'info_dict': {
|
||||
'id': '5bfseWNmlds',
|
||||
'ext': 'mp4',
|
||||
'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
|
||||
'uploader': 'FKR.TV',
|
||||
'uploader_id': 'frenchkissrecords',
|
||||
'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
|
||||
'upload_date': '20081015'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # This is simply YouTube
|
||||
}
|
||||
}, {
|
||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
|
||||
'info_dict': {
|
||||
'id': 'm7m0jJAbMQi',
|
||||
'title': 'Driving',
|
||||
},
|
||||
'playlist_count': 24
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
|
||||
pllist_req = sanitized_Request(pllist_url)
|
||||
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
|
||||
playlist_list = self._download_json(
|
||||
pllist_req, playlist_id, note='Downloading playlist metadata')
|
||||
try:
|
||||
playlist_no = next(playlist['id']
|
||||
for playlist in playlist_list
|
||||
if playlist['code'] == playlist_id)
|
||||
except StopIteration:
|
||||
raise ExtractorError('Playlist id not found')
|
||||
|
||||
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
|
||||
pl_req = sanitized_Request(pl_url)
|
||||
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
playlist = self._download_json(
|
||||
pl_req, playlist_id, note='Downloading playlist info')
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': t['url'],
|
||||
'title': t['title'],
|
||||
} for t in playlist['tracks']]
|
||||
|
||||
if mobj.group('songnr'):
|
||||
songnr = int(mobj.group('songnr')) - 1
|
||||
return entries[songnr]
|
||||
|
||||
playlist_title = playlist['mixData']['name']
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': playlist_title,
|
||||
'entries': entries,
|
||||
}
|
||||
@@ -30,7 +30,11 @@ from .adobetv import (
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import (
|
||||
AENetworksIE,
|
||||
AENetworksCollectionIE,
|
||||
AENetworksShowIE,
|
||||
HistoryTopicIE,
|
||||
HistoryPlayerIE,
|
||||
BiographyIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
@@ -51,7 +55,9 @@ from .appletrailers import (
|
||||
AppleTrailersIE,
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .applepodcasts import ApplePodcastsIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
@@ -89,10 +95,6 @@ from .bbc import (
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beampro import (
|
||||
BeamProLiveIE,
|
||||
BeamProVodIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
@@ -121,6 +123,7 @@ from .bleacherreport import (
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .bpb import BpbIE
|
||||
@@ -165,7 +168,10 @@ from .cbc import (
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
@@ -243,6 +249,7 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
@@ -329,7 +336,6 @@ from .espn import (
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
@@ -393,10 +399,10 @@ from .frontendmasters import (
|
||||
FrontendMastersLessonIE,
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamespot import GameSpotIE
|
||||
@@ -417,7 +423,10 @@ from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlepodcasts import (
|
||||
GooglePodcastsIE,
|
||||
GooglePodcastsFeedIE,
|
||||
)
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
@@ -458,6 +467,10 @@ from .ign import (
|
||||
OneUPIE,
|
||||
PCMagIE,
|
||||
)
|
||||
from .iheart import (
|
||||
IHeartRadioIE,
|
||||
IHeartRadioPodcastIE,
|
||||
)
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
@@ -502,7 +515,6 @@ from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
@@ -531,7 +543,10 @@ from .laola1tv import (
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lbry import LBRYIE
|
||||
from .lbry import (
|
||||
LBRYIE,
|
||||
LBRYChannelIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
@@ -677,9 +692,15 @@ from .nationalgeographic import (
|
||||
NationalGeographicTVIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nba import (
|
||||
NBAWatchEmbedIE,
|
||||
NBAWatchIE,
|
||||
NBAWatchCollectionIE,
|
||||
NBAEmbedIE,
|
||||
NBAIE,
|
||||
NBAChannelIE,
|
||||
)
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
@@ -722,8 +743,14 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nfl import (
|
||||
NFLIE,
|
||||
NFLArticleIE,
|
||||
)
|
||||
from .nhk import (
|
||||
NhkVodIE,
|
||||
NhkVodProgramIE,
|
||||
)
|
||||
from .nhl import NHLIE
|
||||
from .nick import (
|
||||
NickIE,
|
||||
@@ -739,7 +766,6 @@ from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .njpwworld import NJPWWorldIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
@@ -772,6 +798,7 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKRadioPodkastIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
@@ -1035,16 +1062,11 @@ from .skynewsarabia import (
|
||||
from .sky import (
|
||||
SkyNewsIE,
|
||||
SkySportsIE,
|
||||
SkySportsNewsIE,
|
||||
)
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE,
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sonyliv import SonyLIVIE
|
||||
@@ -1078,7 +1100,10 @@ from .spike import (
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .stitcher import (
|
||||
StitcherIE,
|
||||
StitcherShowIE,
|
||||
)
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
@@ -1122,7 +1147,6 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
@@ -1149,6 +1173,7 @@ from .telequebec import (
|
||||
TeleQuebecSquatIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
TeleQuebecVideoIE,
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
@@ -1185,7 +1210,10 @@ from .tnaflix import (
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .toggle import (
|
||||
ToggleIE,
|
||||
MeWatchIE,
|
||||
)
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutv import TouTvIE
|
||||
@@ -1218,7 +1246,14 @@ from .tv2dk import (
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
from .tv5unis import (
|
||||
TV5UnisVideoIE,
|
||||
TV5UnisIE,
|
||||
)
|
||||
from .tva import (
|
||||
TVAIE,
|
||||
QubIE,
|
||||
)
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
TVANouvellesArticleIE,
|
||||
@@ -1227,6 +1262,7 @@ from .tvc import (
|
||||
TVCIE,
|
||||
TVCArticleIE,
|
||||
)
|
||||
from .tver import TVerIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvn24 import TVN24IE
|
||||
@@ -1403,7 +1439,10 @@ from .vshare import VShareIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
VVVVIDShowIE,
|
||||
)
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
@@ -1434,7 +1473,10 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wistia import WistiaIE
|
||||
from .wistia import (
|
||||
WistiaIE,
|
||||
WistiaPlaylistIE,
|
||||
)
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
@@ -1478,6 +1520,8 @@ from .yandexmusic import (
|
||||
YandexMusicTrackIE,
|
||||
YandexMusicAlbumIE,
|
||||
YandexMusicPlaylistIE,
|
||||
YandexMusicArtistTracksIE,
|
||||
YandexMusicArtistAlbumsIE,
|
||||
)
|
||||
from .yandexvideo import YandexVideoIE
|
||||
from .yapfiles import YapFilesIE
|
||||
@@ -1510,11 +1554,11 @@ from .youtube import (
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zaq1 import Zaq1IE
|
||||
from .zattoo import (
|
||||
BBVTVIE,
|
||||
EinsUndEinsTVIE,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -8,6 +9,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -16,14 +18,17 @@ from ..utils import (
|
||||
clean_html,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
parse_count,
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,11 +44,13 @@ class FacebookIE(InfoExtractor):
|
||||
photo\.php|
|
||||
video\.php|
|
||||
video/embed|
|
||||
story\.php
|
||||
story\.php|
|
||||
watch(?:/live)?/?
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/
|
||||
groups/[^/]+/permalink/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
@@ -54,8 +61,6 @@ class FacebookIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = 'facebook'
|
||||
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
@@ -72,6 +77,7 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.video
|
||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
@@ -133,6 +139,7 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||
'info_dict': {
|
||||
@@ -147,6 +154,7 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
@@ -174,6 +182,7 @@ class FacebookIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
@@ -193,18 +202,23 @@ class FacebookIE(InfoExtractor):
|
||||
'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.mediaset.currMedia.edges
|
||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'facebook:544765982287235',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video
|
||||
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -212,6 +226,7 @@ class FacebookIE(InfoExtractor):
|
||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video
|
||||
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
||||
'info_dict': {
|
||||
'id': '359649331226507',
|
||||
@@ -222,7 +237,64 @@ class FacebookIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
|
||||
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
|
||||
'info_dict': {
|
||||
'id': '106560053808006',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/?v=647537299265662',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
|
||||
'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
|
||||
'info_dict': {
|
||||
'id': '10157667649866271',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
|
||||
'info_dict': {
|
||||
'id': '117576630041613',
|
||||
'ext': 'mp4',
|
||||
# TODO: title can be extracted from video page
|
||||
'title': 'Facebook video #117576630041613',
|
||||
'uploader_id': '189393014416438',
|
||||
'upload_date': '20201123',
|
||||
'timestamp': 1606162592,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
|
||||
'info_dict': {
|
||||
'id': '211567722618337',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #211567722618337',
|
||||
'uploader_id': '127875227654254',
|
||||
'upload_date': '20161122',
|
||||
'timestamp': 1479793574,
|
||||
},
|
||||
}, {
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||
'info_dict': {
|
||||
'id': '211641140192478',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
'graphURI': '/api/graphql/'
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@@ -305,23 +377,24 @@ class FacebookIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
def _extract_from_url(self, url, video_id):
|
||||
webpage = self._download_webpage(
|
||||
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
|
||||
|
||||
video_data = None
|
||||
|
||||
def extract_video_data(instances):
|
||||
video_data = []
|
||||
for item in instances:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id'):
|
||||
return video_item['videoData']
|
||||
video_data.append(video_item['videoData'])
|
||||
return video_data
|
||||
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
||||
'server js data', default='{}'), video_id, fatal=False)
|
||||
[r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
|
||||
webpage, 'server js data', default='{}'), video_id, fatal=False)
|
||||
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||
@@ -331,17 +404,118 @@ class FacebookIE(InfoExtractor):
|
||||
return extract_video_data(try_get(
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = video.get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
|
||||
def process_formats(formats):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
replay_data = extract_relay_data(_filter)
|
||||
for require in (replay_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
|
||||
webpage, 'js data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||
r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
|
||||
], webpage, 'js data', default='{}'), video_id, js_to_json, False)
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
return webpage, False
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
@@ -350,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
post_data = {
|
||||
'doc_id': 3731964053542869,
|
||||
'variables': json.dumps({
|
||||
'livingRoomID': video_id,
|
||||
}),
|
||||
}
|
||||
|
||||
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||
if prefetched_data:
|
||||
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||
if lsd:
|
||||
post_data[lsd['name']] = lsd['value']
|
||||
|
||||
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||
for define in (relay_data.get('define') or []):
|
||||
if define[0] == 'RelayAPIConfigDefaults':
|
||||
self._api_config = define[2]
|
||||
|
||||
living_room = self._download_json(
|
||||
urljoin(url, self._api_config['graphURI']), video_id,
|
||||
data=urlencode_postdata(post_data))['data']['living_room']
|
||||
|
||||
entries = []
|
||||
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||
v_id = video.get('id')
|
||||
if not v_id:
|
||||
continue
|
||||
v_id = compat_str(v_id)
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||
self.ie_key(), v_id, video.get('name')))
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
# Video info not in first request, do a secondary request using
|
||||
# tahoe player specific URL
|
||||
tahoe_data = self._download_webpage(
|
||||
@@ -379,8 +590,19 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_data:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
subtitles = {}
|
||||
if len(video_data) > 1:
|
||||
entries = []
|
||||
for v in video_data:
|
||||
video_url = v[0].get('video_url')
|
||||
if not video_url:
|
||||
continue
|
||||
entries.append(self.url_result(urljoin(
|
||||
url, video_url), self.ie_key(), v[0].get('video_id')))
|
||||
return self.playlist_result(entries, video_id)
|
||||
video_data = video_data[0]
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for f in video_data:
|
||||
format_id = f['stream_type']
|
||||
if f and isinstance(f, dict):
|
||||
@@ -399,22 +621,14 @@ class FacebookIE(InfoExtractor):
|
||||
'url': src,
|
||||
'preference': preference,
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
extract_dash_manifest(f[0], formats)
|
||||
subtitles_src = f[0].get('subtitles_src')
|
||||
if subtitles_src:
|
||||
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
|
||||
self._sort_formats(formats)
|
||||
process_formats(formats)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||
@@ -454,35 +668,13 @@ class FacebookIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
return info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
|
||||
webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
|
||||
|
||||
if info_dict:
|
||||
return info_dict
|
||||
|
||||
if '/posts/' in url:
|
||||
video_id_json = self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
|
||||
default='')
|
||||
if video_id_json:
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||
for vid in self._parse_json(video_id_json, video_id)]
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
# Single Video?
|
||||
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
|
||||
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||
else:
|
||||
_, info_dict = self._extract_from_url(
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
return self._extract_from_url(real_url, video_id)
|
||||
|
||||
|
||||
class FacebookPluginsVideoIE(InfoExtractor):
|
||||
|
||||
35
youtube_dl/extractor/fujitv.py
Normal file
35
youtube_dl/extractor/fujitv.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FujiTVFODPlus7IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
|
||||
_BASE_URL = 'http://i.fod.fujitv.co.jp/'
|
||||
_BITRATE_MAP = {
|
||||
300: (320, 180),
|
||||
800: (640, 360),
|
||||
1200: (1280, 720),
|
||||
2000: (1280, 720),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
|
||||
for f in formats:
|
||||
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||
if wh:
|
||||
f.update({
|
||||
'width': wh[0],
|
||||
'height': wh[1],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fxnetworks.com/video/1032565827847',
|
||||
'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
|
||||
'info_dict': {
|
||||
'id': 'dRzwHC_MMqIv',
|
||||
'ext': 'mp4',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
|
||||
'age_limit': 14,
|
||||
'uploader': 'NEWA-FNG-FX',
|
||||
'upload_date': '20170825',
|
||||
'timestamp': 1503686274,
|
||||
'episode_number': 0,
|
||||
'season_number': 2,
|
||||
'series': 'Better Things',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.simpsonsworld.com/video/716094019682',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if 'The content you are trying to access is not available in your region.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
|
||||
release_url = video_data['rel']
|
||||
title = video_data['data-title']
|
||||
rating = video_data.get('data-rating')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if player_type == 'movies':
|
||||
query.update({
|
||||
'manifest': 'm3u',
|
||||
})
|
||||
else:
|
||||
query.update({
|
||||
'switch': 'http',
|
||||
})
|
||||
if video_data.get('data-req-auth') == '1':
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data['data-channel'], title,
|
||||
video_data.get('data-guid'), rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'series': video_data.get('data-show-title'),
|
||||
'episode_number': int_or_none(video_data.get('data-episode')),
|
||||
'season_number': int_or_none(video_data.get('data-season')),
|
||||
'thumbnail': video_data.get('data-large-thumb'),
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
@@ -1,16 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .once import OnceIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
dict_get,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class GameSpotIE(OnceIE):
|
||||
@@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
|
||||
'title': 'Arma 3 - Community Guide: SITREP I',
|
||||
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
||||
},
|
||||
'skip': 'manifest URL give HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||
'md5': '173ea87ad762cf5d3bf6163dceb255a6',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6424837',
|
||||
'ext': 'mp4',
|
||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/videos/embed/6439218/',
|
||||
'only_matching': True,
|
||||
@@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
data_video_json = self._search_regex(
|
||||
r'data-video=["\'](.*?)["\']', webpage, 'data video')
|
||||
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
|
||||
data_video = self._parse_json(self._html_search_regex(
|
||||
r'data-video=(["\'])({.*?})\1', webpage,
|
||||
'video data', group=2), page_id)
|
||||
title = compat_urllib_parse_unquote(data_video['title'])
|
||||
streams = data_video['videoStreams']
|
||||
|
||||
manifest_url = None
|
||||
formats = []
|
||||
f4m_url = streams.get('f4m_stream')
|
||||
if f4m_url:
|
||||
manifest_url = f4m_url
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
|
||||
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
|
||||
|
||||
m3u8_url = streams.get('adaptive_stream')
|
||||
if m3u8_url:
|
||||
manifest_url = m3u8_url
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
progressive_url = dict_get(
|
||||
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
|
||||
if progressive_url and manifest_url:
|
||||
qualities_basename = self._search_regex(
|
||||
r'/([^/]+)\.csmil/',
|
||||
manifest_url, 'qualities basename', default=None)
|
||||
if qualities_basename:
|
||||
QUALITIES_RE = r'((,\d+)+,?)'
|
||||
qualities = self._search_regex(
|
||||
QUALITIES_RE, qualities_basename,
|
||||
'qualities', default=None)
|
||||
if qualities:
|
||||
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
|
||||
qualities.sort()
|
||||
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
|
||||
http_url_basename = url_basename(progressive_url)
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http-%d' % q,
|
||||
'tbr': q,
|
||||
})
|
||||
for f in m3u8_formats:
|
||||
formats.append(f)
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_f.update({
|
||||
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': f['url'].replace('.m3u8', '.mp4'),
|
||||
})
|
||||
formats.append(http_f)
|
||||
|
||||
onceux_json = self._search_regex(
|
||||
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
|
||||
if onceux_json:
|
||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
||||
if onceux_url:
|
||||
formats.extend(self._extract_once_formats(re.sub(
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
|
||||
http_formats_preference=-1))
|
||||
mpd_url = streams.get('adaptive_dash')
|
||||
if mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url, page_id, mpd_id='dash', fatal=False))
|
||||
|
||||
if not formats:
|
||||
for quality in ['sd', 'hd']:
|
||||
# It's actually a link to a flv file
|
||||
flv_url = streams.get('f4m_{0}'.format(quality))
|
||||
if flv_url is not None:
|
||||
formats.append({
|
||||
'url': flv_url,
|
||||
'ext': 'flv',
|
||||
'format_id': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
'id': data_video.get('guid') or page_id,
|
||||
'display_id': page_id,
|
||||
'title': compat_urllib_parse_unquote(data_video['title']),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
|
||||
@@ -20,19 +20,24 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
is_html,
|
||||
js_to_json,
|
||||
KNOWN_EXTENSIONS,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
from .brightcove import (
|
||||
@@ -48,7 +53,6 @@ from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
@@ -63,7 +67,10 @@ from .tube8 import Tube8IE
|
||||
from .mofosex import MofosexEmbedIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .youporn import YouPornIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
@@ -120,6 +127,7 @@ from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -198,11 +206,48 @@ class GenericIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
'info_dict': {
|
||||
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
'ext': 'm4v',
|
||||
'upload_date': '20150228',
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
'title': 'MSNBC Rachel Maddow (video)',
|
||||
'description': 're:.*her unique approach to storytelling.*',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'mov',
|
||||
'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
|
||||
'title': 're:MSNBC Rachel Maddow',
|
||||
'description': 're:.*her unique approach to storytelling.*',
|
||||
'timestamp': int,
|
||||
'upload_date': compat_str,
|
||||
'duration': float,
|
||||
},
|
||||
}],
|
||||
},
|
||||
# RSS feed with item with description and thumbnails
|
||||
{
|
||||
'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'info_dict': {
|
||||
'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'title': 're:.*100% Hydrogen.*',
|
||||
'description': 're:.*In this episode.*',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'm4a',
|
||||
'id': 'c1c879525ce2cb640b344507e682c36d',
|
||||
'title': 're:Hydrogen!',
|
||||
'description': 're:.*In this episode we are going.*',
|
||||
'timestamp': 1567977776,
|
||||
'upload_date': '20190908',
|
||||
'duration': 459,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosures and unsupported link URLs
|
||||
{
|
||||
@@ -1983,22 +2028,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
|
||||
'info_dict': {
|
||||
'id': 'uPDB5I9wfp8',
|
||||
'ext': 'webm',
|
||||
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
|
||||
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
|
||||
'upload_date': '20160219',
|
||||
'uploader': 'Pocoyo - Português (BR)',
|
||||
'uploader_id': 'PocoyoBrazil',
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
|
||||
'info_dict': {
|
||||
@@ -2103,23 +2132,23 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||
'upload_date': '20170909',
|
||||
'timestamp': 1504915200,
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# {
|
||||
# # Zype embed
|
||||
# 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
# 'info_dict': {
|
||||
# 'id': '5b400b834b32992a310622b9',
|
||||
# 'ext': 'mp4',
|
||||
# 'title': 'Smoky Barbecue Favorites',
|
||||
# 'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
# 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||
# 'upload_date': '20170909',
|
||||
# 'timestamp': 1504915200,
|
||||
# },
|
||||
# 'add_ie': [ZypeIE.ie_key()],
|
||||
# 'params': {
|
||||
# 'skip_download': True,
|
||||
# },
|
||||
# },
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
@@ -2168,7 +2197,32 @@ class GenericIE(InfoExtractor):
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
{
|
||||
# VHX Embed
|
||||
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
|
||||
'info_dict': {
|
||||
'id': '858208',
|
||||
'ext': 'mp4',
|
||||
'title': 'Untitled',
|
||||
'uploader_id': 'user80538407',
|
||||
'uploader': 'OTT Videos',
|
||||
},
|
||||
},
|
||||
{
|
||||
# ArcPublishing PoWa video player
|
||||
'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
|
||||
'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
|
||||
'info_dict': {
|
||||
'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Senate candidates wave to voters on Anchorage streets',
|
||||
'description': 'md5:91f51a6511f090617353dc720318b20e',
|
||||
'timestamp': 1604378735,
|
||||
'upload_date': '20201103',
|
||||
'duration': 1581,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2180,6 +2234,10 @@ class GenericIE(InfoExtractor):
|
||||
playlist_desc_el = doc.find('./channel/description')
|
||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||
|
||||
NS_MAP = {
|
||||
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for it in doc.findall('./channel/item'):
|
||||
next_url = None
|
||||
@@ -2195,10 +2253,33 @@ class GenericIE(InfoExtractor):
|
||||
if not next_url:
|
||||
continue
|
||||
|
||||
def itunes(key):
|
||||
return xpath_text(
|
||||
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
|
||||
default=None)
|
||||
|
||||
duration = itunes('duration')
|
||||
explicit = (itunes('explicit') or '').lower()
|
||||
if explicit in ('true', 'yes'):
|
||||
age_limit = 18
|
||||
elif explicit in ('false', 'no'):
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': next_url,
|
||||
'title': it.find('title').text,
|
||||
'description': xpath_text(it, 'description', default=None),
|
||||
'timestamp': unified_timestamp(
|
||||
xpath_text(it, 'pubDate', default=None)),
|
||||
'duration': int_or_none(duration) or parse_duration(duration),
|
||||
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
|
||||
'episode': itunes('title'),
|
||||
'episode_number': int_or_none(itunes('episode')),
|
||||
'season_number': int_or_none(itunes('season')),
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -2318,7 +2399,7 @@ class GenericIE(InfoExtractor):
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': self._generic_title(url),
|
||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
||||
'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
|
||||
}
|
||||
|
||||
# Check for direct link to a video
|
||||
@@ -2424,7 +2505,9 @@ class GenericIE(InfoExtractor):
|
||||
# Sometimes embedded video player is hidden behind percent encoding
|
||||
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
webpage = compat_urllib_parse_unquote(webpage)
|
||||
# FIXME: unescaping the whole page may break URLs, commenting out for now.
|
||||
# There probably should be a second run of generic extractor on unescaped webpage.
|
||||
# webpage = compat_urllib_parse_unquote(webpage)
|
||||
|
||||
# Unescape squarespace embeds to be detected by generic extractor,
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||
@@ -2506,6 +2589,10 @@ class GenericIE(InfoExtractor):
|
||||
if tp_urls:
|
||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||
|
||||
arc_urls = ArcPublishingIE._extract_urls(webpage)
|
||||
if arc_urls:
|
||||
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2517,6 +2604,10 @@ class GenericIE(InfoExtractor):
|
||||
if vimeo_urls:
|
||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||
|
||||
vhx_url = VHXEmbedIE._extract_url(webpage)
|
||||
if vhx_url:
|
||||
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
@@ -2772,11 +2863,6 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded smotri.com player
|
||||
smotri_url = SmotriIE._extract_url(webpage)
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Look for embedded Myvi.ru player
|
||||
myvi_url = MyviIE._extract_url(webpage)
|
||||
if myvi_url:
|
||||
|
||||
@@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'resource_id': 'Disney',
|
||||
}
|
||||
},
|
||||
'fxnow.fxnetworks': {
|
||||
'brand': '025',
|
||||
'requestor_id': 'dtci',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?P<sub_domain>%s)\.)?go|
|
||||
(?P<sub_domain_2>abc|freeform|disneynow)
|
||||
(?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
@@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||
'info_dict': {
|
||||
'id': 'VDKA12782841',
|
||||
'ext': 'mp4',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import codecs
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class GooglePlusIE(InfoExtractor):
|
||||
IE_DESC = 'Google Plus'
|
||||
_VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||
IE_NAME = 'plus.google'
|
||||
_TEST = {
|
||||
'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
|
||||
'info_dict': {
|
||||
'id': 'ZButuJc6CtH',
|
||||
'ext': 'flv',
|
||||
'title': '嘆きの天使 降臨',
|
||||
'upload_date': '20120613',
|
||||
'uploader': '井上ヨシマサ',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Step 1, Retrieve post webpage to extract further information
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
|
||||
|
||||
title = self._og_search_description(webpage).splitlines()[0]
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
||||
webpage, 'upload date', fatal=False, flags=re.VERBOSE))
|
||||
uploader = self._html_search_regex(
|
||||
r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
# Step 2, Simulate clicking the image box to launch video
|
||||
DOMAIN = 'https://plus.google.com/'
|
||||
video_page = self._search_regex(
|
||||
r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
||||
webpage, 'video page URL')
|
||||
if not video_page.startswith(DOMAIN):
|
||||
video_page = DOMAIN + video_page
|
||||
|
||||
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
|
||||
|
||||
def unicode_escape(s):
|
||||
decoder = codecs.getdecoder('unicode_escape')
|
||||
return re.sub(
|
||||
r'\\u[0-9a-fA-F]{4,}',
|
||||
lambda m: decoder(m.group(0))[0],
|
||||
s)
|
||||
|
||||
# Extract video links all sizes
|
||||
formats = [{
|
||||
'url': unicode_escape(video_url),
|
||||
'ext': 'flv',
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
} for width, height, video_url in re.findall(
|
||||
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
88
youtube_dl/extractor/googlepodcasts.py
Normal file
88
youtube_dl/extractor/googlepodcasts.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class GooglePodcastsBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
|
||||
|
||||
def _batch_execute(self, func_id, video_id, params):
|
||||
return json.loads(self._download_json(
|
||||
'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
|
||||
video_id, data=urlencode_postdata({
|
||||
'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
|
||||
}), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])
|
||||
|
||||
def _extract_episode(self, episode):
|
||||
return {
|
||||
'id': episode[4][3],
|
||||
'title': episode[8],
|
||||
'url': clean_podcast_url(episode[13]),
|
||||
'thumbnail': episode[2],
|
||||
'description': episode[9],
|
||||
'creator': try_get(episode, lambda x: x[14]),
|
||||
'timestamp': int_or_none(episode[11]),
|
||||
'duration': int_or_none(episode[12]),
|
||||
'series': episode[1],
|
||||
}
|
||||
|
||||
|
||||
class GooglePodcastsIE(GooglePodcastsBaseIE):
|
||||
IE_NAME = 'google:podcasts'
|
||||
_VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
|
||||
'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
|
||||
'info_dict': {
|
||||
'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
|
||||
'ext': 'mp3',
|
||||
'title': 'WWDTM New Year 2021',
|
||||
'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
|
||||
'upload_date': '20210102',
|
||||
'timestamp': 1609606800,
|
||||
'duration': 2901,
|
||||
'series': "Wait Wait... Don't Tell Me!",
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups()
|
||||
episode = self._batch_execute(
|
||||
'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
|
||||
return self._extract_episode(episode)
|
||||
|
||||
|
||||
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
|
||||
IE_NAME = 'google:podcasts:feed'
|
||||
_VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
|
||||
'info_dict': {
|
||||
'title': "Wait Wait... Don't Tell Me!",
|
||||
'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
b64_feed_url = self._match_id(url)
|
||||
data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
|
||||
|
||||
entries = []
|
||||
for episode in (try_get(data, lambda x: x[1][0]) or []):
|
||||
entries.append(self._extract_episode(episode))
|
||||
|
||||
feed = try_get(data, lambda x: x[3]) or []
|
||||
return self.playlist_result(
|
||||
entries, playlist_title=try_get(feed, lambda x: x[0]),
|
||||
playlist_description=try_get(feed, lambda x: x[2]))
|
||||
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
@@ -25,43 +26,50 @@ from ..utils import (
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
def _call_api_impl(self, path, video_id, query):
|
||||
def _call_api_impl(self, path, video_id, headers, query, data=None):
|
||||
st = int(time.time())
|
||||
exp = st + 6000
|
||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
response = self._download_json(
|
||||
'https://api.hotstar.com/' + path, video_id, headers={
|
||||
'hotstarauth': auth,
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'JIO',
|
||||
}, query=query)
|
||||
h = {'hotstarauth': auth}
|
||||
h.update(headers)
|
||||
return self._download_json(
|
||||
'https://api.hotstar.com/' + path,
|
||||
video_id, headers=h, query=query, data=data)
|
||||
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
response = self._call_api_impl(path, video_id, {
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'JIO',
|
||||
}, {
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
})
|
||||
if response['statusCode'] != 'OK':
|
||||
raise ExtractorError(
|
||||
response['body']['message'], expected=True)
|
||||
return response['body']['results']
|
||||
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
return self._call_api_impl(path, video_id, {
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
})
|
||||
|
||||
def _call_api_v2(self, path, video_id):
|
||||
return self._call_api_impl(
|
||||
'%s/in/contents/%s' % (path, video_id), video_id, {
|
||||
'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
|
||||
'client': 'mweb',
|
||||
'clientVersion': '6.18.0',
|
||||
'deviceId': compat_str(uuid.uuid4()),
|
||||
'osName': 'Windows',
|
||||
'osVersion': '10',
|
||||
})
|
||||
def _call_api_v2(self, path, video_id, headers, query=None, data=None):
|
||||
h = {'X-Request-Id': compat_str(uuid.uuid4())}
|
||||
h.update(headers)
|
||||
try:
|
||||
return self._call_api_impl(
|
||||
path, video_id, h, query, data)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
if e.cause.code == 402:
|
||||
self.raise_login_required()
|
||||
message = self._parse_json(e.cause.read().decode(), video_id)['message']
|
||||
if message in ('Content not available in region', 'Country is not supported'):
|
||||
raise self.raise_geo_restricted(message)
|
||||
raise ExtractorError(message)
|
||||
raise e
|
||||
|
||||
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
# contentData
|
||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||
@@ -92,8 +100,13 @@ class HotStarIE(HotStarBaseIE):
|
||||
# only available via api v2
|
||||
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_DEVICE_ID = None
|
||||
_USER_TOKEN = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -121,7 +134,30 @@ class HotStarIE(HotStarBaseIE):
|
||||
headers = {'Referer': url}
|
||||
formats = []
|
||||
geo_restricted = False
|
||||
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
||||
|
||||
if not self._USER_TOKEN:
|
||||
self._DEVICE_ID = compat_str(uuid.uuid4())
|
||||
self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
|
||||
'X-HS-Platform': 'PCTV',
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'device_ids': [{
|
||||
'id': self._DEVICE_ID,
|
||||
'type': 'device_id',
|
||||
}],
|
||||
}).encode())['user_identity']
|
||||
|
||||
playback_sets = self._call_api_v2(
|
||||
'play/v2/playback/content/' + video_id, video_id, {
|
||||
'X-HS-Platform': 'web',
|
||||
'X-HS-AppVersion': '6.99.1',
|
||||
'X-HS-UserToken': self._USER_TOKEN,
|
||||
}, query={
|
||||
'device-id': self._DEVICE_ID,
|
||||
'desired-config': 'encryption:plain',
|
||||
'os-name': 'Windows',
|
||||
'os-version': '10',
|
||||
})['data']['playBackSets']
|
||||
for playback_set in playback_sets:
|
||||
if not isinstance(playback_set, dict):
|
||||
continue
|
||||
@@ -163,19 +199,22 @@ class HotStarIE(HotStarBaseIE):
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
image = try_get(video_data, lambda x: x['image']['h'], compat_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||
'formats': formats,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'channel_id': str_or_none(video_data.get('channelId')),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': video_data.get('seasonId'),
|
||||
'season_id': str_or_none(video_data.get('seasonId')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
@@ -183,7 +222,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
|
||||
class HotStarPlaylistIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||
'info_dict': {
|
||||
@@ -193,6 +232,9 @@ class HotStarPlaylistIE(HotStarBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
97
youtube_dl/extractor/iheart.py
Normal file
97
youtube_dl/extractor/iheart.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class IHeartRadioBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, video_id, fatal=True, query=None):
|
||||
return self._download_json(
|
||||
'https://api.iheart.com/api/v3/podcast/' + path,
|
||||
video_id, fatal=fatal, query=query)
|
||||
|
||||
def _extract_episode(self, episode):
|
||||
return {
|
||||
'thumbnail': episode.get('imageUrl'),
|
||||
'description': clean_html(episode.get('description')),
|
||||
'timestamp': int_or_none(episode.get('startDate'), 1000),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
}
|
||||
|
||||
|
||||
class IHeartRadioIE(IHeartRadioBaseIE):
|
||||
IENAME = 'iheartradio'
|
||||
_VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
|
||||
'md5': 'c8609c92c8688dcb69d8541042b8abca',
|
||||
'info_dict': {
|
||||
'id': '70346499',
|
||||
'ext': 'mp3',
|
||||
'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
|
||||
'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
|
||||
'timestamp': 1597741200,
|
||||
'upload_date': '20200818',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
episode = self._call_api(
|
||||
'episodes/' + episode_id, episode_id)['episode']
|
||||
info = self._extract_episode(episode)
|
||||
info.update({
|
||||
'id': episode_id,
|
||||
'title': episode['title'],
|
||||
'url': clean_podcast_url(episode['mediaUrl']),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class IHeartRadioPodcastIE(IHeartRadioBaseIE):
|
||||
IE_NAME = 'iheartradio:podcast'
|
||||
_VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
|
||||
'info_dict': {
|
||||
'id': '30717896',
|
||||
'title': 'It Could Happen Here',
|
||||
'description': 'md5:5842117412a967eb0b01f8088eb663e2',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
podcast_id = self._match_id(url)
|
||||
path = 'podcasts/' + podcast_id
|
||||
episodes = self._call_api(
|
||||
path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
|
||||
|
||||
entries = []
|
||||
for episode in episodes:
|
||||
episode_id = str_or_none(episode.get('id'))
|
||||
if not episode_id:
|
||||
continue
|
||||
info = self._extract_episode(episode)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'id': episode_id,
|
||||
'title': episode.get('title'),
|
||||
'url': 'iheartradio:' + episode_id,
|
||||
'ie_key': IHeartRadioIE.ie_key(),
|
||||
})
|
||||
entries.append(info)
|
||||
|
||||
podcast = self._call_api(path, podcast_id, False) or {}
|
||||
|
||||
return self.playlist_result(
|
||||
entries, podcast_id, podcast.get('title'), podcast.get('description'))
|
||||
@@ -22,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'uploader': 'B E A U T Y F O R A S H E S',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
(media, video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
width) = [None] * 12
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -137,59 +140,77 @@ class InstagramIE(InfoExtractor):
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||
if not media:
|
||||
additional_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'additional data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if additional_data:
|
||||
media = try_get(
|
||||
additional_data, lambda x: x['graphql']['shortcode_media'],
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
||||
def get_count(key, kind):
|
||||
return int_or_none(try_get(
|
||||
def get_count(keys, kind):
|
||||
if not isinstance(keys, (list, tuple)):
|
||||
keys = [keys]
|
||||
for key in keys:
|
||||
count = int_or_none(try_get(
|
||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count('to_comment', 'comment')
|
||||
if count is not None:
|
||||
return count
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count(
|
||||
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
@@ -1,29 +1,21 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
get_element_by_class,
|
||||
JSON_LD_RE,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
url_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,14 +23,18 @@ class ITVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||
_GEO_COUNTRIES = ['GB']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||
'info_dict': {
|
||||
'id': '2a2936a0053',
|
||||
'ext': 'flv',
|
||||
'title': 'Home Movie',
|
||||
'id': '2a4547a0012',
|
||||
'ext': 'mp4',
|
||||
'title': 'Liar - Series 2 - Episode 6',
|
||||
'description': 'md5:d0f91536569dec79ea184f0a44cca089',
|
||||
'series': 'Liar',
|
||||
'season_number': 2,
|
||||
'episode_number': 6,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
@@ -61,220 +57,97 @@ class ITVIE(InfoExtractor):
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
||||
|
||||
ns_map = {
|
||||
'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
|
||||
'tem': 'http://tempuri.org/',
|
||||
'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
|
||||
'com': 'http://schemas.itv.com/2009/05/Common',
|
||||
}
|
||||
for ns, full_ns in ns_map.items():
|
||||
compat_etree_register_namespace(ns, full_ns)
|
||||
|
||||
def _add_ns(name):
|
||||
return xpath_with_ns(name, ns_map)
|
||||
|
||||
def _add_sub_element(element, name):
|
||||
return etree.SubElement(element, _add_ns(name))
|
||||
|
||||
production_id = (
|
||||
params.get('data-video-autoplay-id')
|
||||
or '%s#001' % (
|
||||
params.get('data-video-episode-id')
|
||||
or video_id.replace('a', '/')))
|
||||
|
||||
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
||||
_add_sub_element(req_env, 'soapenv:Header')
|
||||
body = _add_sub_element(req_env, 'soapenv:Body')
|
||||
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
||||
request = _add_sub_element(get_playlist, 'tem:request')
|
||||
_add_sub_element(request, 'itv:ProductionId').text = production_id
|
||||
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
||||
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
||||
_add_sub_element(vodcrid, 'com:Id')
|
||||
_add_sub_element(request, 'itv:Partition')
|
||||
user_info = _add_sub_element(get_playlist, 'tem:userInfo')
|
||||
_add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
|
||||
_add_sub_element(user_info, 'itv:DM')
|
||||
_add_sub_element(user_info, 'itv:RevenueScienceValue')
|
||||
_add_sub_element(user_info, 'itv:SessionId')
|
||||
_add_sub_element(user_info, 'itv:SsoToken')
|
||||
_add_sub_element(user_info, 'itv:UserToken')
|
||||
site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
|
||||
_add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
|
||||
_add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
|
||||
_add_sub_element(site_info, 'itv:Category')
|
||||
_add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
|
||||
_add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
|
||||
device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
|
||||
_add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
|
||||
player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
|
||||
_add_sub_element(player_info, 'itv:Version').text = '2'
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
|
||||
hmac = params['data-video-hmac']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'text/xml; charset=utf-8',
|
||||
'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Safari',
|
||||
'model': '5',
|
||||
'os': {
|
||||
'name': 'Windows NT',
|
||||
'version': '6.1',
|
||||
'type': 'desktop'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
},
|
||||
'variantAvailability': {
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes', 'outband-webvtt'],
|
||||
'max': ['hls', 'aes', 'outband-webvtt']
|
||||
},
|
||||
'platformTag': 'dotcom'
|
||||
}
|
||||
}).encode(), headers=headers)
|
||||
video_data = ios_playlist['Playlist']['Video']
|
||||
ios_base_url = video_data.get('Base')
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
def extract_subtitle(sub_url):
|
||||
ext = determine_ext(sub_url, 'ttml')
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': sub_url,
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
resp_env = self._download_xml(
|
||||
params['data-playlist-url'], video_id,
|
||||
headers=headers, data=etree.tostring(req_env), fatal=False)
|
||||
if resp_env:
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
elif fault_code not in (
|
||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'episode_title': params.get('data-video-episode'),
|
||||
'series': params.get('data-video-title'),
|
||||
})
|
||||
for media_file in (video_data.get('MediaFiles') or []):
|
||||
href = media_file.get('Href')
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||
info.update({
|
||||
'title': title,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if caption_url.text:
|
||||
extract_subtitle(caption_url.text)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
||||
hmac = params.get('data-video-hmac')
|
||||
if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url):
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Safari',
|
||||
'model': '5',
|
||||
'os': {
|
||||
'name': 'Windows NT',
|
||||
'version': '6.1',
|
||||
'type': 'desktop'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
},
|
||||
'variantAvailability': {
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes', 'outband-webvtt'],
|
||||
'max': ['hls', 'aes', 'outband-webvtt']
|
||||
},
|
||||
'platformTag': 'dotcom'
|
||||
}
|
||||
}).encode(), headers=headers, fatal=False)
|
||||
if ios_playlist:
|
||||
video_data = ios_playlist.get('Playlist', {}).get('Video', {})
|
||||
ios_base_url = video_data.get('Base')
|
||||
for media_file in video_data.get('MediaFiles', []):
|
||||
href = media_file.get('Href')
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
subs = video_data.get('Subtitles')
|
||||
if isinstance(subs, list):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if href:
|
||||
extract_subtitle(href)
|
||||
if not info.get('duration'):
|
||||
info['duration'] = parse_duration(video_data.get('Duration'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
subtitles = {}
|
||||
subs = video_data.get('Subtitles') or []
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if not href:
|
||||
continue
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': href,
|
||||
'ext': determine_ext(href, 'vtt'),
|
||||
})
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not info:
|
||||
json_ld = self._parse_json(self._search_regex(
|
||||
JSON_LD_RE, webpage, 'JSON-LD', '{}',
|
||||
group='json_ld'), video_id, fatal=False)
|
||||
if json_ld and json_ld.get('@type') == 'BreadcrumbList':
|
||||
for ile in (json_ld.get('itemListElement:') or []):
|
||||
item = ile.get('item:') or {}
|
||||
if item.get('@type') == 'TVEpisode':
|
||||
item['@context'] = 'http://schema.org'
|
||||
info = self._json_ld(item, video_id, fatal=False) or {}
|
||||
break
|
||||
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
webpage_info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not webpage_info.get('title'):
|
||||
webpage_info['title'] = self._html_search_regex(
|
||||
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or webpage_info['episode']
|
||||
|
||||
return merge_dicts(info, webpage_info)
|
||||
'duration': parse_duration(video_data.get('Duration')),
|
||||
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
|
||||
}, info)
|
||||
|
||||
|
||||
class ITVBTCCIE(InfoExtractor):
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
srt_subtitles_timecode,
|
||||
)
|
||||
|
||||
|
||||
class KanalPlayIE(InfoExtractor):
|
||||
IE_DESC = 'Kanal 5/9/11 Play'
|
||||
_VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
|
||||
'info_dict': {
|
||||
'id': '3270012277',
|
||||
'ext': 'flv',
|
||||
'title': 'Saknar både dusch och avlopp',
|
||||
'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
|
||||
'duration': 2636.36,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _fix_subtitles(self, subs):
|
||||
return '\r\n\r\n'.join(
|
||||
'%s\r\n%s --> %s\r\n%s'
|
||||
% (
|
||||
num,
|
||||
srt_subtitles_timecode(item['startMillis'] / 1000.0),
|
||||
srt_subtitles_timecode(item['endMillis'] / 1000.0),
|
||||
item['text'],
|
||||
) for num, item in enumerate(subs, 1))
|
||||
|
||||
def _get_subtitles(self, channel_id, video_id):
|
||||
subs = self._download_json(
|
||||
'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
|
||||
video_id, 'Downloading subtitles JSON', fatal=False)
|
||||
return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
channel_id = mobj.group('channel_id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
|
||||
video_id)
|
||||
|
||||
reasons_for_no_streams = video.get('reasonsForNoStreams')
|
||||
if reasons_for_no_streams:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
|
||||
expected=True)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
duration = float_or_none(video.get('length'), 1000)
|
||||
thumbnail = video.get('posterUrl')
|
||||
|
||||
stream_base_url = video['streamBaseUrl']
|
||||
|
||||
formats = [{
|
||||
'url': stream_base_url,
|
||||
'play_path': stream['source'],
|
||||
'ext': 'flv',
|
||||
'tbr': float_or_none(stream.get('bitrate'), 1000),
|
||||
'rtmp_real_time': True,
|
||||
} for stream in video['streams']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
if video.get('hasSubtitle'):
|
||||
subtitles = self.extract_subtitles(channel_id, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,13 +11,73 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LBRYIE(InfoExtractor):
|
||||
IE_NAME = 'lbry.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])'
|
||||
class LBRYBaseIE(InfoExtractor):
|
||||
_BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
|
||||
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
|
||||
_OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
|
||||
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params, resource):
|
||||
return self._download_json(
|
||||
'https://api.lbry.tv/api/v1/proxy',
|
||||
display_id, 'Downloading %s JSON metadata' % resource,
|
||||
headers={'Content-Type': 'application/json-rpc'},
|
||||
data=json.dumps({
|
||||
'method': method,
|
||||
'params': params,
|
||||
}).encode())['result']
|
||||
|
||||
def _resolve_url(self, url, display_id, resource):
|
||||
return self._call_api_proxy(
|
||||
'resolve', display_id, {'urls': url}, resource)[url]
|
||||
|
||||
def _permanent_url(self, url, claim_name, claim_id):
|
||||
return urljoin(url, '/%s:%s' % (claim_name, claim_id))
|
||||
|
||||
def _parse_stream(self, stream, url):
|
||||
stream_value = stream.get('value') or {}
|
||||
stream_type = stream_value.get('stream_type')
|
||||
source = stream_value.get('source') or {}
|
||||
media = stream_value.get(stream_type) or {}
|
||||
signing_channel = stream.get('signing_channel') or {}
|
||||
channel_name = signing_channel.get('name')
|
||||
channel_claim_id = signing_channel.get('claim_id')
|
||||
channel_url = None
|
||||
if channel_name and channel_claim_id:
|
||||
channel_url = self._permanent_url(url, channel_name, channel_claim_id)
|
||||
|
||||
info = {
|
||||
'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'description': stream_value.get('description'),
|
||||
'license': stream_value.get('license'),
|
||||
'timestamp': int_or_none(stream.get('timestamp')),
|
||||
'tags': stream_value.get('tags'),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
||||
'channel_id': channel_claim_id,
|
||||
'channel_url': channel_url,
|
||||
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
}
|
||||
if stream_type == 'audio':
|
||||
info['vcodec'] = 'none'
|
||||
else:
|
||||
info.update({
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
|
||||
_TESTS = [{
|
||||
# Video
|
||||
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||
@@ -28,6 +89,8 @@ class LBRYIE(InfoExtractor):
|
||||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||
'timestamp': 1595694354,
|
||||
'upload_date': '20200725',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}
|
||||
}, {
|
||||
# Audio
|
||||
@@ -40,6 +103,12 @@ class LBRYIE(InfoExtractor):
|
||||
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||
'timestamp': 1591312601,
|
||||
'upload_date': '20200604',
|
||||
'tags': list,
|
||||
'duration': 2570,
|
||||
'channel': 'The LBRY Foundation',
|
||||
'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||
@@ -47,45 +116,99 @@ class LBRYIE(InfoExtractor):
|
||||
}, {
|
||||
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/Episode-1:e7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params):
|
||||
return self._download_json(
|
||||
'https://api.lbry.tv/api/v1/proxy', display_id,
|
||||
headers={'Content-Type': 'application/json-rpc'},
|
||||
data=json.dumps({
|
||||
'method': method,
|
||||
'params': params,
|
||||
}).encode())['result']
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
if display_id.startswith('$/'):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
display_id = display_id.replace(':', '#')
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
result_value = result['value']
|
||||
if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
|
||||
raise ExtractorError('Unsupported URL', expected=True)
|
||||
claim_id = result['claim_id']
|
||||
title = result_value['title']
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
info = self._parse_stream(result, url)
|
||||
info.update({
|
||||
'id': claim_id,
|
||||
'title': title,
|
||||
'url': streaming_url,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
'info_dict': {
|
||||
'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'title': 'The LBRY Foundation',
|
||||
'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
|
||||
},
|
||||
'playlist_count': 29,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@LBRYFoundation',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, claim_id, url, page):
|
||||
page += 1
|
||||
result = self._call_api_proxy(
|
||||
'claim_search', claim_id, {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
'stream_types': self._SUPPORTED_STREAM_TYPES,
|
||||
}, 'page %d' % page)
|
||||
for item in (result.get('items') or []):
|
||||
stream_claim_name = item.get('name')
|
||||
stream_claim_id = item.get('claim_id')
|
||||
if not (stream_claim_name and stream_claim_id):
|
||||
continue
|
||||
|
||||
info = self._parse_stream(item, url)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'id': stream_claim_id,
|
||||
'title': try_get(item, lambda x: x['value']['title']),
|
||||
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
|
||||
})
|
||||
yield info
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url).replace(':', '#')
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._call_api_proxy(
|
||||
'resolve', display_id, {'urls': [uri]})[uri]
|
||||
result_value = result['value']
|
||||
if result_value.get('stream_type') not in ('video', 'audio'):
|
||||
raise ExtractorError('Unsupported URL', expected=True)
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', display_id, {'uri': uri})['streaming_url']
|
||||
source = result_value.get('source') or {}
|
||||
media = result_value.get('video') or result_value.get('audio') or {}
|
||||
signing_channel = result_value.get('signing_channel') or {}
|
||||
|
||||
return {
|
||||
'id': result['claim_id'],
|
||||
'title': result_value['title'],
|
||||
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'description': result_value.get('description'),
|
||||
'license': result_value.get('license'),
|
||||
'timestamp': int_or_none(result.get('timestamp')),
|
||||
'tags': result_value.get('tags'),
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': signing_channel.get('name'),
|
||||
'channel_id': signing_channel.get('claim_id'),
|
||||
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'url': streaming_url,
|
||||
}
|
||||
result = self._resolve_url(
|
||||
'lbry://' + display_id, display_id, 'channel')
|
||||
claim_id = result['claim_id']
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, claim_id, url),
|
||||
self._PAGE_SIZE)
|
||||
result_value = result.get('value') or {}
|
||||
return self.playlist_result(
|
||||
entries, claim_id, result_value.get('title'),
|
||||
result_value.get('description'))
|
||||
|
||||
@@ -8,11 +8,15 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
unescapeHTML,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
|
||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
|
||||
'info_dict': {
|
||||
'id': '1498-2',
|
||||
'id': '7971-2',
|
||||
'ext': 'mp4',
|
||||
'title': "Introduction to the Practitioner's Brief",
|
||||
'title': 'What Is Data Science',
|
||||
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
|
||||
'timestamp': 1607387907,
|
||||
'upload_date': '20201208',
|
||||
'duration': 304,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'title': 'AWS Certified Cloud Practitioner',
|
||||
'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
|
||||
'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
|
||||
'duration': 28835,
|
||||
},
|
||||
'playlist_count': 41,
|
||||
'skip': 'Requires Linux Academy account credentials',
|
||||
@@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
|
||||
'client_id': self._CLIENT_ID,
|
||||
'response_type': 'token id_token',
|
||||
'response_mode': 'web_message',
|
||||
'redirect_uri': self._ORIGIN_URL,
|
||||
'scope': 'openid email user_impersonation profile',
|
||||
'audience': self._ORIGIN_URL,
|
||||
@@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
|
||||
access_token = self._search_regex(
|
||||
r'access_token=([^=&]+)', urlh.geturl(),
|
||||
'access token')
|
||||
'access token', default=None)
|
||||
if not access_token:
|
||||
access_token = self._parse_json(
|
||||
self._search_regex(
|
||||
r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
|
||||
'authorization response'), None,
|
||||
transform_source=js_to_json)['response']['access_token']
|
||||
|
||||
self._download_webpage(
|
||||
'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
|
||||
@@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
|
||||
# course path
|
||||
if course_id:
|
||||
entries = [
|
||||
self.url_result(
|
||||
urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
|
||||
for lesson_url in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
|
||||
webpage))]
|
||||
title = unescapeHTML(self._html_search_regex(
|
||||
(r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
|
||||
r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
|
||||
webpage, 'title', default=None, group='value'))
|
||||
description = unescapeHTML(self._html_search_regex(
|
||||
r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, 'description', default=None, group='value'))
|
||||
return self.playlist_result(entries, course_id, title, description)
|
||||
module = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
|
||||
item_id)
|
||||
entries = []
|
||||
chapter_number = None
|
||||
chapter = None
|
||||
chapter_id = None
|
||||
for item in module['items']:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
def type_field(key):
|
||||
return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
|
||||
type_fields = (type_field('name'), type_field('slug'))
|
||||
# Move to next module section
|
||||
if 'section' in type_fields:
|
||||
chapter = item.get('course_name')
|
||||
chapter_id = item.get('course_module')
|
||||
chapter_number = 1 if not chapter_number else chapter_number + 1
|
||||
continue
|
||||
# Skip non-lessons
|
||||
if 'lesson' not in type_fields:
|
||||
continue
|
||||
lesson_url = urljoin(url, item.get('url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
title = item.get('title') or item.get('lesson_name')
|
||||
description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': lesson_url,
|
||||
'ie_key': LinuxAcademyIE.ie_key(),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
|
||||
'duration': parse_duration(item.get('duration')),
|
||||
'chapter': chapter,
|
||||
'chapter_id': chapter_id,
|
||||
'chapter_number': chapter_number,
|
||||
})
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'id': course_id,
|
||||
'title': module.get('title'),
|
||||
'description': module.get('md_desc') or clean_html(module.get('desc')),
|
||||
'duration': parse_duration(module.get('duration')),
|
||||
}
|
||||
|
||||
# single video path
|
||||
info = self._extract_jwplayer_data(
|
||||
webpage, item_id, require_title=False, m3u8_id='hls',)
|
||||
title = self._search_regex(
|
||||
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
||||
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
info.update({
|
||||
m3u8_url = self._parse_json(
|
||||
self._search_regex(
|
||||
r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
|
||||
item_id)[0]['file']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
})
|
||||
'formats': formats,
|
||||
}
|
||||
lesson = self._parse_json(
|
||||
self._search_regex(
|
||||
(r'window\.lesson\s*=\s*({.+?})\s*;',
|
||||
r'player\.lesson\s*=\s*({.+?})\s*;'),
|
||||
webpage, 'lesson', default='{}'), item_id, fatal=False)
|
||||
if lesson:
|
||||
info.update({
|
||||
'title': lesson.get('lesson_name'),
|
||||
'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
|
||||
'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
|
||||
'duration': parse_duration(lesson.get('duration')),
|
||||
})
|
||||
if not info.get('title'):
|
||||
info['title'] = self._search_regex(
|
||||
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
||||
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
return info
|
||||
|
||||
@@ -2,12 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -16,6 +20,8 @@ class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
# MDR regularly deletes its videos
|
||||
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||
@@ -66,6 +72,22 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
@@ -91,10 +113,13 @@ class MDRIE(InfoExtractor):
|
||||
|
||||
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
|
||||
|
||||
type_ = xpath_text(doc, './type', default=None)
|
||||
|
||||
formats = []
|
||||
processed_urls = []
|
||||
for asset in doc.findall('./assets/asset'):
|
||||
for source in (
|
||||
'download',
|
||||
'progressiveDownload',
|
||||
'dynamicHttpStreamingRedirector',
|
||||
'adaptiveHttpStreamingRedirector'):
|
||||
@@ -102,63 +127,49 @@ class MDRIE(InfoExtractor):
|
||||
if url_el is None:
|
||||
continue
|
||||
|
||||
video_url = url_el.text
|
||||
if video_url in processed_urls:
|
||||
video_url = url_or_none(url_el.text)
|
||||
if not video_url or video_url in processed_urls:
|
||||
continue
|
||||
|
||||
processed_urls.append(video_url)
|
||||
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
|
||||
ext = determine_ext(url_el.text)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
url_formats = self._extract_m3u8_formats(
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=0, m3u8_id='HLS', fatal=False)
|
||||
preference=0, m3u8_id='HLS', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
url_formats = self._extract_f4m_formats(
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||
preference=0, f4m_id='HDS', fatal=False)
|
||||
preference=0, f4m_id='HDS', fatal=False))
|
||||
else:
|
||||
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
|
||||
|
||||
format_id = [media_type]
|
||||
if vbr or abr:
|
||||
format_id.append(compat_str(vbr or abr))
|
||||
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%s-%d' % (media_type, vbr or abr),
|
||||
'format_id': '-'.join(format_id),
|
||||
'filesize': filesize,
|
||||
'abr': abr,
|
||||
'preference': 1,
|
||||
'vbr': vbr,
|
||||
}
|
||||
|
||||
if vbr:
|
||||
width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
|
||||
height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
|
||||
f.update({
|
||||
'vbr': vbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
|
||||
'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
|
||||
})
|
||||
|
||||
url_formats = [f]
|
||||
if type_ == 'audio':
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
if not url_formats:
|
||||
continue
|
||||
|
||||
if not vbr:
|
||||
for f in url_formats:
|
||||
abr = f.get('tbr') or abr
|
||||
if 'tbr' in f:
|
||||
del f['tbr']
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
formats.extend(url_formats)
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
https?://
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9A-Z]{16,})
|
||||
@@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .telecinco import TelecincoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
class MiTeleIE(TelecincoIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
@@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
@@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor):
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video = pre_player['video']
|
||||
video_id = video['dataMediaId']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
info = content.get('info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
# for some reason only HLS is supported
|
||||
'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
|
||||
'id': video_id,
|
||||
video_info.update({
|
||||
'title': title,
|
||||
'description': info.get('synopsis'),
|
||||
'series': content.get('title'),
|
||||
@@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'episode': content.get('subtitle'),
|
||||
'episode_number': int_or_none(info.get('episode_number')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'thumbnail': video.get('dataPoster'),
|
||||
'age_limit': int_or_none(info.get('rating')),
|
||||
'timestamp': parse_iso8601(pre_player.get('publishedTime')),
|
||||
}
|
||||
})
|
||||
return video_info
|
||||
|
||||
@@ -5,33 +5,137 @@ import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
OnDemandPagedList,
|
||||
remove_start,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
class NBACVPBaseIE(TurnerBaseIE):
|
||||
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
|
||||
return self._extract_cvp_info(
|
||||
'http://secure.nba.com/%s' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
}, fatal=fatal)
|
||||
|
||||
|
||||
class NBAWatchBaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
|
||||
|
||||
def _extract_video(self, filter_key, filter_value):
|
||||
video = self._download_json(
|
||||
'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
|
||||
filter_value, query={
|
||||
'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
|
||||
'q': filter_key + ':' + filter_value,
|
||||
'wt': 'json',
|
||||
})['response']['docs'][0]
|
||||
|
||||
video_id = str(video['pid'])
|
||||
title = video['name']
|
||||
|
||||
formats = []
|
||||
m3u8_url = (self._download_json(
|
||||
'https://watch.nba.com/service/publishpoint', video_id, query={
|
||||
'type': 'video',
|
||||
'format': 'json',
|
||||
'id': video_id,
|
||||
}, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
|
||||
}, fatal=False) or {}).get('path')
|
||||
if m3u8_url:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
for f in m3u8_formats:
|
||||
http_f = f.copy()
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': http_f['url'].replace('.m3u8', ''),
|
||||
})
|
||||
formats.append(http_f)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('runtime')),
|
||||
'timestamp': parse_iso8601(video.get('releaseDate')),
|
||||
'tags': video.get('tags'),
|
||||
}
|
||||
|
||||
seo_name = video.get('seoName')
|
||||
if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
|
||||
base_path = ''
|
||||
if seo_name.startswith('teams/'):
|
||||
base_path += seo_name.split('/')[1] + '/'
|
||||
base_path += 'video/'
|
||||
cvp_info = self._extract_nba_cvp_info(
|
||||
base_path + seo_name + '.xml', video_id, False)
|
||||
if cvp_info:
|
||||
formats.extend(cvp_info['formats'])
|
||||
info = merge_dicts(info, cvp_info)
|
||||
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||
IENAME = 'nba:watch:embed'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://watch.nba.com/embed?id=659395',
|
||||
'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
|
||||
'info_dict': {
|
||||
'id': '659395',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
|
||||
'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
|
||||
'timestamp': 1492228800,
|
||||
'upload_date': '20170415',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video('pid', video_id)
|
||||
|
||||
|
||||
class NBAWatchIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap',
|
||||
'id': '70946',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
'timestamp': 1354638466,
|
||||
'timestamp': 1354597200,
|
||||
'upload_date': '20121204',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
@@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE):
|
||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||
'info_dict': {
|
||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'id': '330865',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hawks vs. Cavaliers Game 1',
|
||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||
'duration': 228,
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160216',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'timberwolves',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'params': {
|
||||
# Download the whole playlist takes too long time
|
||||
'playlist_items': '1-30',
|
||||
'timestamp': 1432094400,
|
||||
'upload_date': '20150521',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
'upload_date': '20141212',
|
||||
'timestamp': 1418418600,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only CVP mp4 format available
|
||||
'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
|
||||
if collection_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
|
||||
return self.url_result(
|
||||
'https://www.nba.com/watch/list/collection/' + collection_id,
|
||||
NBAWatchCollectionIE.ie_key(), collection_id)
|
||||
return self._extract_video('seoName', display_id)
|
||||
|
||||
def _fetch_page(self, team, video_id, page):
|
||||
search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
|
||||
'type': 'teamvideo',
|
||||
'start': page * self._PAGE_SIZE + 1,
|
||||
'npp': (page + 1) * self._PAGE_SIZE + 1,
|
||||
'sort': 'recent',
|
||||
'output': 'json',
|
||||
'site': team,
|
||||
})
|
||||
results = self._download_json(
|
||||
search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
|
||||
for item in results:
|
||||
yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
|
||||
|
||||
def _extract_playlist(self, orig_path, video_id, webpage):
|
||||
team = orig_path.split('/')[0]
|
||||
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch:collection'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.nba.com/list/collection/season-preview-2020',
|
||||
'info_dict': {
|
||||
'id': 'season-preview-2020',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video because of --no-playlist')
|
||||
video_path = self._search_regex(
|
||||
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
|
||||
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
|
||||
return self.url_result(video_url)
|
||||
|
||||
self.to_screen('Downloading playlist - add --no-playlist to just download video')
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
def _fetch_page(self, collection_id, page):
|
||||
page += 1
|
||||
videos = self._download_json(
|
||||
'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
|
||||
collection_id, 'Downloading page %d JSON metadata' % page, query={
|
||||
'count': self._PAGE_SIZE,
|
||||
'page': page,
|
||||
})['results']['videos']
|
||||
for video in videos:
|
||||
program = video.get('program') or {}
|
||||
seo_name = program.get('seoName') or program.get('slug')
|
||||
if not seo_name:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': program.get('id'),
|
||||
'title': program.get('title') or video.get('title'),
|
||||
'url': 'https://www.nba.com/watch/video/' + seo_name,
|
||||
'thumbnail': video.get('image'),
|
||||
'description': program.get('description') or video.get('description'),
|
||||
'duration': parse_duration(program.get('runtimeHours')),
|
||||
'timestamp': parse_iso8601(video.get('releaseDate')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
orig_path = path
|
||||
if path.startswith('nba/'):
|
||||
path = path[3:]
|
||||
collection_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, collection_id),
|
||||
self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, collection_id)
|
||||
|
||||
if 'video/' not in path:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
||||
|
||||
if path == '{{id}}':
|
||||
return self._extract_playlist(orig_path, video_id, webpage)
|
||||
class NBABaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'''(?x)
|
||||
https?://(?:www\.)?nba\.com/
|
||||
(?P<team>
|
||||
blazers|
|
||||
bucks|
|
||||
bulls|
|
||||
cavaliers|
|
||||
celtics|
|
||||
clippers|
|
||||
grizzlies|
|
||||
hawks|
|
||||
heat|
|
||||
hornets|
|
||||
jazz|
|
||||
kings|
|
||||
knicks|
|
||||
lakers|
|
||||
magic|
|
||||
mavericks|
|
||||
nets|
|
||||
nuggets|
|
||||
pacers|
|
||||
pelicans|
|
||||
pistons|
|
||||
raptors|
|
||||
rockets|
|
||||
sixers|
|
||||
spurs|
|
||||
suns|
|
||||
thunder|
|
||||
timberwolves|
|
||||
warriors|
|
||||
wizards
|
||||
)
|
||||
(?:/play\#)?/'''
|
||||
_CHANNEL_PATH_REGEX = r'video/channel|series'
|
||||
|
||||
# See prepareContentId() of pkgCvp.js
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
def _embed_url_result(self, team, content_id):
|
||||
return self.url_result(update_url_query(
|
||||
'https://secure.nba.com/assets/amp/include/video/iframe.html', {
|
||||
'contentId': content_id,
|
||||
'team': team,
|
||||
}), NBAEmbedIE.ie_key())
|
||||
|
||||
return self._extract_cvp_info(
|
||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
def _call_api(self, team, content_id, query, resource):
|
||||
return self._download_json(
|
||||
'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
|
||||
content_id, 'Download %s JSON metadata' % resource,
|
||||
query=query, headers={
|
||||
'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
|
||||
})['response']['result']
|
||||
|
||||
def _extract_video(self, video, team, extract_all=True):
|
||||
video_id = compat_str(video['nid'])
|
||||
team = video['brand']
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video.get('title') or video.get('headline') or video['shortHeadline'],
|
||||
'description': video.get('description'),
|
||||
'timestamp': parse_iso8601(video.get('published')),
|
||||
}
|
||||
|
||||
subtitles = {}
|
||||
captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
|
||||
for caption_url in captions.values():
|
||||
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||
|
||||
formats = []
|
||||
mp4_url = video.get('mp4')
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
})
|
||||
|
||||
if extract_all:
|
||||
source_url = video.get('videoSource')
|
||||
if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
|
||||
formats.append({
|
||||
'format_id': 'source',
|
||||
'url': source_url,
|
||||
'preference': 1,
|
||||
})
|
||||
|
||||
m3u8_url = video.get('m3u8')
|
||||
if m3u8_url:
|
||||
if '.akamaihd.net/i/' in m3u8_url:
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
content_xml = video.get('contentXml')
|
||||
if team and content_xml:
|
||||
cvp_info = self._extract_nba_cvp_info(
|
||||
team + content_xml, video_id, fatal=False)
|
||||
if cvp_info:
|
||||
formats.extend(cvp_info['formats'])
|
||||
subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
|
||||
info = merge_dicts(info, cvp_info)
|
||||
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
info.update(self._embed_url_result(team, video['videoId']))
|
||||
|
||||
info.update({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
team, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if '/play#/' in url:
|
||||
display_id = compat_urllib_parse_unquote(display_id)
|
||||
else:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
display_id = self._search_regex(
|
||||
self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
|
||||
return self._extract_url_results(team, display_id)
|
||||
|
||||
|
||||
class NBAEmbedIE(NBABaseIE):
|
||||
IENAME = 'nba:embed'
|
||||
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
content_id = qs['contentId'][0]
|
||||
team = qs.get('team', [None])[0]
|
||||
if not team:
|
||||
return self.url_result(
|
||||
'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
|
||||
video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
|
||||
return self._extract_video(video, team)
|
||||
|
||||
|
||||
class NBAIE(NBABaseIE):
|
||||
IENAME = 'nba'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
|
||||
'info_dict': {
|
||||
'id': '45039',
|
||||
'ext': 'mp4',
|
||||
'title': 'AND WE BACK.',
|
||||
'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
|
||||
'duration': 94,
|
||||
'timestamp': 1607112000,
|
||||
'upload_date': '20201218',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CONTENT_ID_REGEX = r'videoID'
|
||||
|
||||
def _extract_url_results(self, team, content_id):
|
||||
return self._embed_url_result(team, content_id)
|
||||
|
||||
|
||||
class NBAChannelIE(NBABaseIE):
|
||||
IENAME = 'nba:channel'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nba.com/blazers/video/channel/summer_league',
|
||||
'info_dict': {
|
||||
'title': 'Summer League',
|
||||
},
|
||||
'playlist_mincount': 138,
|
||||
}, {
|
||||
'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CONTENT_ID_REGEX = r'videoSubCategory'
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, team, channel, page):
|
||||
results = self._call_api(team, channel, {
|
||||
'channels': channel,
|
||||
'count': self._PAGE_SIZE,
|
||||
'offset': page * self._PAGE_SIZE,
|
||||
}, 'page %d' % (page + 1))
|
||||
for video in results:
|
||||
yield self._extract_video(video, team, False)
|
||||
|
||||
def _extract_url_results(self, team, content_id):
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, content_id),
|
||||
self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, playlist_title=content_id)
|
||||
|
||||
@@ -158,7 +158,8 @@ class NBCIE(AdobePassIE):
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||
@@ -174,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
iframe_m = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||
r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
|
||||
if iframe_m:
|
||||
return iframe_m.group('url')
|
||||
|
||||
@@ -192,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
# Does not include https because its certificate is invalid
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# iframe src
|
||||
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||
'info_dict': {
|
||||
'id': 'PHJSaFWbrTY9',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'upload_date': '20150330',
|
||||
'timestamp': 1427726529,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# data-mpx-src
|
||||
'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data-src
|
||||
'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -274,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE):
|
||||
}
|
||||
|
||||
|
||||
class CSNNEIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
|
||||
'info_dict': {
|
||||
'id': 'yvBLLUgQ8WU0',
|
||||
'ext': 'mp4',
|
||||
'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
|
||||
'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
|
||||
'timestamp': 1459369979,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': self._html_search_meta('twitter:player:stream', webpage),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||
|
||||
|
||||
@@ -4,19 +4,15 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
get_element_by_class,
|
||||
)
|
||||
|
||||
|
||||
class NFLIE(InfoExtractor):
|
||||
IE_NAME = 'nfl.com'
|
||||
_VALID_URL = r'''(?x)
|
||||
class NFLBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
(?:www\.)?
|
||||
@@ -34,15 +30,15 @@ class NFLIE(InfoExtractor):
|
||||
houstontexans|
|
||||
colts|
|
||||
jaguars|
|
||||
titansonline|
|
||||
(?:titansonline|tennesseetitans)|
|
||||
denverbroncos|
|
||||
kcchiefs|
|
||||
(?:kc)?chiefs|
|
||||
raiders|
|
||||
chargers|
|
||||
dallascowboys|
|
||||
giants|
|
||||
philadelphiaeagles|
|
||||
redskins|
|
||||
(?:redskins|washingtonfootball)|
|
||||
chicagobears|
|
||||
detroitlions|
|
||||
packers|
|
||||
@@ -52,180 +48,113 @@ class NFLIE(InfoExtractor):
|
||||
neworleanssaints|
|
||||
buccaneers|
|
||||
azcardinals|
|
||||
stlouisrams|
|
||||
(?:stlouis|the)rams|
|
||||
49ers|
|
||||
seahawks
|
||||
)\.com|
|
||||
.+?\.clubs\.nfl\.com
|
||||
)
|
||||
)/
|
||||
(?:.+?/)*
|
||||
(?P<id>[^/#?&]+)
|
||||
'''
|
||||
_VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})'
|
||||
_WORKING = False
|
||||
|
||||
def _parse_video_config(self, video_config, display_id):
|
||||
video_config = self._parse_json(video_config, display_id)
|
||||
item = video_config['playlist'][0]
|
||||
mcp_id = item.get('mcpID')
|
||||
if mcp_id:
|
||||
info = self.url_result(
|
||||
'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id,
|
||||
'Anvato', mcp_id)
|
||||
else:
|
||||
media_id = item.get('id') or item['entityId']
|
||||
title = item['title']
|
||||
item_url = item['url']
|
||||
info = {'id': media_id}
|
||||
ext = determine_ext(item_url)
|
||||
if ext == 'm3u8':
|
||||
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
|
||||
self._sort_formats(info['formats'])
|
||||
else:
|
||||
info['url'] = item_url
|
||||
if item.get('audio') is True:
|
||||
info['vcodec'] = 'none'
|
||||
is_live = video_config.get('live') is True
|
||||
thumbnails = None
|
||||
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
|
||||
if image_url:
|
||||
thumbnails = [{
|
||||
'url': image_url,
|
||||
'ext': determine_ext(image_url, 'jpg'),
|
||||
}]
|
||||
info.update({
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'is_live': is_live,
|
||||
'description': clean_html(item.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class NFLIE(NFLBaseIE):
|
||||
IE_NAME = 'nfl.com'
|
||||
_VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
||||
'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000398478',
|
||||
'id': '899441',
|
||||
'ext': 'mp4',
|
||||
'title': 'Week 3: Redskins vs. Eagles highlights',
|
||||
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
||||
'upload_date': '20140921',
|
||||
'timestamp': 1411337580,
|
||||
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
||||
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||
'upload_date': '20201215',
|
||||
'timestamp': 1608009755,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'NFL',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
||||
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
||||
'md5': '6886b32c24b463038c760ceb55a34566',
|
||||
'info_dict': {
|
||||
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE: Post Game vs. Browns',
|
||||
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
|
||||
'upload_date': '20131229',
|
||||
'timestamp': 1388354455,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
|
||||
'ext': 'mp3',
|
||||
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
||||
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||
'info_dict': {
|
||||
'id': '0ap3000000467607',
|
||||
'ext': 'mp4',
|
||||
'title': 'Frustrations flare on the field',
|
||||
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||
'timestamp': 1422850320,
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
|
||||
'md5': '4c319e2f625ffd0b481b4382c6fc124c',
|
||||
'info_dict': {
|
||||
'id': 'n-238346',
|
||||
'ext': 'mp4',
|
||||
'title': '10 Days at Gillette',
|
||||
'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
|
||||
'timestamp': 1442618809,
|
||||
'upload_date': '20150918',
|
||||
},
|
||||
}, {
|
||||
# lowercase data-contentid
|
||||
'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7',
|
||||
'info_dict': {
|
||||
'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tomlin looks ahead to Ravens on a short week',
|
||||
'description': 'md5:32f3f7b139f43913181d5cbb24ecad75',
|
||||
'timestamp': 1443459651,
|
||||
'upload_date': '20150928',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
||||
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
|
||||
'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def prepend_host(host, url):
|
||||
if not url.startswith('http'):
|
||||
if not url.startswith('/'):
|
||||
url = '/%s' % url
|
||||
url = 'http://{0:}{1:}'.format(host, url)
|
||||
return url
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self._parse_video_config(self._search_regex(
|
||||
self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)
|
||||
|
||||
@staticmethod
|
||||
def format_from_stream(stream, protocol, host, path_prefix='',
|
||||
preference=0, note=None):
|
||||
url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
|
||||
protocol=protocol,
|
||||
host=host,
|
||||
prefix=path_prefix,
|
||||
path=stream.get('path'),
|
||||
)
|
||||
return {
|
||||
'url': url,
|
||||
'vbr': int_or_none(stream.get('rate', 0), 1000),
|
||||
'preference': preference,
|
||||
'format_note': note,
|
||||
}
|
||||
|
||||
class NFLArticleIE(NFLBaseIE):
|
||||
IE_NAME = 'nfl.com:article'
|
||||
_VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
|
||||
'info_dict': {
|
||||
'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
|
||||
'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, host = mobj.group('id'), mobj.group('host')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
||||
r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
|
||||
webpage, 'config URL', default='static/content/static/config/video/config.json',
|
||||
group='config'))
|
||||
# For articles, the id in the url is not the video id
|
||||
video_id = self._search_regex(
|
||||
r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=video_id, group='id')
|
||||
config = self._download_json(config_url, video_id, 'Downloading player config')
|
||||
url_template = NFLIE.prepend_host(
|
||||
host, '{contentURLTemplate:}'.format(**config))
|
||||
video_data = self._download_json(
|
||||
url_template.format(id=video_id), video_id)
|
||||
|
||||
formats = []
|
||||
cdn_data = video_data.get('cdnData', {})
|
||||
streams = cdn_data.get('bitrateInfo', [])
|
||||
if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
|
||||
parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
|
||||
protocol, host = parts.scheme, parts.netloc
|
||||
for stream in streams:
|
||||
formats.append(
|
||||
NFLIE.format_from_stream(stream, protocol, host))
|
||||
else:
|
||||
cdns = config.get('cdns')
|
||||
if not cdns:
|
||||
raise ExtractorError('Failed to get CDN data', expected=True)
|
||||
|
||||
for name, cdn in cdns.items():
|
||||
# LimeLight streams don't seem to work
|
||||
if cdn.get('name') == 'LIMELIGHT':
|
||||
continue
|
||||
|
||||
protocol = cdn.get('protocol')
|
||||
host = remove_end(cdn.get('host', ''), '/')
|
||||
if not (protocol and host):
|
||||
continue
|
||||
|
||||
prefix = cdn.get('pathprefix', '')
|
||||
if prefix and not prefix.endswith('/'):
|
||||
prefix = '%s/' % prefix
|
||||
|
||||
preference = 0
|
||||
if protocol == 'rtmp':
|
||||
preference = -2
|
||||
elif 'prog' in name.lower():
|
||||
preference = 1
|
||||
|
||||
for stream in streams:
|
||||
formats.append(
|
||||
NFLIE.format_from_stream(stream, protocol, host,
|
||||
prefix, preference, name))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = None
|
||||
for q in ('xl', 'l', 'm', 's', 'xs'):
|
||||
thumbnail = video_data.get('imagePaths', {}).get(q)
|
||||
if thumbnail:
|
||||
break
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data.get('headline'),
|
||||
'formats': formats,
|
||||
'description': video_data.get('caption'),
|
||||
'duration': video_data.get('duration'),
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': int_or_none(video_data.get('posted'), 1000),
|
||||
}
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
entries = []
|
||||
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||
entries.append(self._parse_video_config(video_config, display_id))
|
||||
title = clean_html(get_element_by_class(
|
||||
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage)
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
|
||||
@@ -3,51 +3,33 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urljoin
|
||||
|
||||
|
||||
class NhkVodIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||
'info_dict': {
|
||||
'id': 'a95j5iza',
|
||||
'ext': 'mp4',
|
||||
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
|
||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||
'timestamp': 1565965194,
|
||||
'upload_date': '20190816',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
|
||||
class NhkBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
|
||||
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
|
||||
_TYPE_REGEX = r'/(?P<type>video|audio)/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
|
||||
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
|
||||
return self._download_json(
|
||||
self._API_URL_TEMPLATE % (
|
||||
'v' if is_video else 'r',
|
||||
'clip' if is_clip else 'esd',
|
||||
'episode' if is_episode else 'program',
|
||||
m_id, lang, '/all' if is_video else ''),
|
||||
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
|
||||
|
||||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||
if episode_id.isdigit():
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
is_video = m_type == 'video'
|
||||
episode = self._download_json(
|
||||
self._API_URL_TEMPLATE % (
|
||||
'v' if is_video else 'r',
|
||||
'clip' if episode_id[:4] == '9999' else 'esd',
|
||||
episode_id, lang, '/all' if is_video else ''),
|
||||
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
|
||||
if fetch_episode:
|
||||
episode = self._call_api(
|
||||
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
|
||||
title = episode.get('sub_title_clean') or episode['sub_title']
|
||||
|
||||
def get_clean_field(key):
|
||||
@@ -76,18 +58,121 @@ class NhkVodIE(InfoExtractor):
|
||||
'episode': title,
|
||||
}
|
||||
if is_video:
|
||||
vod_id = episode['vod_id']
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Piksel',
|
||||
'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
|
||||
'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id,
|
||||
'id': vod_id,
|
||||
})
|
||||
else:
|
||||
audio = episode['audio']
|
||||
audio_path = audio['audio']
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
|
||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in info['formats']:
|
||||
f['language'] = lang
|
||||
if fetch_episode:
|
||||
audio_path = episode['audio']['audio']
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
|
||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in info['formats']:
|
||||
f['language'] = lang
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': NhkVodIE.ie_key(),
|
||||
'url': url,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class NhkVodIE(NhkBaseIE):
|
||||
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
|
||||
'info_dict': {
|
||||
'id': 'a95j5iza',
|
||||
'ext': 'mp4',
|
||||
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
|
||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||
'timestamp': 1565965194,
|
||||
'upload_date': '20190816',
|
||||
},
|
||||
}, {
|
||||
# audio clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
|
||||
'info_dict': {
|
||||
'id': 'r_inventions-20201104-1-en',
|
||||
'ext': 'm4a',
|
||||
'title': "Japan's Top Inventions - Miniature Video Cameras",
|
||||
'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_episode_info(url)
|
||||
|
||||
|
||||
class NhkVodProgramIE(NhkBaseIE):
|
||||
_VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
_TESTS = [{
|
||||
# video program episodes
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
|
||||
'info_dict': {
|
||||
'id': 'japanrailway',
|
||||
'title': 'Japan Railway Journal',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
# video program clips
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
|
||||
'info_dict': {
|
||||
'id': 'japanrailway',
|
||||
'title': 'Japan Railway Journal',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio program
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, m_type, program_id, episode_type = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
episodes = self._call_api(
|
||||
program_id, lang, m_type == 'video', False, episode_type == 'clip')
|
||||
|
||||
entries = []
|
||||
for episode in episodes:
|
||||
episode_path = episode.get('url')
|
||||
if not episode_path:
|
||||
continue
|
||||
entries.append(self._extract_episode_info(
|
||||
urljoin(url, episode_path), episode))
|
||||
|
||||
program_title = None
|
||||
if entries:
|
||||
program_title = entries[0].get('series')
|
||||
|
||||
return self.playlist_result(entries, program_id, program_title)
|
||||
|
||||
@@ -1,20 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import datetime
|
||||
import functools
|
||||
import json
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
@@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor):
|
||||
if urlh is False:
|
||||
login_ok = False
|
||||
else:
|
||||
parts = compat_urlparse.urlparse(urlh.geturl())
|
||||
parts = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||
login_ok = False
|
||||
if not login_ok:
|
||||
@@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
|
||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||
flv_info = compat_parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
@@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
|
||||
class NiconicoPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||
'info_dict': {
|
||||
'id': '27411728',
|
||||
'title': 'AKB48のオールナイトニッポン',
|
||||
'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
|
||||
'uploader': 'のっく',
|
||||
'uploader_id': '805442',
|
||||
},
|
||||
'playlist_mincount': 225,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _call_api(self, list_id, resource, query):
|
||||
return self._download_json(
|
||||
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||
'Downloading %s JSON metatdata' % resource, query=query,
|
||||
headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||
|
||||
def _parse_owner(self, item):
|
||||
owner = item.get('owner') or {}
|
||||
if owner:
|
||||
return {
|
||||
'uploader': owner.get('name'),
|
||||
'uploader_id': owner.get('id'),
|
||||
}
|
||||
return {}
|
||||
|
||||
def _fetch_page(self, list_id, page):
|
||||
page += 1
|
||||
items = self._call_api(list_id, 'page %d' % page, {
|
||||
'page': page,
|
||||
'pageSize': self._PAGE_SIZE,
|
||||
})['items']
|
||||
for item in items:
|
||||
video = item.get('video') or {}
|
||||
video_id = video.get('id')
|
||||
if not video_id:
|
||||
continue
|
||||
count = video.get('count') or {}
|
||||
get_count = lambda x: int_or_none(count.get(x))
|
||||
info = {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||
'description': video.get('shortDescription'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'view_count': get_count('view'),
|
||||
'comment_count': get_count('comment'),
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
}
|
||||
info.update(self._parse_owner(video))
|
||||
yield info
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
|
||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
||||
webpage, 'entries')
|
||||
entries = json.loads(entries_json)
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
||||
entry['item_data']['video_id']),
|
||||
} for entry in entries]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
|
||||
'id': list_id,
|
||||
'entries': entries,
|
||||
}
|
||||
mylist = self._call_api(list_id, 'list', {
|
||||
'pageSize': 1,
|
||||
})
|
||||
entries = InAdvancePagedList(
|
||||
functools.partial(self._fetch_page, list_id),
|
||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||
self._PAGE_SIZE)
|
||||
result = self.playlist_result(
|
||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||
result.update(self._parse_owner(mylist))
|
||||
return result
|
||||
|
||||
@@ -5,10 +5,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'$include': '[HasClosedCaptions]',
|
||||
})
|
||||
|
||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
|
||||
if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
manifest_base_url = content_package_url + 'manifest.'
|
||||
@@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for image in content.get('Images', []):
|
||||
for image in (content.get('Images') or []):
|
||||
image_url = image.get('Url')
|
||||
if not image_url:
|
||||
continue
|
||||
@@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
continue
|
||||
container.append(e_name)
|
||||
|
||||
season = content.get('Season', {})
|
||||
season = content.get('Season') or {}
|
||||
|
||||
info = {
|
||||
'id': content_id,
|
||||
@@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'episode_number': int_or_none(content.get('Episode')),
|
||||
'season': season.get('Name'),
|
||||
'season_number': season.get('Number'),
|
||||
'season_number': int_or_none(season.get('Number')),
|
||||
'season_id': season.get('Id'),
|
||||
'series': content.get('Media', {}).get('Name'),
|
||||
'series': try_get(content, lambda x: x['Media']['Name']),
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'duration': float_or_none(content_package.get('Duration')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
if content_package.get('HasClosedCaptions'):
|
||||
|
||||
@@ -1,235 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'https://noco.tv/do.php'
|
||||
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||
'md5': '0a993f0058ddbcd902630b2047ef710e',
|
||||
'info_dict': {
|
||||
'id': '11538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ami Ami Idol - Hello! France',
|
||||
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
|
||||
'upload_date': '20140412',
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
},
|
||||
{
|
||||
'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
|
||||
'md5': 'c190f1f48e313c55838f1f412225934d',
|
||||
'info_dict': {
|
||||
'id': '12610',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Guild #1 - Wake-Up Call',
|
||||
'timestamp': 1403863200,
|
||||
'upload_date': '20140627',
|
||||
'uploader': 'LBL42',
|
||||
'uploader_id': 'LBL',
|
||||
'duration': 233.023,
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login = self._download_json(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'a': 'login',
|
||||
'cookie': '1',
|
||||
'username': username,
|
||||
'password': password,
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
})
|
||||
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
@staticmethod
|
||||
def _ts():
|
||||
return int(time.time() * 1000)
|
||||
|
||||
def _call_api(self, path, video_id, note, sub_lang=None):
|
||||
ts = compat_str(self._ts() + self._ts_offset)
|
||||
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||
if sub_lang:
|
||||
url += self._SUB_LANG_TEMPLATE % sub_lang
|
||||
|
||||
request = sanitized_Request(url)
|
||||
request.add_header('Referer', self._referer)
|
||||
|
||||
resp = self._download_json(request, video_id, note)
|
||||
|
||||
if isinstance(resp, dict) and resp.get('error'):
|
||||
self._raise_error(resp['error'], resp['description'])
|
||||
|
||||
return resp
|
||||
|
||||
def _raise_error(self, error, description):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (self.IE_NAME, error, description),
|
||||
expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Timestamp adjustment offset between server time and local time
|
||||
# must be calculated in order to use timestamps closest to server's
|
||||
# in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_url = self._search_regex(
|
||||
r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
|
||||
webpage, 'noco player', group='player',
|
||||
default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
|
||||
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
|
||||
ts = int_or_none(qs.get('ts', [None])[0])
|
||||
self._ts_offset = ts - self._ts() if ts else 0
|
||||
self._referer = player_url
|
||||
|
||||
medias = self._call_api(
|
||||
'shows/%s/medias' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
options = self._call_api(
|
||||
'users/init', video_id,
|
||||
'Downloading user options JSON')['options']
|
||||
audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
|
||||
|
||||
if audio_lang_pref == 'original':
|
||||
audio_lang_pref = show['original_lang']
|
||||
if len(medias) == 1:
|
||||
audio_lang_pref = list(medias.keys())[0]
|
||||
elif audio_lang_pref not in medias:
|
||||
audio_lang_pref = 'fr'
|
||||
|
||||
qualities = self._call_api(
|
||||
'qualities',
|
||||
video_id, 'Downloading qualities JSON')
|
||||
|
||||
formats = []
|
||||
|
||||
for audio_lang, audio_lang_dict in medias.items():
|
||||
preference = 1 if audio_lang == audio_lang_pref else 0
|
||||
for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
|
||||
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
sub_lang if sub_lang != 'none' else None)
|
||||
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': int_or_none(fmt.get('res_width')),
|
||||
'height': int_or_none(fmt.get('res_lines')),
|
||||
'abr': int_or_none(fmt.get('audiobitrate'), 1000),
|
||||
'vbr': int_or_none(fmt.get('videobitrate'), 1000),
|
||||
'filesize': int_or_none(fmt.get('filesize')),
|
||||
'format_note': qualities[format_id].get('quality_name'),
|
||||
'quality': qualities[format_id].get('priority'),
|
||||
'preference': preference,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
|
||||
|
||||
if timestamp is not None and timestamp < 0:
|
||||
timestamp = None
|
||||
|
||||
uploader = show.get('partner_name')
|
||||
uploader_id = show.get('partner_key')
|
||||
duration = float_or_none(show.get('duration_ms'), 1000)
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_key, thumbnail_url in show.items():
|
||||
m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
|
||||
if not m:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
|
||||
episode = show.get('show_TT') or show.get('show_OT')
|
||||
family = show.get('family_TT') or show.get('family_OT')
|
||||
episode_number = show.get('episode_number')
|
||||
|
||||
title = ''
|
||||
if family:
|
||||
title += family
|
||||
if episode_number:
|
||||
title += ' #' + compat_str(episode_number)
|
||||
if episode:
|
||||
title += ' - ' + compat_str(episode)
|
||||
|
||||
description = show.get('show_resume') or show.get('family_resume')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -541,6 +541,10 @@ class PeerTubeIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'filesize': file_size,
|
||||
})
|
||||
if format_id == '0p':
|
||||
f['vcodec'] = 'none'
|
||||
else:
|
||||
f['fps'] = int_or_none(file_.get('fps'))
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -6,16 +6,33 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
(?:
|
||||
player\.
|
||||
(?:
|
||||
olympusattelecom|
|
||||
vibebyvista
|
||||
)|
|
||||
(?:api|player)\.multicastmedia|
|
||||
(?:api-ovp|player)\.piksel
|
||||
)\.com|
|
||||
(?:
|
||||
mz-edge\.stream\.co|
|
||||
movie-s\.nhk\.or
|
||||
)\.jp|
|
||||
vidego\.baltimorecity\.gov
|
||||
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/ums2867l',
|
||||
@@ -56,46 +73,41 @@ class PikselIE(InfoExtractor):
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _call_api(self, app_token, resource, display_id, query, fatal=True):
|
||||
response = (self._download_json(
|
||||
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
|
||||
display_id, query=query, fatal=fatal) or {}).get('response')
|
||||
failure = try_get(response, lambda x: x['failure']['reason'])
|
||||
if failure:
|
||||
if fatal:
|
||||
raise ExtractorError(failure, expected=True)
|
||||
self.report_warning(failure)
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
ref_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
|
||||
webpage, 'program uuid', default=display_id)
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
'v': video_id
|
||||
})['response']
|
||||
failure = response.get('failure')
|
||||
if failure:
|
||||
raise ExtractorError(response['failure']['reason'], expected=True)
|
||||
video_data = response['WsProgramResponse']['program']['asset']
|
||||
query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
|
||||
program = self._call_api(
|
||||
app_token, 'program', display_id, query)['WsProgramResponse']['program']
|
||||
video_id = program['uuid']
|
||||
video_data = program['asset']
|
||||
title = video_data['title']
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
for asset_file in video_data.get('assetFiles', []):
|
||||
def process_asset_file(asset_file):
|
||||
if not asset_file:
|
||||
return
|
||||
# TODO: extract rtmp formats
|
||||
http_url = asset_file.get('http_url')
|
||||
if not http_url:
|
||||
continue
|
||||
return
|
||||
tbr = None
|
||||
vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
|
||||
abr = int_or_none(asset_file.get('audioBitrate'), 1024)
|
||||
@@ -118,6 +130,43 @@ class PikselIE(InfoExtractor):
|
||||
'filesize': int_or_none(asset_file.get('filesize')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
def process_asset_files(asset_files):
|
||||
for asset_file in (asset_files or []):
|
||||
process_asset_file(asset_file)
|
||||
|
||||
process_asset_files(video_data.get('assetFiles'))
|
||||
process_asset_file(video_data.get('referenceFile'))
|
||||
if not formats:
|
||||
asset_id = video_data.get('assetid') or program.get('assetid')
|
||||
if asset_id:
|
||||
process_asset_files(try_get(self._call_api(
|
||||
app_token, 'asset_file', display_id, {
|
||||
'assetid': asset_id,
|
||||
}, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
|
||||
if smil_url:
|
||||
transform_source = None
|
||||
if ref_id == 'nhkworld':
|
||||
# TODO: figure out if this is something to be fixed in urljoin,
|
||||
# _parse_smil_formats or keep it here
|
||||
transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
|
||||
formats.extend(self._extract_smil_formats(
|
||||
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
|
||||
transform_source=transform_source, fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
||||
@@ -31,7 +31,12 @@ class PornHubBaseIE(InfoExtractor):
|
||||
def dl(*args, **kwargs):
|
||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
ret = dl(*args, **kwargs)
|
||||
|
||||
if not ret:
|
||||
return ret
|
||||
|
||||
webpage, urlh = ret
|
||||
|
||||
if any(re.search(p, webpage) for p in (
|
||||
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||
@@ -53,7 +58,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
@@ -152,6 +157,9 @@ class PornHubIE(PornHubBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||
'only_matching': True,
|
||||
@@ -160,7 +168,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
webpage)
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
@@ -280,14 +288,24 @@ class PornHubIE(PornHubBaseIE):
|
||||
video_urls.append((v_url, None))
|
||||
video_urls_set.add(v_url)
|
||||
|
||||
def parse_quality_items(quality_items):
|
||||
q_items = self._parse_json(quality_items, video_id, fatal=False)
|
||||
if not isinstance(q_items, list):
|
||||
return
|
||||
for item in q_items:
|
||||
if isinstance(item, dict):
|
||||
add_video_url(item.get('url'))
|
||||
|
||||
if not video_urls:
|
||||
FORMAT_PREFIXES = ('media', 'quality')
|
||||
FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
|
||||
js_vars = extract_js_vars(
|
||||
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
|
||||
default=None)
|
||||
if js_vars:
|
||||
for key, format_url in js_vars.items():
|
||||
if any(key.startswith(p) for p in FORMAT_PREFIXES):
|
||||
if key.startswith(FORMAT_PREFIXES[-1]):
|
||||
parse_quality_items(format_url)
|
||||
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
|
||||
add_video_url(format_url)
|
||||
if not video_urls and re.search(
|
||||
r'<[^>]+\bid=["\']lockedPlayer', webpage):
|
||||
@@ -343,12 +361,16 @@ class PornHubIE(PornHubBaseIE):
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', default=None)
|
||||
|
||||
def extract_vote_count(kind, name):
|
||||
return self._extract_count(
|
||||
(r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
|
||||
r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
|
||||
webpage, name)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
|
||||
like_count = extract_vote_count('Up', 'like')
|
||||
dislike_count = extract_vote_count('Down', 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
@@ -422,7 +444,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
|
||||
|
||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||
'playlist_mincount': 118,
|
||||
@@ -490,7 +512,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
@@ -605,7 +627,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'info_dict': {
|
||||
|
||||
@@ -7,6 +7,8 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -55,10 +57,12 @@ class RedditRIE(InfoExtractor):
|
||||
'id': 'zv89llsvexdz',
|
||||
'ext': 'mp4',
|
||||
'title': 'That small heart attack.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:4',
|
||||
'timestamp': 1501941939,
|
||||
'upload_date': '20170805',
|
||||
'uploader': 'Antw87',
|
||||
'duration': 12,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
@@ -116,13 +120,40 @@ class RedditRIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
thumbnails = []
|
||||
|
||||
def add_thumbnail(src):
|
||||
if not isinstance(src, dict):
|
||||
return
|
||||
thumbnail_url = url_or_none(src.get('url'))
|
||||
if not thumbnail_url:
|
||||
return
|
||||
thumbnails.append({
|
||||
'url': unescapeHTML(thumbnail_url),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
})
|
||||
|
||||
for image in try_get(data, lambda x: x['preview']['images']) or []:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
add_thumbnail(image.get('source'))
|
||||
resolutions = image.get('resolutions')
|
||||
if isinstance(resolutions, list):
|
||||
for resolution in resolutions:
|
||||
add_thumbnail(resolution)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'title': data.get('title'),
|
||||
'thumbnail': url_or_none(data.get('thumbnail')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': float_or_none(data.get('created_utc')),
|
||||
'uploader': data.get('author'),
|
||||
'duration': int_or_none(try_get(
|
||||
data,
|
||||
(lambda x: x['media']['reddit_video']['duration'],
|
||||
lambda x: x['secure_media']['reddit_video']['duration']))),
|
||||
'like_count': int_or_none(data.get('ups')),
|
||||
'dislike_count': int_or_none(data.get('downs')),
|
||||
'comment_count': int_or_none(data.get('num_comments')),
|
||||
|
||||
@@ -6,14 +6,24 @@ from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class RuutuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
|
||||
static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ruutu.fi/video/2058907',
|
||||
@@ -71,15 +81,53 @@ class RuutuIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 502: Bad Gateway'],
|
||||
}
|
||||
'expected_warnings': [
|
||||
'HTTP Error 502: Bad Gateway',
|
||||
'Failed to download m3u8 information',
|
||||
],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.supla.fi/audio/2231370',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# episode
|
||||
'url': 'https://www.ruutu.fi/video/3401964',
|
||||
'info_dict': {
|
||||
'id': '3401964',
|
||||
'ext': 'mp4',
|
||||
'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
|
||||
'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2582,
|
||||
'age_limit': 12,
|
||||
'upload_date': '20190508',
|
||||
'series': 'Temptation Island Suomi',
|
||||
'season_number': 5,
|
||||
'episode_number': 17,
|
||||
'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# premium
|
||||
'url': 'https://www.ruutu.fi/video/3618715',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_API_BASE = 'https://gatling.nelonenmedia.fi'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
|
||||
'%s/media-xml-cache' % self._API_BASE, video_id,
|
||||
query={'id': video_id})
|
||||
|
||||
formats = []
|
||||
@@ -96,9 +144,18 @@ class RuutuIE(InfoExtractor):
|
||||
continue
|
||||
processed_urls.append(video_url)
|
||||
ext = determine_ext(video_url)
|
||||
auth_video_url = url_or_none(self._download_webpage(
|
||||
'%s/auth/access/v2' % self._API_BASE, video_id,
|
||||
note='Downloading authenticated %s stream URL' % ext,
|
||||
fatal=False, query={'stream': video_url}))
|
||||
if auth_video_url:
|
||||
processed_urls.append(auth_video_url)
|
||||
video_url = auth_video_url
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
@@ -136,18 +193,35 @@ class RuutuIE(InfoExtractor):
|
||||
|
||||
extract_formats(video_xml.find('./Clip'))
|
||||
|
||||
drm = xpath_text(video_xml, './Clip/DRM', default=None)
|
||||
if not formats and drm:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
def pv(name):
|
||||
node = find_xpath_attr(
|
||||
video_xml, './Clip/PassthroughVariables/variable', 'name', name)
|
||||
if node is not None:
|
||||
return node.get('value')
|
||||
|
||||
if not formats:
|
||||
drm = xpath_text(video_xml, './Clip/DRM', default=None)
|
||||
if drm:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
ns_st_cds = pv('ns_st_cds')
|
||||
if ns_st_cds != 'free':
|
||||
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
themes = pv('themes')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
|
||||
'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
|
||||
'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
|
||||
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
||||
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
|
||||
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
||||
'upload_date': unified_strdate(pv('date_start')),
|
||||
'series': pv('series_name'),
|
||||
'season_number': int_or_none(pv('season_number')),
|
||||
'episode_number': int_or_none(pv('episode_number')),
|
||||
'categories': themes.split(',') if themes else [],
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -4,8 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -41,16 +45,22 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
def _real_extract(self, url):
|
||||
path, episode_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
try:
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), episode_id)[0]['error_code'], expected=True)
|
||||
raise
|
||||
|
||||
for source in media.get('sources', {}):
|
||||
src = source.get('src')
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
@@ -11,38 +13,61 @@ from ..utils import (
|
||||
|
||||
|
||||
class SkyBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
|
||||
webpage, 'video data'))
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
_SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'
|
||||
|
||||
video_url = 'ooyala:%s' % video_data['data-video-id']
|
||||
if video_data.get('data-token-required') == 'true':
|
||||
token_fetch_options = self._parse_json(video_data.get(
|
||||
'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
|
||||
token_fetch_url = token_fetch_options.get('url')
|
||||
if token_fetch_url:
|
||||
embed_token = self._download_webpage(urljoin(
|
||||
url, token_fetch_url), video_id, fatal=False)
|
||||
if embed_token:
|
||||
video_url = smuggle_url(
|
||||
video_url, {'embed_token': embed_token.strip('"')})
|
||||
def _process_ooyala_element(self, webpage, sdc_el, url):
|
||||
sdc = extract_attributes(sdc_el)
|
||||
provider = sdc.get('data-provider')
|
||||
if provider == 'ooyala':
|
||||
video_id = sdc['data-sdc-video-id']
|
||||
video_url = 'ooyala:%s' % video_id
|
||||
ie_key = 'Ooyala'
|
||||
ooyala_el = self._search_regex(
|
||||
r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id,
|
||||
webpage, 'video data', fatal=False)
|
||||
if ooyala_el:
|
||||
ooyala_attrs = extract_attributes(ooyala_el) or {}
|
||||
if ooyala_attrs.get('data-token-required') == 'true':
|
||||
token_fetch_url = (self._parse_json(ooyala_attrs.get(
|
||||
'data-token-fetch-options', '{}'),
|
||||
video_id, fatal=False) or {}).get('url')
|
||||
if token_fetch_url:
|
||||
embed_token = self._download_json(urljoin(
|
||||
url, token_fetch_url), video_id, fatal=False)
|
||||
if embed_token:
|
||||
video_url = smuggle_url(
|
||||
video_url, {'embed_token': embed_token})
|
||||
elif provider == 'brightcove':
|
||||
video_id = sdc['data-video-id']
|
||||
account_id = sdc.get('data-account-id') or '6058004172001'
|
||||
player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
|
||||
video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
|
||||
ie_key = 'BrightcoveNew'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ie_key': ie_key,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._process_ooyala_element(webpage, self._search_regex(
|
||||
self._SDC_EL_REGEX, webpage, 'sdc element'), url)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': strip_or_none(self._og_search_description(webpage)),
|
||||
'ie_key': 'Ooyala',
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class SkySportsIE(SkyBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
IE_NAME = 'sky:sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
|
||||
'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
|
||||
'info_dict': {
|
||||
@@ -52,19 +77,55 @@ class SkySportsIE(SkyBaseIE):
|
||||
'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.skysports.com/watch/video/tv-shows/12118508/rainford-brent-how-ace-programme-helps',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SkyNewsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:news'
|
||||
_VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
|
||||
'md5': 'd6327e581473cea9976a3236ded370cd',
|
||||
'md5': '411e8893fd216c75eaf7e4c65d364115',
|
||||
'info_dict': {
|
||||
'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
|
||||
'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Russian plane inspected after deadly fire',
|
||||
'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
|
||||
'uploader_id': '6058004172001',
|
||||
'timestamp': 1567112345,
|
||||
'upload_date': '20190829',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
|
||||
|
||||
class SkySportsNewsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:sports:news'
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass',
|
||||
'info_dict': {
|
||||
'id': '10871916',
|
||||
'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass',
|
||||
'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
|
||||
entries = []
|
||||
for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
|
||||
entries.append(self._process_ooyala_element(webpage, sdc_el, url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, article_id, self._og_search_title(webpage),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
||||
@@ -2,7 +2,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SlidesLiveIE(InfoExtractor):
|
||||
@@ -18,8 +23,21 @@ class SlidesLiveIE(InfoExtractor):
|
||||
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
|
||||
'uploader': 'SlidesLive Videos - A',
|
||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||
'timestamp': 1597615266,
|
||||
'upload_date': '20170925',
|
||||
}
|
||||
}, {
|
||||
# video_service_name = yoda
|
||||
'url': 'https://slideslive.com/38935785',
|
||||
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
|
||||
'info_dict': {
|
||||
'id': 'RMraDYN5ozA_',
|
||||
'ext': 'mp4',
|
||||
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
# video_service_name = youtube
|
||||
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
|
||||
@@ -39,18 +57,48 @@ class SlidesLiveIE(InfoExtractor):
|
||||
video_data = self._download_json(
|
||||
'https://ben.slideslive.com/player/' + video_id, video_id)
|
||||
service_name = video_data['video_service_name'].lower()
|
||||
assert service_name in ('url', 'vimeo', 'youtube')
|
||||
assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
|
||||
service_id = video_data['video_service_id']
|
||||
subtitles = {}
|
||||
for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
webvtt_url = url_or_none(sub.get('webvtt_url'))
|
||||
if not webvtt_url:
|
||||
continue
|
||||
lang = sub.get('language') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': webvtt_url,
|
||||
})
|
||||
info = {
|
||||
'id': video_id,
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'url': service_id,
|
||||
'is_live': bool_or_none(video_data.get('is_live')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
if service_name == 'url':
|
||||
if service_name in ('url', 'yoda'):
|
||||
info['title'] = video_data['title']
|
||||
if service_name == 'url':
|
||||
info['url'] = service_id
|
||||
else:
|
||||
formats = []
|
||||
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
_MANIFEST_PATTERN % (service_id, 'm3u8'),
|
||||
service_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
|
||||
mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': service_id,
|
||||
'formats': formats,
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': service_id,
|
||||
'ie_key': service_name.capitalize(),
|
||||
'title': video_data.get('title'),
|
||||
})
|
||||
|
||||
@@ -1,416 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||
'md5': '02c0dfab2102984e9c5bb585cc7cc321',
|
||||
'info_dict': {
|
||||
'id': 'v261036632ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'катастрофа с камер видеонаблюдения',
|
||||
'uploader': 'rbc2008',
|
||||
'uploader_id': 'rbc08',
|
||||
'upload_date': '20131118',
|
||||
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
|
||||
},
|
||||
},
|
||||
# real video id 57591
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v57591cb20',
|
||||
'md5': '830266dfc21f077eac5afd1883091bcd',
|
||||
'info_dict': {
|
||||
'id': 'v57591cb20',
|
||||
'ext': 'flv',
|
||||
'title': 'test',
|
||||
'uploader': 'Support Photofile@photofile',
|
||||
'uploader_id': 'support-photofile',
|
||||
'upload_date': '20070704',
|
||||
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
|
||||
},
|
||||
},
|
||||
# video-password, not approved by moderator
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
||||
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
||||
'info_dict': {
|
||||
'id': 'v1390466a13c',
|
||||
'ext': 'mp4',
|
||||
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
'uploader': 'timoxa40',
|
||||
'uploader_id': 'timoxa40',
|
||||
'upload_date': '20100404',
|
||||
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'qwerty',
|
||||
},
|
||||
'skip': 'Video is not approved by moderator',
|
||||
},
|
||||
# video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v6984858774#',
|
||||
'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
|
||||
'info_dict': {
|
||||
'id': 'v6984858774',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дача Солженицина ПАРОЛЬ 223322',
|
||||
'uploader': 'psavari1',
|
||||
'uploader_id': 'psavari1',
|
||||
'upload_date': '20081103',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '223322',
|
||||
},
|
||||
},
|
||||
# age limit + video-password, not approved by moderator
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
||||
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
||||
'info_dict': {
|
||||
'id': 'v15408898bcf',
|
||||
'ext': 'flv',
|
||||
'title': 'этот ролик не покажут по ТВ',
|
||||
'uploader': 'zzxxx',
|
||||
'uploader_id': 'ueggb',
|
||||
'upload_date': '20101001',
|
||||
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '333'
|
||||
},
|
||||
'skip': 'Video is not approved by moderator',
|
||||
},
|
||||
# age limit + video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v7780025814',
|
||||
'md5': 'b4599b068422559374a59300c5337d72',
|
||||
'info_dict': {
|
||||
'id': 'v7780025814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sexy Beach (пароль 123)',
|
||||
'uploader': 'вАся',
|
||||
'uploader_id': 'asya_prosto',
|
||||
'upload_date': '20081218',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '123'
|
||||
},
|
||||
},
|
||||
# swf player
|
||||
{
|
||||
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
||||
'md5': '31099eeb4bc906712c5f40092045108d',
|
||||
'info_dict': {
|
||||
'id': 'v9188090500',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shakira - Don\'t Bother',
|
||||
'uploader': 'HannahL',
|
||||
'uploader_id': 'lisaha95',
|
||||
'upload_date': '20090331',
|
||||
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
|
||||
<div\s+class="video_image">[^<]+</div>\s*
|
||||
<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
|
||||
if mobj is not None:
|
||||
return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
|
||||
|
||||
def _search_meta(self, name, html, display_name=None):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_meta(name, html, display_name)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_form = {
|
||||
'ticket': video_id,
|
||||
'video_url': '1',
|
||||
'frame_url': '1',
|
||||
'devid': 'LoadupFlashPlayer',
|
||||
'getvideoinfo': '1',
|
||||
}
|
||||
|
||||
video_password = self._downloader.params.get('videopassword')
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._download_json(
|
||||
'http://smotri.com/video/view/url/bot/',
|
||||
video_id, 'Downloading video JSON',
|
||||
data=urlencode_postdata(video_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||
|
||||
if not video_url:
|
||||
if video.get('_moderate_no'):
|
||||
raise ExtractorError(
|
||||
'Video %s has not been approved by moderator' % video_id, expected=True)
|
||||
|
||||
if video.get('error'):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
if video.get('_pass_protected') == 1:
|
||||
msg = ('Invalid video password' if video_password
|
||||
else 'This video is protected by a password, use the --video-password option')
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
title = video['title']
|
||||
thumbnail = video.get('_imgURL')
|
||||
upload_date = unified_strdate(video.get('added'))
|
||||
uploader = video.get('userNick')
|
||||
uploader_id = video.get('userLogin')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
|
||||
'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
'Video %s may not be available; smotri said: %s ' %
|
||||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if 'EroConfirmText">' in webpage:
|
||||
self.report_age_confirmation()
|
||||
confirm_string = self._html_search_regex(
|
||||
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
|
||||
webpage, 'confirm string')
|
||||
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
||||
webpage = self._download_webpage(
|
||||
confirm_url, video_id,
|
||||
'Downloading video page (age confirmed)')
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': int_or_none(view_count),
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
}
|
||||
|
||||
|
||||
class SmotriCommunityIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com community videos'
|
||||
IE_NAME = 'smotri:community'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://smotri.com/community/video/kommuna',
|
||||
'info_dict': {
|
||||
'id': 'kommuna',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
community_id = self._match_id(url)
|
||||
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
|
||||
community_id, 'Downloading community RSS')
|
||||
|
||||
entries = [
|
||||
self.url_result(video_url.text, SmotriIE.ie_key())
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
return self.playlist_result(entries, community_id)
|
||||
|
||||
|
||||
class SmotriUserIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com user videos'
|
||||
IE_NAME = 'smotri:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://smotri.com/user/inspector',
|
||||
'info_dict': {
|
||||
'id': 'inspector',
|
||||
'title': 'Inspector',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
|
||||
user_id, 'Downloading user RSS')
|
||||
|
||||
entries = [self.url_result(video_url.text, 'Smotri')
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
description_text = xpath_text(rss, './channel/description') or ''
|
||||
user_nickname = self._search_regex(
|
||||
'^Видео режиссера (.+)$', description_text,
|
||||
'user nickname', fatal=False)
|
||||
|
||||
return self.playlist_result(entries, user_id, user_nickname)
|
||||
|
||||
|
||||
class SmotriBroadcastIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com broadcasts'
|
||||
IE_NAME = 'smotri:broadcast'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
broadcast_id = mobj.group('id')
|
||||
|
||||
broadcast_url = 'http://' + mobj.group('url')
|
||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
||||
|
||||
if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
|
||||
raise ExtractorError(
|
||||
'Broadcast %s does not exist' % broadcast_id, expected=True)
|
||||
|
||||
# Adult content
|
||||
if re.search('EroConfirmText">', broadcast_page) is not None:
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required(
|
||||
'Erotic broadcasts allowed only for registered users')
|
||||
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
'confirm_erotic': '1',
|
||||
'login': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = sanitized_Request(
|
||||
broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
broadcast_page = self._download_webpage(
|
||||
request, broadcast_id, 'Logging in and confirming age')
|
||||
|
||||
if '>Неверный логин или пароль<' in broadcast_page:
|
||||
raise ExtractorError(
|
||||
'Unable to log in: bad username or password', expected=True)
|
||||
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
ticket = self._html_search_regex(
|
||||
(r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
|
||||
r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
|
||||
broadcast_page, 'broadcast ticket', group='ticket')
|
||||
|
||||
broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
|
||||
|
||||
broadcast_password = self._downloader.params.get('videopassword')
|
||||
if broadcast_password:
|
||||
broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
|
||||
|
||||
broadcast_json_page = self._download_webpage(
|
||||
broadcast_url, broadcast_id, 'Downloading broadcast JSON')
|
||||
|
||||
try:
|
||||
broadcast_json = json.loads(broadcast_json_page)
|
||||
|
||||
protected_broadcast = broadcast_json['_pass_protected'] == 1
|
||||
if protected_broadcast and not broadcast_password:
|
||||
raise ExtractorError(
|
||||
'This broadcast is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
|
||||
broadcast_offline = broadcast_json['is_play'] == 0
|
||||
if broadcast_offline:
|
||||
raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
|
||||
|
||||
rtmp_url = broadcast_json['_server']
|
||||
mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
|
||||
if not mobj:
|
||||
raise ExtractorError('Unexpected broadcast rtmp URL')
|
||||
|
||||
broadcast_playpath = broadcast_json['_streamName']
|
||||
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
|
||||
broadcast_thumbnail = broadcast_json.get('_imgURL')
|
||||
broadcast_title = self._live_title(broadcast_json['title'])
|
||||
broadcast_description = broadcast_json.get('description')
|
||||
broadcaster_nick = broadcast_json.get('nick')
|
||||
broadcaster_login = broadcast_json.get('login')
|
||||
rtmp_conn = 'S:%s' % uuid.uuid4().hex
|
||||
except KeyError:
|
||||
if protected_broadcast:
|
||||
raise ExtractorError('Bad broadcast password', expected=True)
|
||||
raise ExtractorError('Unexpected broadcast JSON')
|
||||
|
||||
return {
|
||||
'id': broadcast_id,
|
||||
'url': rtmp_url,
|
||||
'title': broadcast_title,
|
||||
'thumbnail': broadcast_thumbnail,
|
||||
'description': broadcast_description,
|
||||
'uploader': broadcaster_nick,
|
||||
'uploader_id': broadcaster_login,
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
'ext': 'flv',
|
||||
'play_path': broadcast_playpath,
|
||||
'player_url': 'http://pics.smotri.com/broadcast_play.swf',
|
||||
'app': broadcast_app,
|
||||
'rtmp_live': True,
|
||||
'rtmp_conn': rtmp_conn,
|
||||
'is_live': True,
|
||||
}
|
||||
@@ -1,40 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SonyLIVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
||||
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
|
||||
'info_dict': {
|
||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
||||
'id': 'ref:5024612095001',
|
||||
'title': 'Bachelors Delight - Achaari Cheese Toast',
|
||||
'id': '1000022678',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170923',
|
||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
||||
'uploader_id': '5182475815001',
|
||||
'timestamp': 1506200547,
|
||||
'upload_date': '20200411',
|
||||
'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
|
||||
'timestamp': 1586632091,
|
||||
'duration': 185,
|
||||
'season_number': 1,
|
||||
'episode': 'Achaari Cheese Toast',
|
||||
'episode_number': 1,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
||||
'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_TOKEN = None
|
||||
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
|
||||
def _call_api(self, version, path, video_id):
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['security_token'] = self._TOKEN
|
||||
try:
|
||||
return self._download_json(
|
||||
'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
|
||||
video_id, headers=headers)['resultObj']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
if message == 'Geoblocked Country':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(message)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
||||
'geo_countries': ['IN'],
|
||||
'referrer': url,
|
||||
}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
video_id = self._match_id(url)
|
||||
content = self._call_api(
|
||||
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
|
||||
if content.get('isEncrypted'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
dash_url = content['videoURL']
|
||||
headers = {
|
||||
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
|
||||
}
|
||||
formats = self._extract_mpd_formats(
|
||||
dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
|
||||
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._call_api(
|
||||
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
|
||||
title = metadata['title']
|
||||
episode = metadata.get('episodeTitle')
|
||||
if episode and title != episode:
|
||||
title += ' - ' + episode
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('posterURL'),
|
||||
'description': metadata.get('longDescription') or metadata.get('shortDescription'),
|
||||
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'season_number': int_or_none(metadata.get('season')),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'release_year': int_or_none(metadata.get('year')),
|
||||
}
|
||||
|
||||
@@ -7,17 +7,24 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?spankbang\.com/
|
||||
(?:
|
||||
(?P<id>[\da-z]+)/(?:video|play|embed)\b|
|
||||
[\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
@@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2y3td/embed/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('id_2')
|
||||
webpage = self._download_webpage(
|
||||
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||
video_id, headers={'Cookie': 'country=US'})
|
||||
@@ -155,30 +166,33 @@ class SpankBangIE(InfoExtractor):
|
||||
|
||||
|
||||
class SpankBangPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||
'info_dict': {
|
||||
'id': 'ug0k',
|
||||
'title': 'Big Ass Titties',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_mincount': 40,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||
|
||||
entries = [self.url_result(
|
||||
'https://spankbang.com/%s/video' % video_id,
|
||||
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||
urljoin(url, mobj.group('path')),
|
||||
ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
|
||||
% re.escape(display_id), webpage)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||
r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
|
||||
fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
||||
@@ -3,50 +3,62 @@ from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'md5': '74bf14128578d1e040c3ebc82088f45f',
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
|
||||
'info_dict': {
|
||||
'id': '9dexnwtmh8_X',
|
||||
'id': 'bm0foJFaTKqb',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Cowboy Adventure',
|
||||
'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
|
||||
'timestamp': 1437758640,
|
||||
'upload_date': '20150724',
|
||||
'uploader': 'NBCU-SPROUT-NEW',
|
||||
}
|
||||
}
|
||||
'title': 'Robot Bike Race',
|
||||
'description': 'md5:436b1d97117cc437f54c383f4debc66d',
|
||||
'timestamp': 1606148940,
|
||||
'upload_date': '20201123',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.universalkids.com/watch/robot-bike-race',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_component = self._search_regex(
|
||||
r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
|
||||
webpage, 'video component', default=None)
|
||||
if video_component:
|
||||
options = self._parse_json(extract_attributes(
|
||||
video_component)['data-options'], video_id)
|
||||
theplatform_url = options['video']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if options.get('protected'):
|
||||
query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
theplatform_url, query), {'force_smil_url': True})
|
||||
else:
|
||||
iframe = self._search_regex(
|
||||
r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
|
||||
webpage, 'iframe')
|
||||
theplatform_url = extract_attributes(iframe)['src']
|
||||
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
display_id = self._match_id(url)
|
||||
mpx_metadata = self._download_json(
|
||||
# http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
|
||||
'https://www.universalkids.com/_api/videos/' + display_id,
|
||||
display_id)['mpxMetadata']
|
||||
media_pid = mpx_metadata['mediaPid']
|
||||
theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if mpx_metadata.get('entitlement') == 'auth':
|
||||
query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(
|
||||
update_url_query(theplatform_url, query), {
|
||||
'force_smil_url': True,
|
||||
'geo_countries': self._GEO_COUNTRIES,
|
||||
})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_pid,
|
||||
'url': theplatform_url,
|
||||
'series': mpx_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
|
||||
'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
@@ -1,28 +1,74 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StitcherIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
|
||||
class StitcherBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'
|
||||
|
||||
def _call_api(self, path, video_id, query):
|
||||
resp = self._download_json(
|
||||
'https://api.prod.stitcher.com/' + path,
|
||||
video_id, query=query)
|
||||
error_massage = try_get(resp, lambda x: x['errors'][0]['message'])
|
||||
if error_massage:
|
||||
raise ExtractorError(error_massage, expected=True)
|
||||
return resp['data']
|
||||
|
||||
def _extract_description(self, data):
|
||||
return clean_html(data.get('html_description') or data.get('description'))
|
||||
|
||||
def _extract_audio_url(self, episode):
|
||||
return url_or_none(episode.get('audio_url') or episode.get('guid'))
|
||||
|
||||
def _extract_show_info(self, show):
|
||||
return {
|
||||
'thumbnail': show.get('image_base_url'),
|
||||
'series': show.get('title'),
|
||||
}
|
||||
|
||||
def _extract_episode(self, episode, audio_url, show_info):
|
||||
info = {
|
||||
'id': compat_str(episode['id']),
|
||||
'display_id': episode.get('slug'),
|
||||
'title': episode['title'].strip(),
|
||||
'description': self._extract_description(episode),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'url': clean_podcast_url(audio_url),
|
||||
'vcodec': 'none',
|
||||
'timestamp': int_or_none(episode.get('date_published')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'season_id': str_or_none(episode.get('season_id')),
|
||||
}
|
||||
info.update(show_info)
|
||||
return info
|
||||
|
||||
|
||||
class StitcherIE(StitcherBaseIE):
|
||||
_VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
|
||||
'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
|
||||
'md5': 'e9635098e0da10b21a0e2b85585530f6',
|
||||
'info_dict': {
|
||||
'id': '40789481',
|
||||
'ext': 'mp3',
|
||||
'title': 'Machine Learning Mastery and Cancer Clusters',
|
||||
'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3',
|
||||
'description': 'md5:547adb4081864be114ae3831b4c2b42f',
|
||||
'duration': 1604,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20151008',
|
||||
'timestamp': 1444285800,
|
||||
'series': 'Talking Machines',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
|
||||
@@ -38,6 +84,7 @@ class StitcherIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Page Not Found',
|
||||
}, {
|
||||
# escaped title
|
||||
'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
|
||||
@@ -45,37 +92,53 @@ class StitcherIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
audio_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or audio_id
|
||||
audio_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'shows/episodes', audio_id, {'episode_ids': audio_id})
|
||||
episode = data['episodes'][0]
|
||||
audio_url = self._extract_audio_url(episode)
|
||||
if not audio_url:
|
||||
self.raise_login_required()
|
||||
show = try_get(data, lambda x: x['shows'][0], dict) or {}
|
||||
return self._extract_episode(
|
||||
episode, audio_url, self._extract_show_info(show))
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
episode = self._parse_json(
|
||||
js_to_json(self._search_regex(
|
||||
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
|
||||
display_id)['config']['episode']
|
||||
class StitcherShowIE(StitcherBaseIE):
|
||||
_VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines',
|
||||
'info_dict': {
|
||||
'id': 'the-talking-machines',
|
||||
'title': 'Talking Machines',
|
||||
'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
|
||||
},
|
||||
'playlist_mincount': 106,
|
||||
}, {
|
||||
'url': 'https://www.stitcher.com/show/the-talking-machines',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
title = unescapeHTML(episode['title'])
|
||||
formats = [{
|
||||
'url': episode[episode_key],
|
||||
'ext': determine_ext(episode[episode_key]) or 'mp3',
|
||||
'vcodec': 'none',
|
||||
} for episode_key in ('episodeURL',) if episode.get(episode_key)]
|
||||
description = self._search_regex(
|
||||
r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False)
|
||||
duration = int_or_none(episode.get('duration'))
|
||||
thumbnail = episode.get('episodeImage')
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000})
|
||||
show = try_get(data, lambda x: x['shows'][0], dict) or {}
|
||||
show_info = self._extract_show_info(show)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
entries = []
|
||||
for episode in (data.get('episodes') or []):
|
||||
audio_url = self._extract_audio_url(episode)
|
||||
if not audio_url:
|
||||
continue
|
||||
entries.append(self._extract_episode(episode, audio_url, show_info))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_slug, show.get('title'),
|
||||
self._extract_description(show))
|
||||
|
||||
@@ -2,25 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class StreetVoiceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://streetvoice.com/skippylu/songs/94440/',
|
||||
'md5': '15974627fc01a29e492c98593c2fd472',
|
||||
'url': 'https://streetvoice.com/skippylu/songs/123688/',
|
||||
'md5': '0eb535970629a5195685355f3ed60bfd',
|
||||
'info_dict': {
|
||||
'id': '94440',
|
||||
'id': '123688',
|
||||
'ext': 'mp3',
|
||||
'title': '輸',
|
||||
'description': 'Crispy脆樂團 - 輸',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 260,
|
||||
'upload_date': '20091018',
|
||||
'title': '流浪',
|
||||
'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 270,
|
||||
'upload_date': '20100923',
|
||||
'uploader': 'Crispy脆樂團',
|
||||
'uploader_id': '627810',
|
||||
'uploader_url': 're:^https?://streetvoice.com/skippylu/',
|
||||
'timestamp': 1285261661,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'track': '流浪',
|
||||
'track_id': '123688',
|
||||
'album': '2010',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
||||
@@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
song = self._download_json(
|
||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
||||
|
||||
base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
|
||||
song = self._download_json(base_url, song_id, query={
|
||||
'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
|
||||
})
|
||||
title = song['name']
|
||||
author = song['user']['nickname']
|
||||
|
||||
formats = []
|
||||
for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
|
||||
f_url = (self._download_json(
|
||||
base_url + suffix + '/', song_id,
|
||||
'Downloading %s format URL' % format_id,
|
||||
data=b'', fatal=False) or {}).get('file')
|
||||
if not f_url:
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp3',
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if format_id == 'hls':
|
||||
f['protocol'] = 'm3u8_native'
|
||||
abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
|
||||
if abr:
|
||||
abr = int(abr)
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
user = song.get('user') or {}
|
||||
username = user.get('username')
|
||||
get_count = lambda x: int_or_none(song.get(x + '_count'))
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song['file'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': '%s - %s' % (author, title),
|
||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
||||
'duration': song.get('length'),
|
||||
'upload_date': unified_strdate(song.get('created_at')),
|
||||
'uploader': author,
|
||||
'uploader_id': compat_str(song['user']['id']),
|
||||
'description': strip_or_none(song.get('synopsis')),
|
||||
'thumbnail': song.get('image'),
|
||||
'duration': int_or_none(song.get('length')),
|
||||
'timestamp': parse_iso8601(song.get('created_at')),
|
||||
'uploader': try_get(user, lambda x: x['profile']['nickname']),
|
||||
'uploader_id': str_or_none(user.get('id')),
|
||||
'uploader_url': urljoin(url, '/%s/' % username) if username else None,
|
||||
'view_count': get_count('plays'),
|
||||
'like_count': get_count('likes'),
|
||||
'comment_count': get_count('comments'),
|
||||
'repost_count': get_count('share'),
|
||||
'track': title,
|
||||
'track_id': song_id,
|
||||
'album': try_get(song, lambda x: x['album']['name']),
|
||||
}
|
||||
|
||||
@@ -8,13 +8,17 @@ from ..utils import (
|
||||
compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class STVPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'stv:player'
|
||||
_VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# shortform
|
||||
'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
|
||||
'md5': '5adf9439c31d554f8be0707c7abe7e0a',
|
||||
'info_dict': {
|
||||
@@ -27,7 +31,11 @@ class STVPlayerIE(InfoExtractor):
|
||||
'uploader_id': '1486976045',
|
||||
},
|
||||
'skip': 'this resource is unavailable outside of the UK',
|
||||
}
|
||||
}, {
|
||||
# episodes
|
||||
'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
|
||||
_PTYPE_MAP = {
|
||||
'episode': 'episodes',
|
||||
@@ -36,11 +44,31 @@ class STVPlayerIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
ptype, video_id = re.match(self._VALID_URL, url).groups()
|
||||
resp = self._download_json(
|
||||
'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
|
||||
video_id)
|
||||
|
||||
result = resp['results']
|
||||
webpage = self._download_webpage(url, video_id, fatal=False) or ''
|
||||
props = (self._parse_json(self._search_regex(
|
||||
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||
webpage, 'next data', default='{}'), video_id,
|
||||
fatal=False) or {}).get('props') or {}
|
||||
player_api_cache = try_get(
|
||||
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
|
||||
|
||||
api_path, resp = None, {}
|
||||
for k, v in player_api_cache.items():
|
||||
if k.startswith('/episodes/') or k.startswith('/shortform/'):
|
||||
api_path, resp = k, v
|
||||
break
|
||||
else:
|
||||
episode_id = str_or_none(try_get(
|
||||
props, lambda x: x['pageProps']['episodeId']))
|
||||
api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id)
|
||||
|
||||
result = resp.get('results')
|
||||
if not result:
|
||||
resp = self._download_json(
|
||||
'https://player.api.stv.tv/v1' + api_path, video_id)
|
||||
result = resp['results']
|
||||
|
||||
video = result['video']
|
||||
video_id = compat_str(video['id'])
|
||||
|
||||
@@ -57,7 +85,7 @@ class STVPlayerIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id,
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}),
|
||||
'description': result.get('summary'),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'subtitles': subtitles,
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TastyTradeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
'title': 'A History of Teaming',
|
||||
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': OoyalaIE.ie_key(),
|
||||
'url': 'ooyala:%s' % ooyala_code,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
||||
@@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE):
|
||||
@staticmethod
|
||||
def _is_teachable(webpage):
|
||||
return 'teachableTracker.linker:autoLink' in webpage and re.search(
|
||||
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
|
||||
r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
|
||||
webpage)
|
||||
|
||||
@staticmethod
|
||||
@@ -269,7 +269,7 @@ class TeachableCourseIE(TeachableBaseIE):
|
||||
r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
|
||||
webpage):
|
||||
li = mobj.group('li')
|
||||
if 'fa-youtube-play' not in li:
|
||||
if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li):
|
||||
continue
|
||||
lecture_url = self._search_regex(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
|
||||
|
||||
@@ -5,14 +5,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'md5': '7ee56d665cfd241c0e6d80fd175068b0',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
@@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor):
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
|
||||
'info_dict': {
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||
'md5': 'eddb50291df704ce23c74821b995bcac',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
@@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor):
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
if content.get('dataCmsId') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
|
||||
config_url = urljoin(url, content['dataConfig'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
content['dataConfig'], video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
|
||||
def mmc_url(mmc_type):
|
||||
return re.sub(
|
||||
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
|
||||
config['services']['mmc'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for mmc_type in ('flash', 'html5'):
|
||||
mmc = self._download_json(
|
||||
mmc_url(mmc_type), video_id,
|
||||
'Downloading %s mmc JSON' % mmc_type, fatal=False)
|
||||
if not mmc:
|
||||
continue
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
}, fatal=False) or {}
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
services = config['services']
|
||||
caronte = self._download_json(services['caronte'], video_id)
|
||||
stream = caronte['dls'][0]['stream']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
'Origin': re.match(r'https?://[^/]+', url).group(0),
|
||||
})
|
||||
cdn = self._download_json(
|
||||
caronte['cerbero'], video_id, data=json.dumps({
|
||||
'bbx': caronte['bbx'],
|
||||
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
|
||||
}).encode(), headers=headers)['tokens']['1']['cdn']
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
'duration': int_or_none(content.get('dataDuration')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,25 +12,16 @@ from ..utils import (
|
||||
|
||||
|
||||
class TeleQuebecBaseIE(InfoExtractor):
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
@staticmethod
|
||||
def _result(url, ie_key):
|
||||
def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(url, {'geo_countries': ['CA']}),
|
||||
'ie_key': ie_key,
|
||||
'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _limelight_result(media_id):
|
||||
return TeleQuebecBaseIE._result(
|
||||
'limelight:media:' + media_id, 'LimelightMedia')
|
||||
|
||||
@staticmethod
|
||||
def _brightcove_result(brightcove_id):
|
||||
return TeleQuebecBaseIE._result(
|
||||
'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
|
||||
% brightcove_id, 'BrightcoveNew')
|
||||
|
||||
|
||||
class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
@@ -44,14 +35,18 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
# available till 01.01.2023
|
||||
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
|
||||
'info_dict': {
|
||||
'id': '577116881b4b439084e6b1cf4ef8b1b3',
|
||||
'id': '6155972771001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Un petit choc et puis repart!',
|
||||
'description': 'md5:067bc84bd6afecad85e69d1000730907',
|
||||
'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
|
||||
'timestamp': 1589262469,
|
||||
'uploader_id': '6150020952001',
|
||||
'upload_date': '20200512',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
|
||||
'info_dict': {
|
||||
@@ -65,7 +60,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
@@ -79,25 +73,20 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
||||
media_data = self._download_json(
|
||||
'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
|
||||
media = self._download_json(
|
||||
'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
|
||||
media_id)['media']
|
||||
|
||||
source_id = media_data['streamInfo']['sourceId']
|
||||
source = (try_get(
|
||||
media_data, lambda x: x['streamInfo']['source'],
|
||||
compat_str) or 'limelight').lower()
|
||||
if source == 'brightcove':
|
||||
info = self._brightcove_result(source_id)
|
||||
else:
|
||||
info = self._limelight_result(source_id)
|
||||
source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
|
||||
info = self._brightcove_result(source_id, '22gPKdt7f')
|
||||
product = media.get('product') or {}
|
||||
season = product.get('season') or {}
|
||||
info.update({
|
||||
'title': media_data.get('title'),
|
||||
'description': try_get(
|
||||
media_data, lambda x: x['descriptions'][0]['text'], compat_str),
|
||||
'duration': int_or_none(
|
||||
media_data.get('durationInMilliseconds'), 1000),
|
||||
'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
|
||||
'series': try_get(season, lambda x: x['serie']['titre']),
|
||||
'season': season.get('name'),
|
||||
'season_number': int_or_none(season.get('seasonNo')),
|
||||
'episode': product.get('titre'),
|
||||
'episode_number': int_or_none(product.get('episodeNo')),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -148,7 +137,7 @@ class TeleQuebecSquatIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
class TeleQuebecEmissionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
@@ -160,15 +149,16 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
_TESTS = [{
|
||||
'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
|
||||
'info_dict': {
|
||||
'id': '66648a6aef914fe3badda25e81a4d50a',
|
||||
'id': '6154476028001',
|
||||
'ext': 'mp4',
|
||||
'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
|
||||
'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
|
||||
'upload_date': '20171024',
|
||||
'timestamp': 1508862118,
|
||||
'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?',
|
||||
'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
|
||||
'upload_date': '20200505',
|
||||
'timestamp': 1588713424,
|
||||
'uploader_id': '6150020952001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
|
||||
@@ -187,26 +177,26 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_id = self._search_regex(
|
||||
r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
|
||||
'limelight id')
|
||||
r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
|
||||
|
||||
info = self._limelight_result(media_id)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage, default=None),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
})
|
||||
return info
|
||||
return self.url_result(
|
||||
'http://zonevideo.telequebec.tv/media/' + media_id,
|
||||
TeleQuebecIE.ie_key())
|
||||
|
||||
|
||||
class TeleQuebecLiveIE(InfoExtractor):
|
||||
class TeleQuebecLiveIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
|
||||
_TEST = {
|
||||
'url': 'http://zonevideo.telequebec.tv/endirect/',
|
||||
'info_dict': {
|
||||
'id': 'endirect',
|
||||
'id': '6159095684001',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
'description': 'Canal principal de Télé-Québec',
|
||||
'uploader_id': '6150020952001',
|
||||
'timestamp': 1590439901,
|
||||
'upload_date': '20200525',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -214,25 +204,49 @@ class TeleQuebecLiveIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._brightcove_result('6159095684001', 'skCsmi2Uw')
|
||||
|
||||
m3u8_url = None
|
||||
webpage = self._download_webpage(
|
||||
'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
|
||||
fatal=False)
|
||||
if webpage:
|
||||
m3u8_url = self._search_regex(
|
||||
r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'm3u8 url', default=None, group='url')
|
||||
if not m3u8_url:
|
||||
m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title('Télé-Québec - En direct'),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
class TeleQuebecVideoIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.telequebec.tv/player/31110/stream',
|
||||
'info_dict': {
|
||||
'id': '6202570652001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
|
||||
'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
|
||||
'upload_date': '20201019',
|
||||
'timestamp': 1603115930,
|
||||
'uploader_id': '6101674910001',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.telequebec.tv/player-live/28527',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
return self._download_json(
|
||||
'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path,
|
||||
video_id, query={'device_layout': 'web', 'device_type': 'web'})['data']
|
||||
|
||||
def _real_extract(self, url):
|
||||
asset_id = self._match_id(url)
|
||||
asset = self._call_api(asset_id, asset_id)['asset']
|
||||
stream = self._call_api(
|
||||
asset_id + '/streams/' + asset['streams'][0]['id'], asset_id)['stream']
|
||||
stream_url = stream['url']
|
||||
account_id = try_get(
|
||||
stream, lambda x: x['video_provider_details']['account_id']) or '6101674910001'
|
||||
info = self._brightcove_result(stream_url, 'default', account_id)
|
||||
info.update({
|
||||
'description': asset.get('long_description') or asset.get('short_description'),
|
||||
'series': asset.get('series_original_name'),
|
||||
'season_number': int_or_none(asset.get('season_number')),
|
||||
'episode': asset.get('original_name'),
|
||||
'episode_number': int_or_none(asset.get('episode_number')),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
# smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,14 +25,16 @@ class TenPlayIE(InfoExtractor):
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# 'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
_GEO_BYPASS = False
|
||||
_FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
@@ -40,19 +43,28 @@ class TenPlayIE(InfoExtractor):
|
||||
video = data.get('video') or {}
|
||||
metadata = data.get('metaData') or {}
|
||||
brightcove_id = video.get('videoId') or metadata['showContentVideoId']
|
||||
brightcove_url = smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['AU']})
|
||||
# brightcove_url = smuggle_url(
|
||||
# self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
# {'geo_countries': ['AU']})
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': brightcove_url,
|
||||
'id': content_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
|
||||
# '_type': 'url_transparent',
|
||||
# 'url': brightcove_url,
|
||||
'formats': formats,
|
||||
'id': brightcove_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
|
||||
'description': video.get('description'),
|
||||
'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
|
||||
'series': metadata.get('showName'),
|
||||
'season': metadata.get('showContentSeason'),
|
||||
'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'thumbnail': video.get('poster'),
|
||||
'uploader_id': '2199827728001',
|
||||
# 'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
@@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
provider_id = mobj.group('provider_id')
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TheWeatherChannelIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
|
||||
'md5': 'ab924ac9574e79689c24c6b95e957def',
|
||||
'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
|
||||
'info_dict': {
|
||||
'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
|
||||
'ext': 'mp4',
|
||||
@@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
|
||||
'uploader': 'TWC - Digital (No Distro)',
|
||||
'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
|
||||
'upload_date': '20160720',
|
||||
'timestamp': 1469018835,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
video_id = drupal_settings['twc']['contexts']['node']['uuid']
|
||||
video_data = self._download_json(
|
||||
'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
|
||||
asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not locale:
|
||||
locale = 'en-US'
|
||||
video_data = list(self._download_json(
|
||||
'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
|
||||
'name': 'getCMSAssetsUrlConfig',
|
||||
'params': {
|
||||
'language': locale.replace('-', '_'),
|
||||
'query': {
|
||||
'assetName': {
|
||||
'$in': asset_name,
|
||||
},
|
||||
},
|
||||
}
|
||||
}]).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
|
||||
video_id = video_data['id']
|
||||
seo_meta = video_data.get('seometa', {})
|
||||
title = video_data.get('title') or seo_meta['title']
|
||||
|
||||
@@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
cc_url = video_data.get('cc_url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'uploader': video_data.get('providername'),
|
||||
'uploader_id': video_data.get('providerid'),
|
||||
'timestamp': parse_iso8601(video_data.get('publishdate')),
|
||||
'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -11,13 +11,13 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ToggleIE(InfoExtractor):
|
||||
IE_NAME = 'toggle'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
|
||||
'info_dict': {
|
||||
@@ -84,28 +84,12 @@ class ToggleIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_PREFERENCES = {
|
||||
'wvm-STBMain': -10,
|
||||
'wvm-iPadMain': -20,
|
||||
'wvm-iPhoneMain': -30,
|
||||
'wvm-Android': -40,
|
||||
}
|
||||
_API_USER = 'tvpapi_147'
|
||||
_API_PASS = '11111'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, note='Downloading video page')
|
||||
|
||||
api_user = self._search_regex(
|
||||
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
|
||||
default=self._API_USER, group='user')
|
||||
api_pass = self._search_regex(
|
||||
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
|
||||
default=self._API_PASS, group='pass')
|
||||
|
||||
params = {
|
||||
'initObj': {
|
||||
'Locale': {
|
||||
@@ -118,17 +102,16 @@ class ToggleIE(InfoExtractor):
|
||||
'SiteGuid': 0,
|
||||
'DomainID': '0',
|
||||
'UDID': '',
|
||||
'ApiUser': api_user,
|
||||
'ApiPass': api_pass
|
||||
'ApiUser': self._API_USER,
|
||||
'ApiPass': self._API_PASS
|
||||
},
|
||||
'MediaID': video_id,
|
||||
'mediaType': 0,
|
||||
}
|
||||
|
||||
req = sanitized_Request(
|
||||
info = self._download_json(
|
||||
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
|
||||
json.dumps(params).encode('utf-8'))
|
||||
info = self._download_json(req, video_id, 'Downloading video info json')
|
||||
video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
|
||||
|
||||
title = info['MediaName']
|
||||
|
||||
@@ -141,11 +124,16 @@ class ToggleIE(InfoExtractor):
|
||||
vid_format = vid_format.replace(' ', '')
|
||||
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', m3u8_id=vid_format,
|
||||
note='Downloading %s m3u8 information' % vid_format,
|
||||
errnote='Failed to download %s m3u8 information' % vid_format,
|
||||
fatal=False))
|
||||
fatal=False)
|
||||
for f in m3u8_formats:
|
||||
# Apple FairPlay Streaming
|
||||
if '/fpshls/' in f['url']:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id=vid_format,
|
||||
@@ -158,28 +146,21 @@ class ToggleIE(InfoExtractor):
|
||||
note='Downloading %s ISM manifest' % vid_format,
|
||||
errnote='Failed to download %s ISM manifest' % vid_format,
|
||||
fatal=False))
|
||||
elif ext in ('mp4', 'wvm'):
|
||||
# wvm are drm-protected files
|
||||
elif ext == 'mp4':
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'format_id': vid_format,
|
||||
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
|
||||
'format_note': 'DRM-protected video' if ext == 'wvm' else None
|
||||
})
|
||||
if not formats:
|
||||
for meta in (info.get('Metas') or []):
|
||||
if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
|
||||
raise ExtractorError(
|
||||
'This video is DRM protected.', expected=True)
|
||||
# Most likely because geo-blocked
|
||||
raise ExtractorError('No downloadable videos found', expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(info.get('Duration'))
|
||||
description = info.get('Description')
|
||||
created_at = parse_iso8601(info.get('CreationDate') or None)
|
||||
|
||||
average_rating = float_or_none(info.get('Rating'))
|
||||
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
|
||||
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
|
||||
|
||||
thumbnails = []
|
||||
for picture in info.get('Pictures', []):
|
||||
if not isinstance(picture, dict):
|
||||
@@ -199,15 +180,55 @@ class ToggleIE(InfoExtractor):
|
||||
})
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
def counter(prefix):
|
||||
return int_or_none(
|
||||
info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': created_at,
|
||||
'average_rating': average_rating,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'description': strip_or_none(info.get('Description')),
|
||||
'duration': int_or_none(info.get('Duration')),
|
||||
'timestamp': parse_iso8601(info.get('CreationDate') or None),
|
||||
'average_rating': float_or_none(info.get('Rating')),
|
||||
'view_count': counter('View'),
|
||||
'like_count': counter('Like'),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MeWatchIE(InfoExtractor):
|
||||
IE_NAME = 'mewatch'
|
||||
_VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
|
||||
'info_dict': {
|
||||
'id': '1008625',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recipe Of Life 味之道',
|
||||
'timestamp': 1603306526,
|
||||
'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
|
||||
'upload_date': '20201021',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
custom_id = self._download_json(
|
||||
'https://cdn.mewatch.sg/api/items/' + item_id,
|
||||
item_id, query={'segments': 'all'})['customId']
|
||||
return self.url_result(
|
||||
'toggle:' + custom_id, ToggleIE.ie_key(), custom_id)
|
||||
|
||||
@@ -33,6 +33,19 @@ class TubiTvIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://tubitv.com/movies/383676/tracker',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
|
||||
'info_dict': {
|
||||
'id': '560057',
|
||||
'ext': 'mp4',
|
||||
'title': 'Penitentiary',
|
||||
'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9',
|
||||
'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
|
||||
'release_year': 1979,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -93,4 +106,5 @@ class TubiTvIE(InfoExtractor):
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'uploader_id': video_data.get('publisher_id'),
|
||||
'release_year': int_or_none(video_data.get('year')),
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import re
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
@@ -49,8 +50,13 @@ class TurnerBaseIE(AdobePassIE):
|
||||
self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
|
||||
return video_url + '?hdnea=' + token
|
||||
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
|
||||
video_data = self._download_xml(data_src, video_id)
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
|
||||
video_data = self._download_xml(
|
||||
data_src, video_id,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=fatal)
|
||||
if not video_data:
|
||||
return {}
|
||||
video_id = video_data.attrib['id']
|
||||
title = xpath_text(video_data, 'headline', fatal=True)
|
||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||
@@ -63,12 +69,14 @@ class TurnerBaseIE(AdobePassIE):
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
rex = re.compile(
|
||||
r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
|
||||
# Possible formats locations: files/file, files/groupFiles/files
|
||||
# and maybe others
|
||||
for video_file in video_data.findall('.//file'):
|
||||
video_url = video_file.text.strip()
|
||||
video_url = url_or_none(video_file.text.strip())
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
@@ -108,9 +116,28 @@ class TurnerBaseIE(AdobePassIE):
|
||||
continue
|
||||
urls.append(video_url)
|
||||
format_id = video_file.get('bitrate')
|
||||
if ext == 'smil':
|
||||
if ext in ('scc', 'srt', 'vtt'):
|
||||
subtitles.setdefault('en', []).append({
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'png':
|
||||
thumbnails.append({
|
||||
'id': format_id,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
video_url, video_id, {
|
||||
'hds': path_data.get('f4m', {}).get('host'),
|
||||
# nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
|
||||
# ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
|
||||
# ssl.cdn.turner.com
|
||||
'http': 'pmd.cdn.turner.com',
|
||||
}))
|
||||
elif ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
@@ -129,7 +156,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
}
|
||||
mobj = rex.search(format_id + video_url)
|
||||
mobj = rex.search(video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group('width')),
|
||||
@@ -152,7 +179,6 @@ class TurnerBaseIE(AdobePassIE):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
@@ -168,12 +194,12 @@ class TurnerBaseIE(AdobePassIE):
|
||||
}.get(source.get('format'))
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'id': image.get('cut'),
|
||||
thumbnails.extend({
|
||||
'id': image.get('cut') or image.get('name'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.findall('images/image')]
|
||||
} for image in video_data.findall('images/image'))
|
||||
|
||||
is_live = xpath_text(video_data, 'isLive') == 'true'
|
||||
|
||||
|
||||
121
youtube_dl/extractor/tv5unis.py
Normal file
121
youtube_dl/extractor/tv5unis.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TV5UnisBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
|
||||
def _real_extract(self, url):
|
||||
groups = re.match(self._VALID_URL, url).groups()
|
||||
product = self._download_json(
|
||||
'https://api.tv5unis.ca/graphql', groups[0], query={
|
||||
'query': '''{
|
||||
%s(%s) {
|
||||
collection {
|
||||
title
|
||||
}
|
||||
episodeNumber
|
||||
rating {
|
||||
name
|
||||
}
|
||||
seasonNumber
|
||||
tags
|
||||
title
|
||||
videoElement {
|
||||
... on Video {
|
||||
mediaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)),
|
||||
})['data'][self._GQL_QUERY_NAME]
|
||||
media_id = product['videoElement']['mediaId']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_id,
|
||||
'title': product.get('title'),
|
||||
'url': smuggle_url('limelight:media:' + media_id, {'geo_countries': self._GEO_COUNTRIES}),
|
||||
'age_limit': parse_age_limit(try_get(product, lambda x: x['rating']['name'])),
|
||||
'tags': product.get('tags'),
|
||||
'series': try_get(product, lambda x: x['collection']['title']),
|
||||
'season_number': int_or_none(product.get('seasonNumber')),
|
||||
'episode_number': int_or_none(product.get('episodeNumber')),
|
||||
'ie_key': 'LimelightMedia',
|
||||
}
|
||||
|
||||
|
||||
class TV5UnisVideoIE(TV5UnisBaseIE):
|
||||
IE_NAME = 'tv5unis:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.tv5unis.ca/videos/bande-annonces/71843',
|
||||
'md5': '3d794164928bda97fb87a17e89923d9b',
|
||||
'info_dict': {
|
||||
'id': 'a883684aecb2486cad9bdc7bbe17f861',
|
||||
'ext': 'mp4',
|
||||
'title': 'Watatatow',
|
||||
'duration': 10.01,
|
||||
}
|
||||
}
|
||||
_GQL_QUERY_NAME = 'productById'
|
||||
|
||||
@staticmethod
|
||||
def _gql_args(groups):
|
||||
return 'id: %s' % groups
|
||||
|
||||
|
||||
class TV5UnisIE(TV5UnisBaseIE):
|
||||
IE_NAME = 'tv5unis'
|
||||
_VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^/]+)(?:/saisons/(?P<season_number>\d+)/episodes/(?P<episode_number>\d+))?/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tv5unis.ca/videos/watatatow/saisons/6/episodes/1',
|
||||
'md5': 'a479907d2e531a73e1f8dc48d6388d02',
|
||||
'info_dict': {
|
||||
'id': 'e5ee23a586c44612a56aad61accf16ef',
|
||||
'ext': 'mp4',
|
||||
'title': 'Je ne peux pas lui résister',
|
||||
'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu\'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...",
|
||||
'subtitles': {
|
||||
'fr': 'count:1',
|
||||
},
|
||||
'duration': 1370,
|
||||
'age_limit': 8,
|
||||
'tags': 'count:3',
|
||||
'series': 'Watatatow',
|
||||
'season_number': 6,
|
||||
'episode_number': 1,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.tv5unis.ca/videos/le-voyage-de-fanny',
|
||||
'md5': '9ca80ebb575c681d10cae1adff3d4774',
|
||||
'info_dict': {
|
||||
'id': '726188eefe094d8faefb13381d42bc06',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le voyage de Fanny',
|
||||
'description': "Fanny, 12 ans, cachée dans un foyer loin de ses parents, s'occupe de ses deux soeurs. Devant fuir, Fanny prend la tête d'un groupe de huit enfants et s'engage dans un dangereux périple à travers la France occupée pour rejoindre la frontière suisse.",
|
||||
'subtitles': {
|
||||
'fr': 'count:1',
|
||||
},
|
||||
'duration': 5587.034,
|
||||
'tags': 'count:4',
|
||||
},
|
||||
}]
|
||||
_GQL_QUERY_NAME = 'productByRootProductSlug'
|
||||
|
||||
@staticmethod
|
||||
def _gql_args(groups):
|
||||
args = 'rootProductSlug: "%s"' % groups[0]
|
||||
if groups[1]:
|
||||
args += ', seasonNumber: %s, episodeNumber: %s' % groups[1:]
|
||||
return args
|
||||
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,7 +25,8 @@ class TVAIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'https://video.tva.ca/details/_5596811470001',
|
||||
'only_matching': True,
|
||||
@@ -32,26 +35,54 @@ class TVAIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'appId': '5955fc5f23eec60006c951f1',
|
||||
})
|
||||
|
||||
def get_attribute(key):
|
||||
for attribute in video_data.get('attributes', []):
|
||||
if attribute.get('key') == key:
|
||||
return attribute.get('value')
|
||||
return None
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': get_attribute('title'),
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
|
||||
'description': get_attribute('description'),
|
||||
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
|
||||
'duration': float_or_none(get_attribute('video-duration'), 1000),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
|
||||
class QubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
|
||||
'md5': '949490fd0e7aee11d0543777611fbd53',
|
||||
'info_dict': {
|
||||
'id': '6084352463001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Épisode 01',
|
||||
'uploader_id': '5481942443001',
|
||||
'upload_date': '20190907',
|
||||
'timestamp': 1567899756,
|
||||
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# reference_id also works with old account_id(5481942443001)
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
entity_id = self._match_id(url)
|
||||
entity = self._download_json(
|
||||
'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
|
||||
entity_id, query={'id': entity_id})
|
||||
video_id = entity['videoId']
|
||||
episode = strip_or_none(entity.get('name'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': episode,
|
||||
# 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
|
||||
'url': 'https://videos.tva.ca/details/_' + video_id,
|
||||
'description': entity.get('longDescription'),
|
||||
'duration': float_or_none(entity.get('durationMillis'), 1000),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(entity.get('episodeNumber')),
|
||||
# 'ie_key': 'BrightcoveNew',
|
||||
'ie_key': TVAIE.ie_key(),
|
||||
}
|
||||
|
||||
67
youtube_dl/extractor/tver.py
Normal file
67
youtube_dl/extractor/tver.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TVerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
|
||||
# videos are only available for 7 days
|
||||
_TESTS = [{
|
||||
'url': 'https://tver.jp/corner/f0062178',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tver.jp/feature/f0062413',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tver.jp/episode/79622438',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_TOKEN = None
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._TOKEN = self._download_json(
|
||||
'https://tver.jp/api/access_token.php', None)['token']
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
main = self._download_json(
|
||||
'https://api.tver.jp/v4/' + path, video_id,
|
||||
query={'token': self._TOKEN})['main']
|
||||
p_id = main['publisher_id']
|
||||
service = remove_start(main['service'], 'ts_')
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
|
||||
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
|
||||
}
|
||||
|
||||
if service == 'cx':
|
||||
info.update({
|
||||
'title': main.get('subtitle') or main['title'],
|
||||
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
|
||||
'ie_key': 'FujiTVFODPlus7',
|
||||
})
|
||||
else:
|
||||
r_id = main['reference_id']
|
||||
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
|
||||
r_id = 'ref:' + r_id
|
||||
bc_url = smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
|
||||
{'geo_countries': ['JP']})
|
||||
info.update({
|
||||
'url': bc_url,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
})
|
||||
|
||||
return info
|
||||
@@ -12,11 +12,13 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -414,7 +416,7 @@ class ViafreeIE(InfoExtractor):
|
||||
|
||||
|
||||
class TVPlayHomeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
|
||||
'info_dict': {
|
||||
@@ -433,80 +435,58 @@ class TVPlayHomeIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TVPlayIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.tv3.lt/aferistai-10047125',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
asset = self._download_json(
|
||||
urljoin(url, '/sb/public/asset/' + video_id), video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
|
||||
|
||||
if len(video_id) < 8:
|
||||
return self.url_result(
|
||||
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'm3u8 url', group='url')
|
||||
m3u8_url = asset['movie']['contentUrl']
|
||||
video_id = asset['assetId']
|
||||
asset_title = asset['title']
|
||||
title = asset_title['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'title', default=None, group='value') or self._html_search_meta(
|
||||
'title', webpage, default=None) or self._og_search_title(
|
||||
webpage)
|
||||
thumbnails = None
|
||||
image_url = asset.get('imageUrl')
|
||||
if image_url:
|
||||
thumbnails = [{
|
||||
'url': urljoin(url, image_url),
|
||||
'ext': 'jpg',
|
||||
}]
|
||||
|
||||
description = self._html_search_meta(
|
||||
'description', webpage,
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', default=None, group='url') or self._html_search_meta(
|
||||
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
|
||||
webpage)
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
season = self._search_regex(
|
||||
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
|
||||
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'season', default=None, group='value')
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
|
||||
default=None))
|
||||
episode = self._search_regex(
|
||||
(r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'episode', default=None, group='value')
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
|
||||
default=None))
|
||||
metadata = asset.get('metadata') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': parse_duration(asset_title.get('runTime')),
|
||||
'series': asset.get('tvSeriesTitle'),
|
||||
'season': asset.get('tvSeasonTitle'),
|
||||
'season_number': int_or_none(metadata.get('seasonNumber')),
|
||||
'episode': asset_title.get('titleBrief'),
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -1,11 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class TwitCastingIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
|
||||
@@ -17,8 +26,12 @@ class TwitCastingIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live #2357609',
|
||||
'uploader_id': 'ivetesangalo',
|
||||
'description': "Moi! I'm live on TwitCasting from my iPhone.",
|
||||
'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20110822',
|
||||
'timestamp': 1314010824,
|
||||
'duration': 32,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -30,8 +43,12 @@ class TwitCastingIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live playing something #3689740',
|
||||
'uploader_id': 'mttbernardini',
|
||||
'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",
|
||||
'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20120212',
|
||||
'timestamp': 1329028024,
|
||||
'duration': 681,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -40,9 +57,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
uploader_id = mobj.group('uploader_id')
|
||||
uploader_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
video_password = self._downloader.params.get('videopassword')
|
||||
request_data = None
|
||||
@@ -52,30 +67,45 @@ class TwitCastingIE(InfoExtractor):
|
||||
})
|
||||
webpage = self._download_webpage(url, video_id, data=request_data)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, fatal=True)
|
||||
title = clean_html(get_element_by_id(
|
||||
'movietitle', webpage)) or self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, fatal=True)
|
||||
|
||||
video_js_data = {}
|
||||
m3u8_url = self._search_regex(
|
||||
(r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
|
||||
webpage, 'm3u8 url', group='url')
|
||||
r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
video_js_data = self._parse_json(self._search_regex(
|
||||
r"data-movie-playlist='(\[[^']+\])'",
|
||||
webpage, 'movie playlist'), video_id)[0]
|
||||
m3u8_url = video_js_data['source']['url']
|
||||
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage)
|
||||
thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
|
||||
description = clean_html(get_element_by_id(
|
||||
'authorcomment', webpage)) or self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'], webpage)
|
||||
duration = float_or_none(video_js_data.get(
|
||||
'duration'), 1000) or parse_duration(clean_html(
|
||||
get_element_by_class('tw-player-duration-time', webpage)))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None))
|
||||
timestamp = unified_timestamp(self._search_regex(
|
||||
r'data-toggle="true"[^>]+datetime="([^"]+)"',
|
||||
webpage, 'datetime', None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -251,10 +251,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '700207533655363584',
|
||||
'ext': 'mp4',
|
||||
'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'simon vetugo',
|
||||
'uploader': 'simon vertugo',
|
||||
'uploader_id': 'simonvertugo',
|
||||
'duration': 30.0,
|
||||
'timestamp': 1455777459,
|
||||
@@ -312,6 +312,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'timestamp': 1492000653,
|
||||
'upload_date': '20170412',
|
||||
},
|
||||
'skip': 'Account suspended',
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||
'info_dict': {
|
||||
@@ -380,6 +381,14 @@ class TwitterIE(TwitterBaseIE):
|
||||
# promo_video_website card
|
||||
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# promo_video_convo card
|
||||
'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# appplayer card
|
||||
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -462,7 +471,30 @@ class TwitterIE(TwitterBaseIE):
|
||||
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
||||
|
||||
card_name = card['name'].split(':')[-1]
|
||||
if card_name in ('amplify', 'promo_video_website'):
|
||||
if card_name == 'player':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('player_url'),
|
||||
})
|
||||
elif card_name == 'periscope_broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('url') or get_binding_value('player_url'),
|
||||
'ie_key': PeriscopeIE.ie_key(),
|
||||
})
|
||||
elif card_name == 'broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('broadcast_url'),
|
||||
'ie_key': TwitterBroadcastIE.ie_key(),
|
||||
})
|
||||
elif card_name == 'summary':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('card_url'),
|
||||
})
|
||||
# amplify, promo_video_website, promo_video_convo, appplayer, ...
|
||||
else:
|
||||
is_amplify = card_name == 'amplify'
|
||||
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
|
||||
content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
|
||||
@@ -488,25 +520,6 @@ class TwitterIE(TwitterBaseIE):
|
||||
'duration': int_or_none(get_binding_value(
|
||||
'content_duration_seconds')),
|
||||
})
|
||||
elif card_name == 'player':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('player_url'),
|
||||
})
|
||||
elif card_name == 'periscope_broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('url') or get_binding_value('player_url'),
|
||||
'ie_key': PeriscopeIE.ie_key(),
|
||||
})
|
||||
elif card_name == 'broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('broadcast_url'),
|
||||
'ie_key': TwitterBroadcastIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('Unsupported Twitter Card.')
|
||||
else:
|
||||
expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
|
||||
if not expanded_url:
|
||||
|
||||
@@ -5,10 +5,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '2117008346001',
|
||||
'ext': 'mp4',
|
||||
@@ -23,7 +22,11 @@ class UKTVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest']
|
||||
}
|
||||
}, {
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -4,30 +4,50 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VideomoreBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://more.tv/api/v3/web/'
|
||||
_VALID_URL_BASE = r'https?://(?:videomore\.ru|more\.tv)/'
|
||||
|
||||
def _download_page_data(self, display_id):
|
||||
return self._download_json(
|
||||
self._API_BASE_URL + 'PageData', display_id, query={
|
||||
'url': '/' + display_id,
|
||||
})['attributes']['response']['data']
|
||||
|
||||
def _track_url_result(self, track):
|
||||
track_vod = track['trackVod']
|
||||
video_url = track_vod.get('playerLink') or track_vod['link']
|
||||
return self.url_result(
|
||||
video_url, VideomoreIE.ie_key(), track_vod.get('hubId'))
|
||||
|
||||
|
||||
class VideomoreIE(InfoExtractor):
|
||||
IE_NAME = 'videomore'
|
||||
_VALID_URL = r'''(?x)
|
||||
videomore:(?P<sid>\d+)$|
|
||||
https?://(?:player\.)?videomore\.ru/
|
||||
https?://
|
||||
(?:
|
||||
videomore\.ru/
|
||||
(?:
|
||||
embed|
|
||||
[^/]+/[^/]+
|
||||
)/|
|
||||
[^/]*\?.*?\btrack_id=
|
||||
(?:
|
||||
(?:player\.)?videomore\.ru|
|
||||
siren\.more\.tv/player
|
||||
)/[^/]*\?.*?\btrack_id=|
|
||||
odysseus\.more.tv/player/(?P<partner_id>\d+)/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
(?:[/?#&]|\.(?:xml|json)|$)
|
||||
@@ -47,18 +67,19 @@ class VideomoreIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'age_limit': 16,
|
||||
},
|
||||
'skip': 'The video is not available for viewing.',
|
||||
}, {
|
||||
'url': 'http://videomore.ru/embed/259974',
|
||||
'info_dict': {
|
||||
'id': '259974',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Молодежка 2 сезон 40 серия',
|
||||
'series': 'Молодежка',
|
||||
'season': '2 сезон',
|
||||
'episode': '40 серия',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2809,
|
||||
'duration': 2789,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 16,
|
||||
},
|
||||
'params': {
|
||||
@@ -79,6 +100,7 @@ class VideomoreIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The video is not available for viewing.',
|
||||
}, {
|
||||
'url': 'http://videomore.ru/elki_3?track_id=364623',
|
||||
'only_matching': True,
|
||||
@@ -100,7 +122,14 @@ class VideomoreIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://odysseus.more.tv/player/1788/352317',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://siren.more.tv/player/config?track_id=352317&partner_id=1788&user_token=',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
@@ -118,46 +147,73 @@ class VideomoreIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('sid') or mobj.group('id')
|
||||
partner_id = mobj.group('partner_id') or compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('partner_id', [None])[0] or '97'
|
||||
|
||||
video = self._download_xml(
|
||||
'http://videomore.ru/video/tracks/%s.xml' % video_id,
|
||||
video_id, 'Downloading video XML')
|
||||
item = self._download_json(
|
||||
'https://siren.more.tv/player/config', video_id, query={
|
||||
'partner_id': partner_id,
|
||||
'track_id': video_id,
|
||||
})['data']['playlist']['items'][0]
|
||||
|
||||
item = xpath_element(video, './/playlist/item', fatal=True)
|
||||
title = item.get('title')
|
||||
series = item.get('project_name')
|
||||
season = item.get('season_name')
|
||||
episode = item.get('episode_name')
|
||||
if not title:
|
||||
title = []
|
||||
for v in (series, season, episode):
|
||||
if v:
|
||||
title.append(v)
|
||||
title = ' '.join(title)
|
||||
|
||||
title = xpath_text(
|
||||
item, ('./title', './episode_name'), 'title', fatal=True)
|
||||
streams = item.get('streams') or []
|
||||
for protocol in ('DASH', 'HLS'):
|
||||
stream_url = item.get(protocol.lower() + '_url')
|
||||
if stream_url:
|
||||
streams.append({'protocol': protocol, 'url': stream_url})
|
||||
|
||||
video_url = xpath_text(item, './video_url', 'video url', fatal=True)
|
||||
formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
|
||||
formats = []
|
||||
for stream in streams:
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
protocol = stream.get('protocol')
|
||||
if protocol == 'DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
stream_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif protocol == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif protocol == 'MSS':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
stream_url, video_id, ism_id='mss', fatal=False))
|
||||
|
||||
if not formats:
|
||||
error = item.get('error')
|
||||
if error:
|
||||
if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
|
||||
self.raise_geo_restricted(countries=['RU'])
|
||||
raise ExtractorError(error, expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = xpath_text(item, './thumbnail_url')
|
||||
duration = int_or_none(xpath_text(item, './duration'))
|
||||
view_count = int_or_none(xpath_text(item, './views'))
|
||||
comment_count = int_or_none(xpath_text(item, './count_comments'))
|
||||
age_limit = int_or_none(xpath_text(item, './min_age'))
|
||||
|
||||
series = xpath_text(item, './project_name')
|
||||
episode = xpath_text(item, './episode_name')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season': season,
|
||||
'episode': episode,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'thumbnail': item.get('thumbnail_url'),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
'view_count': int_or_none(item.get('views')),
|
||||
'age_limit': int_or_none(item.get('min_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class VideomoreVideoIE(InfoExtractor):
|
||||
class VideomoreVideoIE(VideomoreBaseIE):
|
||||
IE_NAME = 'videomore:video'
|
||||
_VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?P<id>(?:(?:[^/]+/){2})?[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_TESTS = [{
|
||||
# single video with og:video:iframe
|
||||
'url': 'http://videomore.ru/elki_3',
|
||||
@@ -174,10 +230,25 @@ class VideomoreVideoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# season single series with og:video:iframe
|
||||
'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '352317',
|
||||
'ext': 'mp4',
|
||||
'title': 'Последний мент 1 сезон 14 серия',
|
||||
'series': 'Последний мент',
|
||||
'season': '1 сезон',
|
||||
'episode': '14 серия',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2464,
|
||||
'age_limit': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk',
|
||||
'only_matching': True,
|
||||
@@ -197,9 +268,13 @@ class VideomoreVideoIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'redirects to https://more.tv/'
|
||||
}, {
|
||||
'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://more.tv/poslednii_ment/1_sezon/14_seriya',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -208,38 +283,25 @@ class VideomoreVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._og_search_property(
|
||||
'video:iframe', webpage, 'video url', default=None)
|
||||
|
||||
if not video_url:
|
||||
video_id = self._search_regex(
|
||||
(r'config\s*:\s*["\']https?://videomore\.ru/video/tracks/(\d+)\.xml',
|
||||
r'track-id=["\'](\d+)',
|
||||
r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id')
|
||||
video_url = 'videomore:%s' % video_id
|
||||
else:
|
||||
video_id = None
|
||||
|
||||
return self.url_result(
|
||||
video_url, ie=VideomoreIE.ie_key(), video_id=video_id)
|
||||
return self._track_url_result(self._download_page_data(display_id))
|
||||
|
||||
|
||||
class VideomoreSeasonIE(InfoExtractor):
|
||||
class VideomoreSeasonIE(VideomoreBaseIE):
|
||||
IE_NAME = 'videomore:season'
|
||||
_VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_VALID_URL = VideomoreBaseIE._VALID_URL_BASE + r'(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
|
||||
_TESTS = [{
|
||||
'url': 'http://videomore.ru/molodezhka/sezon_promo',
|
||||
'url': 'http://videomore.ru/molodezhka/film_o_filme',
|
||||
'info_dict': {
|
||||
'id': 'molodezhka/sezon_promo',
|
||||
'title': 'Молодежка Промо',
|
||||
'id': 'molodezhka/film_o_filme',
|
||||
'title': 'Фильм о фильме',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://more.tv/molodezhka/film_o_filme',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -249,59 +311,12 @@ class VideomoreSeasonIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
data = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1',
|
||||
webpage, 'data', default='{}', group='value'),
|
||||
display_id, fatal=False)
|
||||
|
||||
season = self._download_page_data(display_id)
|
||||
season_id = compat_str(season['id'])
|
||||
tracks = self._download_json(
|
||||
self._API_BASE_URL + 'seasons/%s/tracks' % season_id,
|
||||
season_id)['data']
|
||||
entries = []
|
||||
|
||||
if data:
|
||||
episodes = data.get('episodes')
|
||||
if isinstance(episodes, list):
|
||||
for ep in episodes:
|
||||
if not isinstance(ep, dict):
|
||||
continue
|
||||
ep_id = int_or_none(ep.get('id'))
|
||||
ep_url = url_or_none(ep.get('url'))
|
||||
if ep_id:
|
||||
e = {
|
||||
'url': 'videomore:%s' % ep_id,
|
||||
'id': compat_str(ep_id),
|
||||
}
|
||||
elif ep_url:
|
||||
e = {'url': ep_url}
|
||||
else:
|
||||
continue
|
||||
e.update({
|
||||
'_type': 'url',
|
||||
'ie_key': VideomoreIE.ie_key(),
|
||||
'title': str_or_none(ep.get('title')),
|
||||
'thumbnail': url_or_none(ep.get('image')),
|
||||
'duration': parse_duration(ep.get('duration')),
|
||||
'episode_number': int_or_none(ep.get('number')),
|
||||
'upload_date': unified_strdate(ep.get('date')),
|
||||
})
|
||||
entries.append(e)
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(
|
||||
'videomore:%s' % video_id, ie=VideomoreIE.ie_key(),
|
||||
video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r':(?:id|key)=["\'](\d+)["\']', webpage))]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(item) for item in re.findall(
|
||||
r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
|
||||
% display_id, webpage)]
|
||||
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
for track in tracks:
|
||||
entries.append(self._track_url_result(track))
|
||||
return self.playlist_result(entries, display_id, season.get('title'))
|
||||
|
||||
@@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vhx:embed'
|
||||
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
|
||||
return unescapeHTML(mobj.group(1)) if mobj else None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
'ott data'), video_id, js_to_json)['config_url']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
info['id'] = video_id
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
return info
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user