mirror of https://github.com/ytdl-org/youtube-dl
synced 2025-10-25 09:38:38 +09:00

Compare commits

265 Commits

2020.12.05 ... 9d50f86232

	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 9d50f86232 | ||
|   | 7e92f9015e | ||
|   | aa860b8016 | ||
|   | b484097b01 | ||
|   | ab9001dab5 | ||
|   | 879866a230 | ||
|   | 8e5477d036 | ||
|   | 1e8e5d5238 | ||
|   | d81a213cfb | ||
|   | 7c2d18a13f | ||
|   | 2408e6d26a | ||
|   | cf862771d7 | ||
|   | a938f111ed | ||
|   | 4759543f6e | ||
|   | d0fc289f45 | ||
|   | 70f572585d | ||
|   | c2d06aef60 | ||
|   | ff1e765400 | ||
|   | 170e1c1995 | ||
|   | 61e669acff | ||
|   | 2c337f4e85 | ||
|   | bf6a74c620 | ||
|   | 38a967c98e | ||
|   | 3a61e6d360 | ||
|   | 3d8e32dcc0 | ||
|   | 8f29b2dd38 | ||
|   | a29e340efa | ||
|   | b13f29098f | ||
|   | 430c4bc9d0 | ||
|   | 4ae243fc6c | ||
|   | 8f20ad36dc | ||
|   | 799c794947 | ||
|   | 1ae7ae0b96 | ||
|   | ccc7112291 | ||
|   | 5b24f8f505 | ||
|   | fcd90d2583 | ||
|   | 8f757c7353 | ||
|   | be1a3f2d11 | ||
|   | ecae54a98d | ||
|   | f318882955 | ||
|   | c3399cac19 | ||
|   | 9237aaa77f | ||
|   | 766fcdd0fa | ||
|   | f6ea29e24b | ||
|   | 8a3797a4ab | ||
|   | 745db8899d | ||
|   | 83db801cbf | ||
|   | 964a8eb754 | ||
|   | ac61f2e058 | ||
|   | 8487e8b98a | ||
|   | 9c484c0019 | ||
|   | 0e96b4b5ce | ||
|   | a563c97c5c | ||
|   | e88c9ef62a | ||
|   | 0889eb33e0 | ||
|   | 0021a2b9a1 | ||
|   | 19ec468635 | ||
|   | 491ee7efe4 | ||
|   | 8522bcd97c | ||
|   | ac71fd5919 | ||
|   | 8e953dcbb1 | ||
|   | f4afb9a6a8 | ||
|   | d5b8cf093c | ||
|   | 5c6e84c0ff | ||
|   | 1aaee908b9 | ||
|   | b2d9fd9c9f | ||
|   | bc2f83b95e | ||
|   | 85de33b04e | ||
|   | 7dfd966848 | ||
|   | a25d03d7cb | ||
|   | cabfd4b1f0 | ||
|   | 7b643d4cd0 | ||
|   | 1f1d01d498 | ||
|   | 21a42e2588 | ||
|   | 2df93a0c4a | ||
|   | 75972e200d | ||
|   | d0d838638c | ||
|   | 8c17afc471 | ||
|   | 40d66e07df | ||
|   | ab89a8678b | ||
|   | 4d7d056909 | ||
|   | c35bc82606 | ||
|   | 2f56caf083 | ||
|   | 4066945919 | ||
|   | 2a84694b1e | ||
|   | 4046ffe1e1 | ||
|   | d1d0612160 | ||
|   | 7b0f04ed1f | ||
|   | 2e21b06ea2 | ||
|   | a6f75e6e89 | ||
|   | bd18824c2a | ||
|   | bdd044e67b | ||
|   | f7e95fb2a0 | ||
|   | 9dd674e1d2 | ||
|   | 9c1e164e0c | ||
|   | c706fbe9fe | ||
|   | ebdcf70b0d | ||
|   | 5966095e65 | ||
|   | 9ee984fc76 | ||
|   | 53528e1d23 | ||
|   | c931c4b8dd | ||
|   | 7acd042bbb | ||
|   | bcfe485e01 | ||
|   | 479cc6d5a1 | ||
|   | 38286ee729 | ||
|   | 1a95953867 | ||
|   | 71febd1c52 | ||
|   | f1bc56c99b | ||
|   | 64e419bd73 | ||
|   | 782ea947b4 | ||
|   | f27224d57b | ||
|   | c007188598 | ||
|   | af93ecfd88 | ||
|   | 794771a164 | ||
|   | 6f2eaaf73d | ||
|   | 4c7a4dbc4d | ||
|   | f86b299d0e | ||
|   | e474996541 | ||
|   | aed617e311 | ||
|   | 0fa67c1d68 | ||
|   | 365b3cc72d | ||
|   | a272fe21a8 | ||
|   | cec1c2f211 | ||
|   | 12053450dc | ||
|   | 46cffb0c47 | ||
|   | c32a059f52 | ||
|   | 6911312e53 | ||
|   | f22b5a6b96 | ||
|   | 58e55198c1 | ||
|   | d61ed9f2f1 | ||
|   | 8bc4c6350e | ||
|   | cfa4ffa23b | ||
|   | 4f1dc1463d | ||
|   | 17e0f41d34 | ||
|   | b57b27ff8f | ||
|   | bbe8cc6662 | ||
|   | 98106accb6 | ||
|   | af1312bfc3 | ||
|   | 4c7d7215cd | ||
|   | 0370d9eb3d | ||
|   | 1434651d20 | ||
|   | 2c312ab84a | ||
|   | 0ee78d62d5 | ||
|   | 7f3c90ab25 | ||
|   | 1d3cd29730 | ||
|   | 4ef1fc9707 | ||
|   | f9e6aa1dcf | ||
|   | f83db9064b | ||
|   | 2da9a86399 | ||
|   | ecaa535cf4 | ||
|   | 79dd92b1fe | ||
|   | bd3844c9c2 | ||
|   | 7bf5e3a84a | ||
|   | 08a17dae5b | ||
|   | 924ea66ade | ||
|   | 5b72f5b74f | ||
|   | bfa345744d | ||
|   | f966461476 | ||
|   | b8aea53682 | ||
|   | c0d9eb7043 | ||
|   | 3ba6aabd25 | ||
|   | a8b31505ed | ||
|   | 90a271e914 | ||
|   | 172d270607 | ||
|   | 22feed08a1 | ||
|   | 942b8ca3be | ||
|   | 3729c52f9d | ||
|   | 71679eaee8 | ||
|   | 76fe4ba3b2 | ||
|   | 164a4a5756 | ||
|   | 455951985b | ||
|   | c29500e412 | ||
|   | 1bc1520adc | ||
|   | 022e05dc1f | ||
|   | b34c9551aa | ||
|   | 84f19c026f | ||
|   | 6bde5492b6 | ||
|   | 6086df4d6a | ||
|   | c98052c5da | ||
|   | ab62bc5838 | ||
|   | bc87ba8424 | ||
|   | b79df1b68d | ||
|   | 2797c7be45 | ||
|   | 755f186e21 | ||
|   | 2240a1dc4d | ||
|   | 03d3af9768 | ||
|   | 5ce9527e16 | ||
|   | c527f5ada0 | ||
|   | ace52668f0 | ||
|   | 9c33eb027e | ||
|   | 679b711395 | ||
|   | 1727541315 | ||
|   | 45b0a0d11b | ||
|   | e665fcd4da | ||
|   | aae737d4af | ||
|   | 92a6de861e | ||
|   | 5ff881aee6 | ||
|   | eae19a4473 | ||
|   | f70c263ce5 | ||
|   | 92d135921f | ||
|   | d8008dee4f | ||
|   | bb38a12157 | ||
|   | bcc8ef0a5a | ||
|   | 3cb1a5dc73 | ||
|   | ed40c02c9b | ||
|   | b5fdceb4d1 | ||
|   | 1e6f7f3b45 | ||
|   | 469ff02f4e | ||
|   | 56f1c5ad38 | ||
|   | 3a6b8f4edf | ||
|   | 3ae02d4a62 | ||
|   | 59fea63dc2 | ||
|   | 22933e02d4 | ||
|   | 98a62d7cbd | ||
|   | d6c5fdef34 | ||
|   | 4b9051cf39 | ||
|   | 00f5068908 | ||
|   | 432c6b0f48 | ||
|   | ad5e5788ff | ||
|   | dc65041c22 | ||
|   | 9f88b07945 | ||
|   | 225646c4ca | ||
|   | 5c6e9f0b6c | ||
|   | 6c370bc149 | ||
|   | 37fd242342 | ||
|   | aee1f87168 | ||
|   | b69bb1ed11 | ||
|   | 772cefef8c | ||
|   | 842654b6d0 | ||
|   | df5e50954b | ||
|   | a4a2fa8754 | ||
|   | 9da0504a09 | ||
|   | 470cf496f5 | ||
|   | e029da9add | ||
|   | e00b8f60d4 | ||
|   | 644c3ef886 | ||
|   | 9d8d0f8b4a | ||
|   | 5a1fbbf8b7 | ||
|   | e2bdf8bf4f | ||
|   | c368dc98e0 | ||
|   | e7eff914cd | ||
|   | 07333d0062 | ||
|   | 5bd7ad2e81 | ||
|   | 3ded751985 | ||
|   | 6956db3606 | ||
|   | 17b01228f8 | ||
|   | 4f1ecca58d | ||
|   | 2717036489 | ||
|   | d9482c0083 | ||
|   | 791b743765 | ||
|   | fa604d9083 | ||
|   | 2bf0634d16 | ||
|   | dccf4932e1 | ||
|   | 91dd25fe1e | ||
|   | 06bf2ac20f | ||
|   | 6ad0d8781e | ||
|   | f2c704e112 | ||
|   | 5e822c2526 | ||
|   | cc017e07ca | ||
|   | 082da36416 | ||
|   | 6bf95b15ee | ||
|   | 4c93b2fd15 | ||
|   | 1b26bfd425 | ||
|   | 13ec444a98 | ||
|   | 51579d87e4 | 
							
								
								
									
6 .github/ISSUE_TEMPLATE/1_broken_site.md (vendored)

							| @@ -18,7 +18,7 @@ title: '' | ||||
|  | ||||
| <!-- | ||||
| Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. | ||||
| - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. | ||||
| - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. | ||||
| @@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com | ||||
| --> | ||||
|  | ||||
| - [ ] I'm reporting a broken site support | ||||
| - [ ] I've verified that I'm running youtube-dl version **2020.12.05** | ||||
| - [ ] I've verified that I'm running youtube-dl version **2021.01.08** | ||||
| - [ ] I've checked that all provided URLs are alive and playable in a browser | ||||
| - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped | ||||
| - [ ] I've searched the bugtracker for similar issues including closed ones | ||||
| @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < | ||||
|  [debug] User config: [] | ||||
|  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
|  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
|  [debug] youtube-dl version 2020.12.05 | ||||
|  [debug] youtube-dl version 2021.01.08 | ||||
|  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
|  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
|  [debug] Proxy map: {} | ||||
|   | ||||
| @@ -19,7 +19,7 @@ labels: 'site-support-request' | ||||
|  | ||||
| <!-- | ||||
| Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. | ||||
| - Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. | ||||
| - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. | ||||
| @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com | ||||
| --> | ||||
|  | ||||
| - [ ] I'm reporting a new site support request | ||||
| - [ ] I've verified that I'm running youtube-dl version **2020.12.05** | ||||
| - [ ] I've verified that I'm running youtube-dl version **2021.01.08** | ||||
| - [ ] I've checked that all provided URLs are alive and playable in a browser | ||||
| - [ ] I've checked that none of provided URLs violate any copyrights | ||||
| - [ ] I've searched the bugtracker for similar site support requests including closed ones | ||||
|   | ||||
| @@ -18,13 +18,13 @@ title: '' | ||||
|  | ||||
| <!-- | ||||
| Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. | ||||
| - Finally, put x into all relevant boxes (like this [x]) | ||||
| --> | ||||
|  | ||||
| - [ ] I'm reporting a site feature request | ||||
| - [ ] I've verified that I'm running youtube-dl version **2020.12.05** | ||||
| - [ ] I've verified that I'm running youtube-dl version **2021.01.08** | ||||
| - [ ] I've searched the bugtracker for similar site feature requests including closed ones | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
6 .github/ISSUE_TEMPLATE/4_bug_report.md (vendored)

							| @@ -18,7 +18,7 @@ title: '' | ||||
|  | ||||
| <!-- | ||||
| Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. | ||||
| - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. | ||||
| - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. | ||||
| @@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com | ||||
| --> | ||||
|  | ||||
| - [ ] I'm reporting a broken site support issue | ||||
| - [ ] I've verified that I'm running youtube-dl version **2020.12.05** | ||||
| - [ ] I've verified that I'm running youtube-dl version **2021.01.08** | ||||
| - [ ] I've checked that all provided URLs are alive and playable in a browser | ||||
| - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped | ||||
| - [ ] I've searched the bugtracker for similar bug reports including closed ones | ||||
| @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < | ||||
|  [debug] User config: [] | ||||
|  [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
|  [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
|  [debug] youtube-dl version 2020.12.05 | ||||
|  [debug] youtube-dl version 2021.01.08 | ||||
|  [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
|  [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
|  [debug] Proxy map: {} | ||||
|   | ||||
							
								
								
									
4 .github/ISSUE_TEMPLATE/5_feature_request.md (vendored)

							| @@ -19,13 +19,13 @@ labels: 'request' | ||||
|  | ||||
| <!-- | ||||
| Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED. | ||||
| - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. | ||||
| - Finally, put x into all relevant boxes (like this [x]) | ||||
| --> | ||||
|  | ||||
| - [ ] I'm reporting a feature request | ||||
| - [ ] I've verified that I'm running youtube-dl version **2020.12.05** | ||||
| - [ ] I've verified that I'm running youtube-dl version **2021.01.08** | ||||
| - [ ] I've searched the bugtracker for similar feature requests including closed ones | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
4 .github/PULL_REQUEST_TEMPLATE.md (vendored)

							| @@ -7,8 +7,10 @@ | ||||
| --- | ||||
|  | ||||
| ### Before submitting a *pull request* make sure you have: | ||||
| - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections | ||||
| - [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests | ||||
| - [ ] Read [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) | ||||
| - [ ] Read [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) and adjusted the code to meet them | ||||
| - [ ] Covered the code with tests (note that PRs without tests will be REJECTED) | ||||
| - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) | ||||
|  | ||||
| ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: | ||||
|   | ||||
							
								
								
									
74 .github/workflows/ci.yml (vendored, Normal file)

							| @@ -0,0 +1,74 @@ | ||||
| name: CI | ||||
| on: [push, pull_request] | ||||
| jobs: | ||||
|   tests: | ||||
|     name: Tests | ||||
|     runs-on: ${{ matrix.os }} | ||||
|     strategy: | ||||
|       fail-fast: true | ||||
|       matrix: | ||||
|         os: [ubuntu-18.04] | ||||
|         # TODO: python 2.6 | ||||
|         python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7] | ||||
|         python-impl: [cpython] | ||||
|         ytdl-test-set: [core, download] | ||||
|         run-tests-ext: [sh] | ||||
|         include: | ||||
|         # python 3.2 is only available on windows via setup-python | ||||
|         - os: windows-latest | ||||
|           python-version: 3.2 | ||||
|           python-impl: cpython | ||||
|           ytdl-test-set: core | ||||
|           run-tests-ext: bat | ||||
|         - os: windows-latest | ||||
|           python-version: 3.2 | ||||
|           python-impl: cpython | ||||
|           ytdl-test-set: download | ||||
|           run-tests-ext: bat | ||||
|         # jython | ||||
|         - os: ubuntu-18.04 | ||||
|           python-impl: jython | ||||
|           ytdl-test-set: core | ||||
|           run-tests-ext: sh | ||||
|         - os: ubuntu-18.04 | ||||
|           python-impl: jython | ||||
|           ytdl-test-set: download | ||||
|           run-tests-ext: sh | ||||
|     steps: | ||||
|     - uses: actions/checkout@v2 | ||||
|     - name: Set up Python ${{ matrix.python-version }} | ||||
|       uses: actions/setup-python@v2 | ||||
|       if: ${{ matrix.python-impl == 'cpython' }} | ||||
|       with: | ||||
|         python-version: ${{ matrix.python-version }} | ||||
|     - name: Set up Java 8 | ||||
|       if: ${{ matrix.python-impl == 'jython' }} | ||||
|       uses: actions/setup-java@v1 | ||||
|       with: | ||||
|         java-version: 8 | ||||
|     - name: Install Jython | ||||
|       if: ${{ matrix.python-impl == 'jython' }} | ||||
|       run: | | ||||
|         wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar | ||||
|         java -jar jython-installer.jar -s -d "$HOME/jython" | ||||
|         echo "$HOME/jython/bin" >> $GITHUB_PATH | ||||
|     - name: Install nose | ||||
|       run: pip install nose | ||||
|     - name: Run tests | ||||
|       continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }} | ||||
|       env: | ||||
|         YTDL_TEST_SET: ${{ matrix.ytdl-test-set }} | ||||
|       run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} | ||||
|   flake8: | ||||
|     name: Linter | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|     - uses: actions/checkout@v2 | ||||
|     - name: Set up Python | ||||
|       uses: actions/setup-python@v2 | ||||
|       with: | ||||
|         python-version: 3.9 | ||||
|     - name: Install flake8 | ||||
|       run: pip install flake8 | ||||
|     - name: Run flake8 | ||||
|       run: flake8 . | ||||
							
								
								
									
50 .travis.yml

							| @@ -1,50 +0,0 @@ | ||||
| language: python | ||||
| python: | ||||
|   - "2.6" | ||||
|   - "2.7" | ||||
|   - "3.2" | ||||
|   - "3.3" | ||||
|   - "3.4" | ||||
|   - "3.5" | ||||
|   - "3.6" | ||||
|   - "pypy" | ||||
|   - "pypy3" | ||||
| dist: trusty | ||||
| env: | ||||
|   - YTDL_TEST_SET=core | ||||
| #  - YTDL_TEST_SET=download | ||||
| jobs: | ||||
|   include: | ||||
|     - python: 3.7 | ||||
|       dist: xenial | ||||
|       env: YTDL_TEST_SET=core | ||||
| #    - python: 3.7 | ||||
| #      dist: xenial | ||||
| #      env: YTDL_TEST_SET=download | ||||
|     - python: 3.8 | ||||
|       dist: xenial | ||||
|       env: YTDL_TEST_SET=core | ||||
| #    - python: 3.8 | ||||
| #      dist: xenial | ||||
| #      env: YTDL_TEST_SET=download | ||||
|     - python: 3.8-dev | ||||
|       dist: xenial | ||||
|       env: YTDL_TEST_SET=core | ||||
| #    - python: 3.8-dev | ||||
| #      dist: xenial | ||||
| #      env: YTDL_TEST_SET=download | ||||
|     - env: JYTHON=true; YTDL_TEST_SET=core | ||||
| #    - env: JYTHON=true; YTDL_TEST_SET=download | ||||
|     - name: flake8 | ||||
|       python: 3.8 | ||||
|       dist: xenial | ||||
|       install: pip install flake8 | ||||
|       script: flake8 . | ||||
|   fast_finish: true | ||||
|   allow_failures: | ||||
| #    - env: YTDL_TEST_SET=download | ||||
|     - env: JYTHON=true; YTDL_TEST_SET=core | ||||
| #    - env: JYTHON=true; YTDL_TEST_SET=download | ||||
| before_install: | ||||
|   - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi | ||||
| script: ./devscripts/run_tests.sh | ||||
							
								
								
									
274 ChangeLog

							| @@ -1,3 +1,275 @@ | ||||
| version 2021.01.08 | ||||
|  | ||||
| Core | ||||
| * [downloader/hls] Disable decryption in tests (#27660) | ||||
| + [utils] Add a function to clean podcast URLs | ||||
|  | ||||
| Extractors | ||||
| * [rai] Improve subtitles extraction (#27698, #27705) | ||||
| * [canvas] Match only supported VRT NU URLs (#27707) | ||||
| + [bibeltv] Add support for bibeltv.de (#14361) | ||||
| + [bfmtv] Add support for bfmtv.com (#16053, #26615) | ||||
| + [sbs] Add support for ondemand play and news embed URLs (#17650, #27629) | ||||
| * [twitch] Drop legacy kraken API v5 code altogether and refactor | ||||
| * [twitch:vod] Switch to GraphQL for video metadata | ||||
| * [canvas] Fix VRT NU extraction (#26957, #27053) | ||||
| * [twitch] Switch access token to GraphQL and refactor (#27646) | ||||
| + [rai] Detect ContentItem in iframe (#12652, #27673) | ||||
| * [ketnet] Fix extraction (#27662) | ||||
| + [dplay] Add suport Discovery+ domains (#27680) | ||||
| * [motherless] Improve extraction (#26495, #27450) | ||||
| * [motherless] Fix recent videos upload date extraction (#27661) | ||||
| * [nrk] Fix extraction for videos without a legalAge rating | ||||
| - [googleplus] Remove extractor (#4955, #7400) | ||||
| + [applepodcasts] Add support for podcasts.apple.com (#25918) | ||||
| + [googlepodcasts] Add support for podcasts.google.com | ||||
| + [iheart] Add support for iheart.com (#27037) | ||||
| * [acast] Clean podcast URLs | ||||
| * [stitcher] Clean podcast URLs | ||||
| + [xfileshare] Add support for aparat.cam (#27651) | ||||
| + [twitter] Add support for summary card (#25121) | ||||
| * [twitter] Try to use a Generic fallback for unknown twitter cards (#25982) | ||||
| + [stitcher] Add support for shows and show metadata extraction (#20510) | ||||
| * [stv] Improve episode id extraction (#23083) | ||||
|  | ||||
|  | ||||
| version 2021.01.03 | ||||
|  | ||||
| Extractors | ||||
| * [nrk] Improve series metadata extraction (#27473) | ||||
| + [nrk] Extract subtitles | ||||
| * [nrk] Fix age limit extraction | ||||
| * [nrk] Improve video id extraction | ||||
| + [nrk] Add support for podcasts (#27634, #27635) | ||||
| * [nrk] Generalize and delegate all item extractors to nrk | ||||
| + [nrk] Add support for mp3 formats | ||||
| * [nrktv] Switch to playback endpoint | ||||
| * [vvvvid] Fix season metadata extraction (#18130) | ||||
| * [stitcher] Fix extraction (#20811, #27606) | ||||
| * [acast] Fix extraction (#21444, #27612, #27613) | ||||
| + [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200) | ||||
| + [sky] Add support for Sports News articles and Brighcove videos (#13054) | ||||
| + [vvvvid] Extract akamai formats | ||||
| * [vvvvid] Skip unplayable episodes (#27599) | ||||
| * [yandexvideo] Fix extraction for Python 3.4 | ||||
|  | ||||
|  | ||||
| version 2020.12.31 | ||||
|  | ||||
| Core | ||||
| * [utils] Accept only supported protocols in url_or_none | ||||
| * [YoutubeDL] Allow format filtering using audio language (#16209) | ||||
|  | ||||
| Extractors | ||||
| + [redditr] Extract all thumbnails (#27503) | ||||
| * [vvvvid] Improve info extraction | ||||
| + [vvvvid] Add support for playlists (#18130, #27574) | ||||
| + [yandexdisk] Extract info from webpage | ||||
| * [yandexdisk] Fix extraction (#17861, #27131) | ||||
| * [yandexvideo] Use old API call as fallback | ||||
| * [yandexvideo] Fix extraction (#25000) | ||||
| - [nbc] Remove CSNNE extractor | ||||
| * [nbc] Fix NBCSport VPlayer URL extraction (#16640) | ||||
| + [aenetworks] Add support for biography.com (#3863) | ||||
| * [uktvplay] Match new video URLs (#17909) | ||||
| * [sevenplay] Detect API errors | ||||
| * [tenplay] Fix format extraction (#26653) | ||||
| * [brightcove] Raise error for DRM protected videos (#23467, #27568) | ||||
|  | ||||
|  | ||||
| version 2020.12.29 | ||||
|  | ||||
| Extractors | ||||
| * [youtube] Improve yt initial data extraction (#27524) | ||||
| * [youtube:tab] Improve URL matching #27559) | ||||
| * [youtube:tab] Restore retry on browse requests (#27313, #27564) | ||||
| * [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904, | ||||
|   #25418, #26070, #26350, #26738, #27563) | ||||
| - [brightcove] Remove sonyliv specific code | ||||
| * [piksel] Improve format extraction | ||||
| + [zype] Add support for uplynk videos | ||||
| + [toggle] Add support for live.mewatch.sg (#27555) | ||||
| + [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826) | ||||
| * [teachable] Improve embed detection (#26923) | ||||
| * [mitele] Fix free video extraction (#24624, #25827, #26757) | ||||
| * [telecinco] Fix extraction | ||||
| * [youtube] Update invidious.snopyta.org (#22667) | ||||
| * [amcnetworks] Improve auth only video detection (#27548) | ||||
| + [generic] Add support for VHX Embeds (#27546) | ||||
|  | ||||
|  | ||||
| version 2020.12.26 | ||||
|  | ||||
| Extractors | ||||
| * [instagram] Fix comment count extraction | ||||
| + [instagram] Add support for reel URLs (#26234, #26250) | ||||
| * [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821, | ||||
|   #27538) | ||||
| * [instagram] Improve thumbnail extraction | ||||
| * [instagram] Fix extraction when authenticated (#22880, #26377, #26981, | ||||
|   #27422) | ||||
| * [spankbang:playlist] Fix extraction (#24087) | ||||
| + [spankbang] Add support for playlist videos | ||||
| * [pornhub] Improve like and dislike count extraction (#27356) | ||||
| * [pornhub] Fix lq formats extraction (#27386, #27393) | ||||
| + [bongacams] Add support for bongacams.com (#27440) | ||||
| * [youtube:tab] Extend URL regular expression (#27501) | ||||
| * [theweatherchannel] Fix extraction (#25930, #26051) | ||||
| + [sprout] Add support for Universal Kids (#22518) | ||||
| * [theplatform] Allow passing geo bypass countries from other extractors | ||||
| + [wistia] Add support for playlists (#27533) | ||||
| + [ctv] Add support for ctv.ca (#27525) | ||||
| * [9c9media] Improve info extraction | ||||
| * [youtube] Fix automatic captions extraction (#27162, #27388) | ||||
| * [sonyliv] Fix title for movies | ||||
| * [sonyliv] Fix extraction (#25667) | ||||
| * [streetvoice] Fix extraction (#27455, #27492) | ||||
| + [facebook] Add support for watchparty pages (#27507) | ||||
| * [cbslocal] Fix video extraction | ||||
| + [brightcove] Add another method to extract policyKey | ||||
| * [mewatch] Relax URL regular expression (#27506) | ||||
|  | ||||
|  | ||||
| version 2020.12.22 | ||||
|  | ||||
| Core | ||||
| * [common] Remove unwanted query params from unsigned akamai manifest URLs | ||||
|  | ||||
| Extractors | ||||
| - [tastytrade] Remove extractor (#25716) | ||||
| * [niconico] Fix playlist extraction (#27428) | ||||
| - [everyonesmixtape] Remove extractor | ||||
| - [kanalplay] Remove extractor | ||||
| * [arkena] Fix extraction | ||||
| * [nba] Rewrite extractor | ||||
| * [turner] Improve info extraction | ||||
| * [youtube] Improve xsrf token extraction (#27442) | ||||
| * [generic] Improve RSS age limit extraction | ||||
| * [generic] Fix RSS itunes thumbnail extraction (#27405) | ||||
| + [redditr] Extract duration (#27426) | ||||
| - [zaq1] Remove extractor | ||||
| + [asiancrush] Add support for retrocrush.tv | ||||
| * [asiancrush] Fix extraction | ||||
| - [noco] Remove extractor (#10864) | ||||
| * [nfl] Fix extraction (#22245) | ||||
| * [skysports] Relax URL regular expression (#27435) | ||||
| + [tv5unis] Add support for tv5unis.ca (#22399, #24890) | ||||
| + [videomore] Add support for more.tv (#27088) | ||||
| + [yandexmusic] Add support for music.yandex.com (#27425) | ||||
| + [nhk:program] Add support for audio programs and program clips | ||||
| + [nhk] Add support for NHK video programs (#27230) | ||||
|  | ||||
|  | ||||
| version 2020.12.14 | ||||
|  | ||||
| Core | ||||
| * [extractor/common] Improve JSON-LD interaction statistic extraction (#23306) | ||||
| * [downloader/hls] Delegate manifests with media initialization to ffmpeg | ||||
| + [extractor/common] Document duration meta field for playlists | ||||
|  | ||||
| Extractors | ||||
| * [mdr] Bypass geo restriction | ||||
| * [mdr] Improve extraction (#24346, #26873) | ||||
| * [yandexmusic:album] Improve album title extraction (#27418) | ||||
| * [eporner] Fix view count extraction and make optional (#23306) | ||||
| + [eporner] Extend URL regular expression | ||||
| * [eporner] Fix hash extraction and extend _VALID_URL (#27396) | ||||
| * [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400) | ||||
| * [twitcasting] Fix format extraction and improve info extraction (#24868) | ||||
| * [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402) | ||||
| * [itv] Clean description from HTML tags (#27399) | ||||
| * [vlive] Sort live formats (#27404) | ||||
| * [hotstart] Fix and improve extraction | ||||
|     * Fix format extraction (#26690) | ||||
|     + Extract thumbnail URL (#16079, #20412) | ||||
|     + Add support for country specific playlist URLs (#23496) | ||||
|     * Select the last id in video URL (#26412) | ||||
| + [youtube] Add some invidious instances (#27373) | ||||
|  | ||||
|  | ||||
| version 2020.12.12 | ||||
|  | ||||
| Core | ||||
| * [YoutubeDL] Improve thumbnail filename deducing (#26010, #27244) | ||||
|  | ||||
| Extractors | ||||
| + [ruutu] Extract more metadata | ||||
| + [ruutu] Detect non-free videos (#21154) | ||||
| * [ruutu] Authenticate format URLs (#21031, #26782) | ||||
| + [ruutu] Add support for static.nelonenmedia.fi (#25412) | ||||
| + [ruutu] Extend URL regular expression (#24839) | ||||
| + [facebook] Add support archived live video URLs (#15859) | ||||
| * [wdr] Improve overall extraction | ||||
| + [wdr] Extend subtitles extraction (#22672, #22723) | ||||
| + [facebook] Add support for videos attached to Relay based story pages | ||||
|   (#10795) | ||||
| + [wdr:page] Add support for kinder.wdr.de (#27350) | ||||
| + [facebook] Add another regular expression for handleServerJS | ||||
| * [facebook] Fix embed page extraction | ||||
| + [facebook] Add support for Relay post pages (#26935) | ||||
| + [facebook] Add support for watch videos (#22795, #27062) | ||||
| + [facebook] Add support for group posts with multiple videos (#19131) | ||||
| * [itv] Fix series metadata extraction (#26897) | ||||
| - [itv] Remove old extraction method (#23177) | ||||
| * [facebook] Redirect mobile URLs to desktop URLs (#24831, #25624) | ||||
| + [facebook] Add support for Relay based pages (#26823) | ||||
| * [facebook] Try to reduce unnecessary tahoe requests | ||||
| - [facebook] Remove hardcoded Chrome User-Agent (#18974, #25411, #26958, | ||||
|   #27329) | ||||
| - [smotri] Remove extractor (#27358) | ||||
| - [beampro] Remove extractor (#17290, #22871, #23020, #23061, #26099) | ||||
|  | ||||
|  | ||||
| version 2020.12.09 | ||||
|  | ||||
| Core | ||||
| * [extractor/common] Fix inline HTML5 media tags processing (#27345) | ||||
|  | ||||
| Extractors | ||||
| * [youtube:tab] Improve identity token extraction (#27197) | ||||
| * [youtube:tab] Make click tracking params on continuation optional | ||||
| * [youtube:tab] Delegate inline playlists to tab-based playlists (27298) | ||||
| + [tubitv] Extract release year (#27317) | ||||
| * [amcnetworks] Fix free content extraction (#20354) | ||||
| + [lbry:channel] Add support for channels (#25584) | ||||
| + [lbry] Add support for short and embed URLs | ||||
| * [lbry] Fix channel metadata extraction | ||||
| + [telequebec] Add support for video.telequebec.tv (#27339) | ||||
| * [telequebec] Fix extraction (#25733, #26883) | ||||
| + [youtube:tab] Capture and output alerts (#27340) | ||||
| * [tvplay:home] Fix extraction (#21153) | ||||
| * [americastestkitchen] Fix Extraction and add support | ||||
|   for Cook's Country and Cook's Illustrated (#17234, #27322) | ||||
| + [slideslive] Add support for yoda service videos and extract subtitles | ||||
|   (#27323) | ||||
|  | ||||
|  | ||||
| version 2020.12.07 | ||||
|  | ||||
| Core | ||||
| * [extractor/common] Extract timestamp from Last-Modified header | ||||
| + [extractor/common] Add support for dl8-* media tags (#27283) | ||||
| * [extractor/common] Fix media type extraction for HTML5 media tags | ||||
|   in start/end form | ||||
|  | ||||
| Extractors | ||||
| * [aenetworks] Fix extraction (#23363, #23390, #26795, #26985) | ||||
|     * Fix Fastly format extraction | ||||
|     + Add support for play and watch subdomains | ||||
|     + Extract series metadata | ||||
| * [youtube] Improve youtu.be extraction in non-existing playlists (#27324) | ||||
| + [generic] Extract RSS video description, timestamp and itunes metadata | ||||
|   (#27177) | ||||
| * [nrk] Reduce the number of instalments and episodes requests | ||||
| * [nrk] Improve extraction | ||||
|     * Improve format extraction for old akamai formats | ||||
|     + Add is_live value to entry info dict | ||||
|     * Request instalments only when available | ||||
|     * Fix skole extraction | ||||
| + [peertube] Extract fps | ||||
| + [peertube] Recognize audio-only formats (#27295) | ||||
|  | ||||
|  | ||||
| version 2020.12.05 | ||||
|  | ||||
| Core | ||||
| @@ -23,7 +295,7 @@ version 2020.12.02 | ||||
|  | ||||
| Extractors | ||||
| + [tva] Add support for qub.ca (#27235) | ||||
| + [toggle] Detect DRM protected videos (closes #16479)(closes #20805) | ||||
| + [toggle] Detect DRM protected videos (#16479, #20805) | ||||
| + [toggle] Add support for new MeWatch URLs (#27256) | ||||
| * [youtube:tab] Extract channels only from channels tab (#27266) | ||||
| + [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030, | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| [](https://travis-ci.org/ytdl-org/youtube-dl) | ||||
| [](https://github.com/ytdl-org/youtube-dl/actions?query=workflow%3ACI) | ||||
|  | ||||
|  | ||||
| youtube-dl - download videos from youtube.com or other video platforms | ||||
|  | ||||
| @@ -371,7 +372,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo | ||||
|                                      out, youtube-dl will ask interactively. | ||||
|     -2, --twofactor TWOFACTOR        Two-factor authentication code | ||||
|     -n, --netrc                      Use .netrc authentication data | ||||
|     --video-password PASSWORD        Video password (vimeo, smotri, youku) | ||||
|     --video-password PASSWORD        Video password (vimeo, youku) | ||||
|  | ||||
| ## Adobe Pass Options: | ||||
|     --ap-mso MSO                     Adobe Pass multiple-system operator (TV | ||||
| @@ -677,6 +678,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends | ||||
|  - `container`: Name of the container format | ||||
|  - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) | ||||
|  - `format_id`: A short description of the format | ||||
|  - `language`: Language code | ||||
|  | ||||
| Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). | ||||
|  | ||||
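
As a hedged illustration of the new `language` filter field documented in the hunk above, the same filter can be applied through youtube-dl's embedding API; the format string and URL below are placeholders, not taken from the diff.

```python
from youtube_dl import YoutubeDL

# Sketch: prefer an English-language audio track when merging, and fall
# back to the single best format if no language-tagged audio matches.
ydl_opts = {'format': 'bestvideo+bestaudio[language^=en]/best'}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
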
| @@ -879,7 +881,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op | ||||
|  | ||||
| Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. | ||||
|  | ||||
| In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). | ||||
| In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). | ||||
|  | ||||
| Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. | ||||
|  | ||||
|   | ||||
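
For the cookies hunk above, a hedged embedding-API counterpart to the `--cookies` command-line example; the file path and URL are placeholders.

```python
from youtube_dl import YoutubeDL

# Sketch: read cookies from an exported Netscape-format cookies file;
# 'cookiefile' is the option name used by the embedding API.
ydl_opts = {'cookiefile': '/path/to/cookies/file.txt'}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
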
| @@ -1,5 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar | ||||
| java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython" | ||||
| $HOME/jython/bin/jython -m pip install nose | ||||
							
								
								
									
17 devscripts/run_tests.bat (Normal file)

							| @@ -0,0 +1,17 @@ | ||||
| @echo off | ||||
|  | ||||
| rem Keep this list in sync with the `offlinetest` target in Makefile | ||||
| set DOWNLOAD_TESTS="age_restriction^|download^|iqiyi_sdk_interpreter^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature" | ||||
|  | ||||
| if "%YTDL_TEST_SET%" == "core" ( | ||||
|     set test_set="-I test_("%DOWNLOAD_TESTS%")\.py" | ||||
|     set multiprocess_args="" | ||||
| ) else if "%YTDL_TEST_SET%" == "download" ( | ||||
|     set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py" | ||||
|     set multiprocess_args="--processes=4 --process-timeout=540" | ||||
| ) else ( | ||||
|     echo YTDL_TEST_SET is not set or invalid | ||||
|     exit /b 1 | ||||
| ) | ||||
|  | ||||
| nosetests test --verbose %test_set:"=% %multiprocess_args:"=% | ||||
| @@ -35,6 +35,8 @@ | ||||
|  - **adobetv:video** | ||||
|  - **AdultSwim** | ||||
|  - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault | ||||
|  - **aenetworks:collection** | ||||
|  - **aenetworks:show** | ||||
|  - **afreecatv**: afreecatv.com | ||||
|  - **AirMozilla** | ||||
|  - **AliExpressLive** | ||||
| @@ -52,9 +54,11 @@ | ||||
|  - **Aparat** | ||||
|  - **AppleConnect** | ||||
|  - **AppleDaily**: 臺灣蘋果日報 | ||||
|  - **ApplePodcasts** | ||||
|  - **appletrailers** | ||||
|  - **appletrailers:section** | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ArcPublishing** | ||||
|  - **ARD** | ||||
|  - **ARD:mediathek** | ||||
|  - **ARDBetaMediathek** | ||||
| @@ -95,6 +99,10 @@ | ||||
|  - **BellMedia** | ||||
|  - **Bet** | ||||
|  - **bfi:player** | ||||
|  - **bfmtv** | ||||
|  - **bfmtv:article** | ||||
|  - **bfmtv:live** | ||||
|  - **BibelTV** | ||||
|  - **Bigflix** | ||||
|  - **Bild**: Bild.de | ||||
|  - **BiliBili** | ||||
| @@ -102,6 +110,7 @@ | ||||
|  - **BilibiliAudioAlbum** | ||||
|  - **BiliBiliPlayer** | ||||
|  - **BioBioChileTV** | ||||
|  - **Biography** | ||||
|  - **BIQLE** | ||||
|  - **BitChute** | ||||
|  - **BitChuteChannel** | ||||
| @@ -110,6 +119,7 @@ | ||||
|  - **blinkx** | ||||
|  - **Bloomberg** | ||||
|  - **BokeCC** | ||||
|  - **BongaCams** | ||||
|  - **BostonGlobe** | ||||
|  - **Box** | ||||
|  - **Bpb**: Bundeszentrale für politische Bildung | ||||
| @@ -144,6 +154,7 @@ | ||||
|  - **CBS** | ||||
|  - **CBSInteractive** | ||||
|  - **CBSLocal** | ||||
|  - **CBSLocalArticle** | ||||
|  - **cbsnews**: CBS News | ||||
|  - **cbsnews:embed** | ||||
|  - **cbsnews:livevideo**: CBS News Live Videos | ||||
| @@ -193,9 +204,9 @@ | ||||
|  - **CrooksAndLiars** | ||||
|  - **crunchyroll** | ||||
|  - **crunchyroll:playlist** | ||||
|  - **CSNNE** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews**: 華視新聞 | ||||
|  - **CTV** | ||||
|  - **CTVNews** | ||||
|  - **cu.ntv.co.jp**: Nippon Television Network | ||||
|  - **Culturebox** | ||||
| @@ -266,7 +277,6 @@ | ||||
|  - **ESPNArticle** | ||||
|  - **EsriVideo** | ||||
|  - **Europa** | ||||
|  - **EveryonesMixtape** | ||||
|  - **EWETV** | ||||
|  - **ExpoTV** | ||||
|  - **Expressen** | ||||
| @@ -313,7 +323,6 @@ | ||||
|  - **Funk** | ||||
|  - **Fusion** | ||||
|  - **Fux** | ||||
|  - **FXNetworks** | ||||
|  - **Gaia** | ||||
|  - **GameInformer** | ||||
|  - **GameSpot** | ||||
| @@ -332,6 +341,8 @@ | ||||
|  - **Go** | ||||
|  - **GodTube** | ||||
|  - **Golem** | ||||
|  - **google:podcasts** | ||||
|  - **google:podcasts:feed** | ||||
|  - **GoogleDrive** | ||||
|  - **Goshgay** | ||||
|  - **GPUTechConf** | ||||
| @@ -346,6 +357,7 @@ | ||||
|  - **hgtv.com:show** | ||||
|  - **HiDive** | ||||
|  - **HistoricFilms** | ||||
|  - **history:player** | ||||
|  - **history:topic**: History.com Topic | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
| @@ -365,6 +377,8 @@ | ||||
|  - **HungamaSong** | ||||
|  - **Hypem** | ||||
|  - **ign.com** | ||||
|  - **IHeartRadio** | ||||
|  - **iheartradio:podcast** | ||||
|  - **imdb**: Internet Movie Database trailers | ||||
|  - **imdb:list**: Internet Movie Database lists | ||||
|  - **Imgur** | ||||
| @@ -398,7 +412,6 @@ | ||||
|  - **JWPlatform** | ||||
|  - **Kakao** | ||||
|  - **Kaltura** | ||||
|  - **KanalPlay**: Kanal 5/9/11 Play | ||||
|  - **Kankan** | ||||
|  - **Karaoketv** | ||||
|  - **KarriereVideos** | ||||
| @@ -422,7 +435,8 @@ | ||||
|  - **la7.it** | ||||
|  - **laola1tv** | ||||
|  - **laola1tv:embed** | ||||
|  - **lbry.tv** | ||||
|  - **lbry** | ||||
|  - **lbry:channel** | ||||
|  - **LCI** | ||||
|  - **Lcp** | ||||
|  - **LcpPlay** | ||||
| @@ -498,8 +512,6 @@ | ||||
|  - **mixcloud** | ||||
|  - **mixcloud:playlist** | ||||
|  - **mixcloud:user** | ||||
|  - **Mixer:live** | ||||
|  - **Mixer:vod** | ||||
|  - **MLB** | ||||
|  - **Mnet** | ||||
|  - **MNetTV** | ||||
| @@ -540,6 +552,11 @@ | ||||
|  - **NationalGeographicTV** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **nba:watch** | ||||
|  - **nba:watch:collection** | ||||
|  - **NBAChannel** | ||||
|  - **NBAEmbed** | ||||
|  - **NBAWatchEmbed** | ||||
|  - **NBC** | ||||
|  - **NBCNews** | ||||
|  - **nbcolympics** | ||||
| @@ -569,8 +586,10 @@ | ||||
|  - **NextTV**: 壹電視 | ||||
|  - **Nexx** | ||||
|  - **NexxEmbed** | ||||
|  - **nfl.com** | ||||
|  - **nfl.com** (Currently broken) | ||||
|  - **nfl.com:article** (Currently broken) | ||||
|  - **NhkVod** | ||||
|  - **NhkVodProgram** | ||||
|  - **nhl.com** | ||||
|  - **nick.com** | ||||
|  - **nick.de** | ||||
| @@ -584,7 +603,6 @@ | ||||
|  - **njoy:embed** | ||||
|  - **NJPWWorld**: 新日本プロレスワールド | ||||
|  - **NobelPrize** | ||||
|  - **Noco** | ||||
|  - **NonkTube** | ||||
|  - **Noovo** | ||||
|  - **Normalboots** | ||||
| @@ -602,6 +620,7 @@ | ||||
|  - **Npr** | ||||
|  - **NRK** | ||||
|  - **NRKPlaylist** | ||||
|  - **NRKRadioPodkast** | ||||
|  - **NRKSkole**: NRK Skole | ||||
|  - **NRKTV**: NRK TV and NRK Radio | ||||
|  - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte | ||||
| @@ -681,7 +700,6 @@ | ||||
|  - **Playwire** | ||||
|  - **pluralsight** | ||||
|  - **pluralsight:course** | ||||
|  - **plus.google**: Google Plus | ||||
|  - **podomatic** | ||||
|  - **Pokemon** | ||||
|  - **PolskieRadio** | ||||
| @@ -805,19 +823,16 @@ | ||||
|  - **ShowRoomLive** | ||||
|  - **Sina** | ||||
|  - **sky.it** | ||||
|  - **sky:news** | ||||
|  - **sky:sports** | ||||
|  - **sky:sports:news** | ||||
|  - **skyacademy.it** | ||||
|  - **SkylineWebcams** | ||||
|  - **SkyNews** | ||||
|  - **skynewsarabia:article** | ||||
|  - **skynewsarabia:video** | ||||
|  - **SkySports** | ||||
|  - **Slideshare** | ||||
|  - **SlidesLive** | ||||
|  - **Slutload** | ||||
|  - **smotri**: Smotri.com | ||||
|  - **smotri:broadcast**: Smotri.com broadcasts | ||||
|  - **smotri:community**: Smotri.com community videos | ||||
|  - **smotri:user**: Smotri.com user videos | ||||
|  - **Snotr** | ||||
|  - **Sohu** | ||||
|  - **SonyLIV** | ||||
| @@ -855,6 +870,7 @@ | ||||
|  - **stanfordoc**: Stanford Open ClassRoom | ||||
|  - **Steam** | ||||
|  - **Stitcher** | ||||
|  - **StitcherShow** | ||||
|  - **Streamable** | ||||
|  - **streamcloud.eu** | ||||
|  - **StreamCZ** | ||||
| @@ -875,7 +891,6 @@ | ||||
|  - **Tagesschau** | ||||
|  - **tagesschau:player** | ||||
|  - **Tass** | ||||
|  - **TastyTrade** | ||||
|  - **TBS** | ||||
|  - **TDSLifeway** | ||||
|  - **Teachable** | ||||
| @@ -898,6 +913,7 @@ | ||||
|  - **TeleQuebecEmission** | ||||
|  - **TeleQuebecLive** | ||||
|  - **TeleQuebecSquat** | ||||
|  - **TeleQuebecVideo** | ||||
|  - **TeleTask** | ||||
|  - **Telewebion** | ||||
|  - **TennisTV** | ||||
| @@ -948,6 +964,8 @@ | ||||
|  - **TV2DKBornholmPlay** | ||||
|  - **TV4**: tv4.se and tv4play.se | ||||
|  - **TV5MondePlus**: TV5MONDE+ | ||||
|  - **tv5unis** | ||||
|  - **tv5unis:video** | ||||
|  - **tv8.it** | ||||
|  - **TVA** | ||||
|  - **TVANouvelles** | ||||
| @@ -1083,6 +1101,7 @@ | ||||
|  - **vube**: Vube.com | ||||
|  - **VuClip** | ||||
|  - **VVVVID** | ||||
|  - **VVVVIDShow** | ||||
|  - **VyboryMos** | ||||
|  - **Vzaar** | ||||
|  - **Wakanim** | ||||
| @@ -1105,6 +1124,7 @@ | ||||
|  - **WeiboMobile** | ||||
|  - **WeiqiTV**: WQTV | ||||
|  - **Wistia** | ||||
|  - **WistiaPlaylist** | ||||
|  - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl | ||||
|  - **WorldStarHipHop** | ||||
|  - **WSJ**: Wall Street Journal | ||||
| @@ -1112,7 +1132,7 @@ | ||||
|  - **WWE** | ||||
|  - **XBef** | ||||
|  - **XboxClips** | ||||
|  - **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing | ||||
|  - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing | ||||
|  - **XHamster** | ||||
|  - **XHamsterEmbed** | ||||
|  - **XHamsterUser** | ||||
| @@ -1164,9 +1184,9 @@ | ||||
|  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | ||||
|  - **youtube:tab**: YouTube.com tab | ||||
|  - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | ||||
|  - **YoutubeYtBe** | ||||
|  - **YoutubeYtUser** | ||||
|  - **Zapiks** | ||||
|  - **Zaq1** | ||||
|  - **Zattoo** | ||||
|  - **ZattooLive** | ||||
|  - **ZDF** | ||||
|   | ||||
| @@ -98,6 +98,55 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) | ||||
|         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) | ||||
|  | ||||
|     def test_search_json_ld_realworld(self): | ||||
|         # https://github.com/ytdl-org/youtube-dl/issues/23306 | ||||
|         expect_dict( | ||||
|             self, | ||||
|             self.ie._search_json_ld(r'''<script type="application/ld+json"> | ||||
| { | ||||
| "@context": "http://schema.org/", | ||||
| "@type": "VideoObject", | ||||
| "name": "1 On 1 With Kleio", | ||||
| "url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/", | ||||
| "duration": "PT0H12M23S", | ||||
| "thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"], | ||||
| "contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4", | ||||
| "embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/", | ||||
| "image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", | ||||
| "width": "1920", | ||||
| "height": "1080", | ||||
| "encodingFormat": "mp4", | ||||
| "bitrate": "6617kbps", | ||||
| "isFamilyFriendly": "False", | ||||
| "description": "Kleio Valentien", | ||||
| "uploadDate": "2015-12-05T21:24:35+01:00", | ||||
| "interactionStatistic": { | ||||
| "@type": "InteractionCounter", | ||||
| "interactionType": { "@type": "http://schema.org/WatchAction" }, | ||||
| "userInteractionCount": 1120958 | ||||
| }, "aggregateRating": { | ||||
| "@type": "AggregateRating", | ||||
| "ratingValue": "88", | ||||
| "ratingCount": "630", | ||||
| "bestRating": "100", | ||||
| "worstRating": "0" | ||||
| }, "actor": [{ | ||||
| "@type": "Person", | ||||
| "name": "Kleio Valentien", | ||||
| "url": "https://www.eporner.com/pornstar/kleio-valentien/" | ||||
| }]} | ||||
| </script>''', None), | ||||
|             { | ||||
|                 'title': '1 On 1 With Kleio', | ||||
|                 'description': 'Kleio Valentien', | ||||
|                 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', | ||||
|                 'timestamp': 1449347075, | ||||
|                 'duration': 743.0, | ||||
|                 'view_count': 1120958, | ||||
|                 'width': 1920, | ||||
|                 'height': 1080, | ||||
|             }) | ||||
|  | ||||
|     def test_download_json(self): | ||||
|         uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') | ||||
|         self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) | ||||
| @@ -108,6 +157,18 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) | ||||
|  | ||||
|     def test_parse_html5_media_entries(self): | ||||
|         # inline video tag | ||||
|         expect_dict( | ||||
|             self, | ||||
|             self.ie._parse_html5_media_entries( | ||||
|                 'https://127.0.0.1/video.html', | ||||
|                 r'<html><video src="/vid.mp4" /></html>', None)[0], | ||||
|             { | ||||
|                 'formats': [{ | ||||
|                     'url': 'https://127.0.0.1/vid.mp4', | ||||
|                 }], | ||||
|             }) | ||||
|  | ||||
|         # from https://www.r18.com/ | ||||
|         # with kpbs in label | ||||
|         expect_dict( | ||||
|   | ||||
| @@ -464,6 +464,7 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         assert_syntax_error('+bestaudio') | ||||
|         assert_syntax_error('bestvideo+') | ||||
|         assert_syntax_error('/') | ||||
|         assert_syntax_error('bestvideo+bestvideo+bestaudio') | ||||
|  | ||||
|     def test_format_filtering(self): | ||||
|         formats = [ | ||||
|   | ||||
| @@ -36,7 +36,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q')  # 585 | ||||
|         assertPlaylist('PL63F0C78739B09958') | ||||
|         assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') | ||||
|         assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') | ||||
|         assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') | ||||
|         assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668 | ||||
|         self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) | ||||
| @@ -57,8 +57,8 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') | ||||
|         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') | ||||
|  | ||||
|     # def test_youtube_user_matching(self): | ||||
|     #     self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab']) | ||||
|     def test_youtube_user_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab']) | ||||
|  | ||||
|     def test_youtube_feeds(self): | ||||
|         self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab']) | ||||
|   | ||||
| @@ -258,16 +258,24 @@ class TestNRKSubtitles(BaseTestSubtitles): | ||||
|  | ||||
|  | ||||
| class TestRaiPlaySubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html' | ||||
|     IE = RaiPlayIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|     def test_subtitles_key(self): | ||||
|         self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['it'])) | ||||
|         self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') | ||||
|  | ||||
|     def test_subtitles_array_key(self): | ||||
|         self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['it'])) | ||||
|         self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd') | ||||
|  | ||||
|  | ||||
| class TestVikiSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.viki.com/videos/1060846v-punch-episode-18' | ||||
|   | ||||
| @@ -21,6 +21,7 @@ from youtube_dl.utils import ( | ||||
|     encode_base_n, | ||||
|     caesar, | ||||
|     clean_html, | ||||
|     clean_podcast_url, | ||||
|     date_from_str, | ||||
|     DateRange, | ||||
|     detect_exe_version, | ||||
| @@ -554,6 +555,11 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(url_or_none('http$://foo.de'), None) | ||||
|         self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') | ||||
|         self.assertEqual(url_or_none('//foo.de'), '//foo.de') | ||||
|         self.assertEqual(url_or_none('s3://foo.de'), None) | ||||
|         self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de') | ||||
|         self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de') | ||||
|         self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de') | ||||
|         self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de') | ||||
|  | ||||
|     def test_parse_age_limit(self): | ||||
|         self.assertEqual(parse_age_limit(None), None) | ||||
| @@ -1465,6 +1471,10 @@ Line 1 | ||||
|         self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) | ||||
|         self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) | ||||
|  | ||||
|     def test_clean_podcast_url(self): | ||||
|         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') | ||||
|         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') | ||||
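Both assertions exercise youtube_dl.utils.clean_podcast_url, which strips known measurement/redirect prefixes (Podtrac, Chartable and similar) so the URL points straight at the hosting CDN. A reduced sketch of the idea, not the in-tree implementation (which handles more prefix hosts than this toy pattern; the hostnames here are taken from the test URLs above):

    import re

    def strip_podcast_trackers(url):
        # toy subset of the tracking prefixes the real helper handles
        return re.sub(r'''(?x)(?:
            (?:www\.)?podtrac\.com/pts/redirect\.[0-9a-z]{3,4}|
            chtbl\.com/track/[^/]+|
            play\.podtrac\.com/[^/]+
        )/''', '', url)

    print(strip_podcast_trackers(
        'https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/'
        'traffic.megaphone.fm/HSW7835899191.mp3'))
    # https://traffic.megaphone.fm/HSW7835899191.mp3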
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -1083,7 +1083,7 @@ class YoutubeDL(object): | ||||
|                 '*=': lambda attr, value: value in attr, | ||||
|             } | ||||
|             str_operator_rex = re.compile(r'''(?x) | ||||
|                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id) | ||||
|                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language) | ||||
|                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)? | ||||
|                 \s*(?P<value>[a-zA-Z0-9._-]+) | ||||
|                 \s*$ | ||||
| @@ -1226,6 +1226,8 @@ class YoutubeDL(object): | ||||
|                         group = _parse_format_selection(tokens, inside_group=True) | ||||
|                         current_selector = FormatSelector(GROUP, group, []) | ||||
|                     elif string == '+': | ||||
|                         if inside_merge: | ||||
|                             raise syntax_error('Unexpected "+"', start) | ||||
|                         video_selector = current_selector | ||||
|                         audio_selector = _parse_format_selection(tokens, inside_merge=True) | ||||
|                         if not video_selector or not audio_selector: | ||||
| @@ -1777,6 +1779,8 @@ class YoutubeDL(object): | ||||
|                     os.makedirs(dn) | ||||
|                 return True | ||||
|             except (OSError, IOError) as err: | ||||
|                 if isinstance(err, OSError) and err.errno == errno.EEXIST: | ||||
|                     return True | ||||
|                 self.report_error('unable to create directory ' + error_to_compat_str(err)) | ||||
|                 return False | ||||
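The added errno check makes directory creation race-safe: if another process (for example a parallel youtube-dl run) creates the directory between the os.path.exists() check and os.makedirs(), the resulting EEXIST error is treated as success instead of aborting the download. The same pattern as a standalone sketch (simplified; the real method also reports other errors via report_error):

    import errno
    import os

    def ensure_dir(path):
        try:
            os.makedirs(path)
            return True
        except OSError as err:
            # the directory appeared in the meantime, which is fine
            return err.errno == errno.EEXIST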
|  | ||||
| @@ -2410,7 +2414,7 @@ class YoutubeDL(object): | ||||
|             thumb_ext = determine_ext(t['url'], 'jpg') | ||||
|             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' | ||||
|             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' | ||||
|             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext | ||||
|             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext')) | ||||
|  | ||||
|             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): | ||||
|                 self.to_screen('[%s] %s: Thumbnail %sis already present' % | ||||
|   | ||||
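The switch from os.path.splitext() to replace_extension() matters for filenames whose title contains a dot: splitext() treats the last dot-separated chunk as an extension and drops it, while replace_extension() only swaps the suffix when it matches the actual media extension. Illustrative values, assuming the in-tree youtube_dl.utils is importable:

    from youtube_dl.utils import replace_extension

    # the splitext-based code would have produced 'Episode Part.1.jpg' and 'Episode Part.jpg'
    print(replace_extension('Episode Part.1.mp4', 'jpg', 'mp4'))   # Episode Part.1.jpg
    print(replace_extension('Episode Part.1', 'jpg', 'mp4'))       # Episode Part.1.jpg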
| @@ -42,11 +42,13 @@ class HlsFD(FragmentFD): | ||||
|             # no segments will definitely be appended to the end of the playlist. | ||||
|             # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of | ||||
|             #                                 # event media playlists [4] | ||||
|             r'#EXT-X-MAP:',  # media initialization [5] | ||||
|  | ||||
|             # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 | ||||
|             # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 | ||||
|             # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 | ||||
|             # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 | ||||
|             # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 | ||||
|         ) | ||||
|         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] | ||||
|         is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest | ||||
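Adding '#EXT-X-MAP:' to UNSUPPORTED_FEATURES means a playlist that declares a media initialization section now fails HlsFD.can_download(), so the download is delegated to ffmpeg instead of the native HLS downloader. A self-contained illustration of that check (the EXT-X-MAP pattern comes from the list above; the key-method pattern is a stand-in for the encryption check in the same list; the manifest is a made-up fragment):

    import re

    UNSUPPORTED_FEATURES = (r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', r'#EXT-X-MAP:')
    manifest = '#EXTM3U\n#EXT-X-MAP:URI="init.mp4"\n#EXTINF:4.0,\nseg0.m4s\n'
    native_ok = all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
    print(native_ok)   # False, so hand the job to ffmpeg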
| @@ -170,6 +172,10 @@ class HlsFD(FragmentFD): | ||||
|                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) | ||||
|                         decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( | ||||
|                             self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() | ||||
|                         # Don't decrypt the content in tests, since the data is explicitly truncated and its length is no | ||||
|                         # longer a multiple of the AES block size (see https://github.com/ytdl-org/youtube-dl/pull/27660). | ||||
|                         # Tests only care that the correct data was downloaded, not what it decrypts to. | ||||
|                         if not test: | ||||
|                             frag_content = AES.new( | ||||
|                                 decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) | ||||
|                     self._append_fragment(ctx, frag_content) | ||||
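The guard is needed because CBC decryption only works on ciphertext whose length is a multiple of the 16-byte AES block size, and test downloads are deliberately truncated mid-block. A minimal demonstration of the failure mode, assuming pycryptodome (or pycrypto) is installed and using dummy key/IV values:

    from Crypto.Cipher import AES

    key = b'0' * 16
    iv = b'1' * 16
    truncated = b'only-21-bytes-of-data'   # 21 bytes, not a multiple of 16
    try:
        AES.new(key, AES.MODE_CBC, iv).decrypt(truncated)
    except ValueError as err:
        print('decryption refused:', err)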
|   | ||||
| @@ -2,21 +2,48 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import functools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     float_or_none, | ||||
|     clean_podcast_url, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     OnDemandPagedList, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ACastIE(InfoExtractor): | ||||
| class ACastBaseIE(InfoExtractor): | ||||
|     def _extract_episode(self, episode, show_info): | ||||
|         title = episode['title'] | ||||
|         info = { | ||||
|             'id': episode['id'], | ||||
|             'display_id': episode.get('episodeUrl'), | ||||
|             'url': clean_podcast_url(episode['url']), | ||||
|             'title': title, | ||||
|             'description': clean_html(episode.get('description') or episode.get('summary')), | ||||
|             'thumbnail': episode.get('image'), | ||||
|             'timestamp': parse_iso8601(episode.get('publishDate')), | ||||
|             'duration': int_or_none(episode.get('duration')), | ||||
|             'filesize': int_or_none(episode.get('contentLength')), | ||||
|             'season_number': int_or_none(episode.get('season')), | ||||
|             'episode': title, | ||||
|             'episode_number': int_or_none(episode.get('episode')), | ||||
|         } | ||||
|         info.update(show_info) | ||||
|         return info | ||||
|  | ||||
|     def _extract_show_info(self, show): | ||||
|         return { | ||||
|             'creator': show.get('author'), | ||||
|             'series': show.get('title'), | ||||
|         } | ||||
|  | ||||
|     def _call_api(self, path, video_id, query=None): | ||||
|         return self._download_json( | ||||
|             'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query) | ||||
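For reference, these are the feeder.acast.com endpoints the shared base class now targets, assembled from the _call_api path above (the slugs come from the test URLs below and are purely illustrative):

    show_slug = 'sparpodcast'
    episode_slug = '2.raggarmordet-rosterurdetforflutna'
    show_url = 'https://feeder.acast.com/api/v1/shows/' + show_slug              # used by acast:channel
    episode_url = '%s/episodes/%s?showInfo=true' % (show_url, episode_slug)      # used by acast
    print(show_url)
    print(episode_url)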
|  | ||||
|  | ||||
| class ACastIE(ACastBaseIE): | ||||
|     IE_NAME = 'acast' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
| @@ -28,15 +55,15 @@ class ACastIE(InfoExtractor): | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', | ||||
|         'md5': '16d936099ec5ca2d5869e3a813ee8dc4', | ||||
|         'md5': 'f5598f3ad1e4776fed12ec1407153e4b', | ||||
|         'info_dict': { | ||||
|             'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', | ||||
|             'ext': 'mp3', | ||||
|             'title': '2. Raggarmordet - Röster ur det förflutna', | ||||
|             'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', | ||||
|             'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67', | ||||
|             'timestamp': 1477346700, | ||||
|             'upload_date': '20161024', | ||||
|             'duration': 2766.602563, | ||||
|             'duration': 2766, | ||||
|             'creator': 'Anton Berg & Martin Johnson', | ||||
|             'series': 'Spår', | ||||
|             'episode': '2. Raggarmordet - Röster ur det förflutna', | ||||
| @@ -45,7 +72,7 @@ class ACastIE(InfoExtractor): | ||||
|         'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', | ||||
|         'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', | ||||
| @@ -54,40 +81,14 @@ class ACastIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         s = self._download_json( | ||||
|             'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), | ||||
|             display_id) | ||||
|         media_url = s['url'] | ||||
|         if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): | ||||
|             episode_url = s.get('episodeUrl') | ||||
|             if episode_url: | ||||
|                 display_id = episode_url | ||||
|             else: | ||||
|                 channel, display_id = re.match(self._VALID_URL, s['link']).groups() | ||||
|         cast_data = self._download_json( | ||||
|             'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), | ||||
|             display_id)['result'] | ||||
|         e = cast_data['episode'] | ||||
|         title = e.get('name') or s['title'] | ||||
|         return { | ||||
|             'id': compat_str(e['id']), | ||||
|             'display_id': display_id, | ||||
|             'url': media_url, | ||||
|             'title': title, | ||||
|             'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), | ||||
|             'thumbnail': e.get('image'), | ||||
|             'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), | ||||
|             'duration': float_or_none(e.get('duration') or s.get('duration')), | ||||
|             'filesize': int_or_none(e.get('contentLength')), | ||||
|             'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), | ||||
|             'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), | ||||
|             'season_number': int_or_none(e.get('seasonNumber')), | ||||
|             'episode': title, | ||||
|             'episode_number': int_or_none(e.get('episodeNumber')), | ||||
|         } | ||||
|         episode = self._call_api( | ||||
|             '%s/episodes/%s' % (channel, display_id), | ||||
|             display_id, {'showInfo': 'true'}) | ||||
|         return self._extract_episode( | ||||
|             episode, self._extract_show_info(episode.get('show') or {})) | ||||
|  | ||||
|  | ||||
| class ACastChannelIE(InfoExtractor): | ||||
| class ACastChannelIE(ACastBaseIE): | ||||
|     IE_NAME = 'acast:channel' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
| @@ -102,34 +103,24 @@ class ACastChannelIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': '4efc5294-5385-4847-98bd-519799ce5786', | ||||
|             'title': 'Today in Focus', | ||||
|             'description': 'md5:9ba5564de5ce897faeb12963f4537a64', | ||||
|             'description': 'md5:c09ce28c91002ce4ffce71d6504abaae', | ||||
|         }, | ||||
|         'playlist_mincount': 35, | ||||
|         'playlist_mincount': 200, | ||||
|     }, { | ||||
|         'url': 'http://play.acast.com/s/ft-banking-weekly', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _API_BASE_URL = 'https://play.acast.com/api/' | ||||
|     _PAGE_SIZE = 10 | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) | ||||
|  | ||||
|     def _fetch_page(self, channel_slug, page): | ||||
|         casts = self._download_json( | ||||
|             self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page), | ||||
|             channel_slug, note='Download page %d of channel data' % page) | ||||
|         for cast in casts: | ||||
|             yield self.url_result( | ||||
|                 'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']), | ||||
|                 'ACast', cast['id']) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_slug = self._match_id(url) | ||||
|         channel_data = self._download_json( | ||||
|             self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug) | ||||
|         entries = OnDemandPagedList(functools.partial( | ||||
|             self._fetch_page, channel_slug), self._PAGE_SIZE) | ||||
|         return self.playlist_result(entries, compat_str( | ||||
|             channel_data['id']), channel_data['name'], channel_data.get('description')) | ||||
|         show_slug = self._match_id(url) | ||||
|         show = self._call_api(show_slug, show_slug) | ||||
|         show_info = self._extract_show_info(show) | ||||
|         entries = [] | ||||
|         for episode in (show.get('episodes') or []): | ||||
|             entries.append(self._extract_episode(episode, show_info)) | ||||
|         return self.playlist_result( | ||||
|             entries, show.get('id'), show.get('title'), show.get('description')) | ||||
|   | ||||
| @@ -10,6 +10,7 @@ import random | ||||
| from .common import InfoExtractor | ||||
| from ..aes import aes_cbc_decrypt | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_b64decode, | ||||
|     compat_ord, | ||||
| ) | ||||
| @@ -18,11 +19,13 @@ from ..utils import ( | ||||
|     bytes_to_long, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     intlist_to_bytes, | ||||
|     long_to_bytes, | ||||
|     pkcs1pad, | ||||
|     strip_or_none, | ||||
|     urljoin, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -31,16 +34,27 @@ class ADNIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', | ||||
|         'md5': 'e497370d847fd79d9d4c74be55575c7a', | ||||
|         'md5': '0319c99885ff5547565cacb4f3f9348d', | ||||
|         'info_dict': { | ||||
|             'id': '7778', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Blue Exorcist - Kyôto Saga - Épisode 1', | ||||
|             'title': 'Blue Exorcist - Kyôto Saga - Episode 1', | ||||
|             'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', | ||||
|             'series': 'Blue Exorcist - Kyôto Saga', | ||||
|             'duration': 1467, | ||||
|             'release_date': '20170106', | ||||
|             'comment_count': int, | ||||
|             'average_rating': float, | ||||
|             'season_number': 2, | ||||
|             'episode': 'Début des hostilités', | ||||
|             'episode_number': 1, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     _BASE_URL = 'http://animedigitalnetwork.fr' | ||||
|     _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537) | ||||
|     _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' | ||||
|     _PLAYER_BASE_URL = _API_BASE_URL + 'player/' | ||||
|     _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537) | ||||
|     _POS_ALIGN_MAP = { | ||||
|         'start': 1, | ||||
|         'end': 3, | ||||
| @@ -54,26 +68,24 @@ class ADNIE(InfoExtractor): | ||||
|     def _ass_subtitles_timecode(seconds): | ||||
|         return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) | ||||
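A worked example of the timecode helper above: 3725.5 seconds becomes hours:minutes:seconds.centiseconds in ASS notation.

    print('%01d:%02d:%02d.%02d' % (
        3725.5 / 3600, (3725.5 % 3600) / 60, 3725.5 % 60, (3725.5 % 1) * 100))
    # 1:02:05.50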
|  | ||||
|     def _get_subtitles(self, sub_path, video_id): | ||||
|         if not sub_path: | ||||
|     def _get_subtitles(self, sub_url, video_id): | ||||
|         if not sub_url: | ||||
|             return None | ||||
|  | ||||
|         enc_subtitles = self._download_webpage( | ||||
|             urljoin(self._BASE_URL, sub_path), | ||||
|             video_id, 'Downloading subtitles location', fatal=False) or '{}' | ||||
|             sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}' | ||||
|         subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location') | ||||
|         if subtitle_location: | ||||
|             enc_subtitles = self._download_webpage( | ||||
|                 urljoin(self._BASE_URL, subtitle_location), | ||||
|                 video_id, 'Downloading subtitles data', fatal=False, | ||||
|                 headers={'Origin': 'https://animedigitalnetwork.fr'}) | ||||
|                 subtitle_location, video_id, 'Downloading subtitles data', | ||||
|                 fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) | ||||
|         if not enc_subtitles: | ||||
|             return None | ||||
|  | ||||
|         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js | ||||
|         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( | ||||
|             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), | ||||
|             bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')), | ||||
|             bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')), | ||||
|             bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) | ||||
|         )) | ||||
|         subtitles_json = self._parse_json( | ||||
| @@ -119,59 +131,76 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         player_config = self._parse_json(self._search_regex( | ||||
|             r'playerConfig\s*=\s*({.+});', webpage, | ||||
|             'player config', default='{}'), video_id, fatal=False) | ||||
|         if not player_config: | ||||
|             config_url = urljoin(self._BASE_URL, self._search_regex( | ||||
|                 r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', | ||||
|                 webpage, 'config url')) | ||||
|             player_config = self._download_json( | ||||
|                 config_url, video_id, | ||||
|         video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id | ||||
|         player = self._download_json( | ||||
|             video_base_url + 'configuration', video_id, | ||||
|             'Downloading player config JSON metadata')['player'] | ||||
|         options = player['options'] | ||||
|  | ||||
|         video_info = {} | ||||
|         video_info_str = self._search_regex( | ||||
|             r'videoInfo\s*=\s*({.+});', webpage, | ||||
|             'video info', fatal=False) | ||||
|         if video_info_str: | ||||
|             video_info = self._parse_json( | ||||
|                 video_info_str, video_id, fatal=False) or {} | ||||
|         user = options['user'] | ||||
|         if not user.get('hasAccess'): | ||||
|             raise ExtractorError( | ||||
|                 'This video is only available for paying users', expected=True) | ||||
|             # self.raise_login_required() # FIXME: Login is not implemented | ||||
|  | ||||
|         options = player_config.get('options') or {} | ||||
|         metas = options.get('metas') or {} | ||||
|         links = player_config.get('links') or {} | ||||
|         sub_path = player_config.get('subtitles') | ||||
|         error = None | ||||
|         if not links: | ||||
|             links_url = player_config.get('linksurl') or options['videoUrl'] | ||||
|             token = options['token'] | ||||
|         token = self._download_json( | ||||
|             user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'), | ||||
|             video_id, 'Downloading access token', headers={ | ||||
|                 'x-player-refresh-token': user['refreshToken'] | ||||
|             }, data=b'')['token'] | ||||
|  | ||||
|         links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') | ||||
|         self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) | ||||
|         message = bytes_to_intlist(json.dumps({ | ||||
|             'k': self._K, | ||||
|             'e': 60, | ||||
|             't': token, | ||||
|         })) | ||||
|  | ||||
|         # Sometimes authentication fails for no apparent reason; retry with | ||||
|         # a different random padding | ||||
|         links_data = None | ||||
|         for _ in range(3): | ||||
|             padded_message = intlist_to_bytes(pkcs1pad(message, 128)) | ||||
|             n, e = self._RSA_KEY | ||||
|             encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) | ||||
|             authorization = base64.b64encode(encrypted_message).decode() | ||||
|  | ||||
|             try: | ||||
|                 links_data = self._download_json( | ||||
|                 urljoin(self._BASE_URL, links_url), video_id, | ||||
|                 'Downloading links JSON metadata', headers={ | ||||
|                     'Authorization': 'Bearer ' + authorization, | ||||
|                     links_url, video_id, 'Downloading links JSON metadata', headers={ | ||||
|                         'X-Player-Token': authorization | ||||
|                     }, query={ | ||||
|                         'freeWithAds': 'true', | ||||
|                         'adaptive': 'false', | ||||
|                         'withMetadata': 'true', | ||||
|                         'source': 'Web' | ||||
|                     }) | ||||
|                 break | ||||
|             except ExtractorError as e: | ||||
|                 if not isinstance(e.cause, compat_HTTPError): | ||||
|                     raise e | ||||
|  | ||||
|                 if e.cause.code == 401: | ||||
|                     # This usually goes away with a different random pkcs1pad, so retry | ||||
|                     continue | ||||
|  | ||||
|                 error = self._parse_json(e.cause.read(), video_id) | ||||
|                 message = error.get('message') | ||||
|                 if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': | ||||
|                     self.raise_geo_restricted(msg=message) | ||||
|                 else: | ||||
|                     raise ExtractorError(message) | ||||
|         else: | ||||
|             raise ExtractorError('Giving up retrying') | ||||
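The loop above retries the whole encrypt-and-request round trip because the random PKCS#1 padding occasionally yields a token the server rejects with HTTP 401. Reduced to its shape (encrypt_once and request are placeholders standing in for the extractor's encryption and _download_json steps, and PermissionError stands in for the 401 case):

    def fetch_links(encrypt_once, request, attempts=3):
        for _ in range(attempts):
            try:
                return request(encrypt_once())   # fresh random padding on every attempt
            except PermissionError:              # i.e. the server answered 401
                continue
        raise RuntimeError('Giving up retrying')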
|  | ||||
|         links = links_data.get('links') or {} | ||||
|             metas = metas or links_data.get('meta') or {} | ||||
|             sub_path = sub_path or links_data.get('subtitles') or \ | ||||
|                 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id | ||||
|             sub_path += '&token=' + token | ||||
|             error = links_data.get('error') | ||||
|         title = metas.get('title') or video_info['title'] | ||||
|         metas = links_data.get('metadata') or {} | ||||
|         sub_url = (links.get('subtitles') or {}).get('all') | ||||
|         video_info = links_data.get('video') or {} | ||||
|         title = metas['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, qualities in links.items(): | ||||
|         for format_id, qualities in (links.get('streaming') or {}).items(): | ||||
|             if not isinstance(qualities, dict): | ||||
|                 continue | ||||
|             for quality, load_balancer_url in qualities.items(): | ||||
| @@ -189,19 +218,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' | ||||
|                     for f in m3u8_formats: | ||||
|                         f['language'] = 'fr' | ||||
|                 formats.extend(m3u8_formats) | ||||
|         if not error: | ||||
|             error = options.get('error') | ||||
|         if not formats and error: | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video = (self._download_json( | ||||
|             self._API_BASE_URL + 'video/%s' % video_id, video_id, | ||||
|             'Downloading additional video metadata', fatal=False) or {}).get('video') or {} | ||||
|         show = video.get('show') or {} | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': strip_or_none(metas.get('summary') or video_info.get('resume')), | ||||
|             'thumbnail': video_info.get('image'), | ||||
|             'description': strip_or_none(metas.get('summary') or video.get('summary')), | ||||
|             'thumbnail': video_info.get('image') or player.get('image'), | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(sub_path, video_id), | ||||
|             'episode': metas.get('subtitle') or video_info.get('videoTitle'), | ||||
|             'series': video_info.get('playlistTitle'), | ||||
|             'subtitles': self.extract_subtitles(sub_url, video_id), | ||||
|             'episode': metas.get('subtitle') or video.get('name'), | ||||
|             'episode_number': int_or_none(video.get('shortNumber')), | ||||
|             'series': show.get('title'), | ||||
|             'season_number': int_or_none(video.get('season')), | ||||
|             'duration': int_or_none(video_info.get('duration') or video.get('duration')), | ||||
|             'release_date': unified_strdate(video.get('releaseDate')), | ||||
|             'average_rating': float_or_none(video.get('rating') or metas.get('rating')), | ||||
|             'comment_count': int_or_none(video.get('commentsCount')), | ||||
|         } | ||||
|   | ||||
| @@ -5,20 +5,32 @@ import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     GeoRestrictedError, | ||||
|     int_or_none, | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AENetworksBaseIE(ThePlatformIE): | ||||
|     _BASE_URL_REGEX = r'''(?x)https?:// | ||||
|         (?:(?:www|play|watch)\.)? | ||||
|         (?P<domain> | ||||
|             (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com| | ||||
|             fyi\.tv | ||||
|         )/''' | ||||
|     _THEPLATFORM_KEY = 'crazyjava' | ||||
|     _THEPLATFORM_SECRET = 's3cr3t' | ||||
|     _DOMAIN_MAP = { | ||||
|         'history.com': ('HISTORY', 'history'), | ||||
|         'aetv.com': ('AETV', 'aetv'), | ||||
|         'mylifetime.com': ('LIFETIME', 'lifetime'), | ||||
|         'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'), | ||||
|         'fyi.tv': ('FYI', 'fyi'), | ||||
|         'historyvault.com': (None, 'historyvault'), | ||||
|         'biography.com': (None, 'biography'), | ||||
|     } | ||||
|  | ||||
|     def _extract_aen_smil(self, smil_url, video_id, auth=None): | ||||
|         query = {'mbr': 'true'} | ||||
| @@ -31,7 +43,7 @@ class AENetworksBaseIE(ThePlatformIE): | ||||
|             'assetTypes': 'high_video_s3' | ||||
|         }, { | ||||
|             'assetTypes': 'high_video_s3', | ||||
|             'switch': 'hls_ingest_fastly' | ||||
|             'switch': 'hls_high_fastly', | ||||
|         }] | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
| @@ -44,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE): | ||||
|                 tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                     m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes'])) | ||||
|             except ExtractorError as e: | ||||
|                 if isinstance(e, GeoRestrictedError): | ||||
|                     raise | ||||
|                 last_e = e | ||||
|                 continue | ||||
|             formats.extend(tp_formats) | ||||
| @@ -57,24 +71,45 @@ class AENetworksBaseIE(ThePlatformIE): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _extract_aetn_info(self, domain, filter_key, filter_value, url): | ||||
|         requestor_id, brand = self._DOMAIN_MAP[domain] | ||||
|         result = self._download_json( | ||||
|             'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand, | ||||
|             filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0] | ||||
|         title = result['title'] | ||||
|         video_id = result['id'] | ||||
|         media_url = result['publicUrl'] | ||||
|         theplatform_metadata = self._download_theplatform_metadata(self._search_regex( | ||||
|             r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) | ||||
|         info = self._parse_theplatform_metadata(theplatform_metadata) | ||||
|         auth = None | ||||
|         if theplatform_metadata.get('AETN$isBehindWall'): | ||||
|             resource = self._get_mvpd_resource( | ||||
|                 requestor_id, theplatform_metadata['title'], | ||||
|                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), | ||||
|                 theplatform_metadata['ratings'][0]['rating']) | ||||
|             auth = self._extract_mvpd_auth( | ||||
|                 url, video_id, requestor_id, resource) | ||||
|         info.update(self._extract_aen_smil(media_url, video_id, auth)) | ||||
|         info.update({ | ||||
|             'title': title, | ||||
|             'series': result.get('seriesName'), | ||||
|             'season_number': int_or_none(result.get('tvSeasonNumber')), | ||||
|             'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')), | ||||
|         }) | ||||
|         return info | ||||
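In concrete terms, _extract_aetn_info() resolves a watch URL to the aetnd feed like this (the domain and canonical path are taken from the first test below; the brand lookup is a subset of _DOMAIN_MAP):

    domain = 'history.com'
    canonical = '/shows/mountain-men/season-1/episode-1'
    brand = {'history.com': 'history', 'aetv.com': 'aetv'}[domain]
    feed_url = 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand
    query = {'filter[canonical]': canonical}
    print(feed_url, query)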
|  | ||||
|  | ||||
| class AENetworksIE(AENetworksBaseIE): | ||||
|     IE_NAME = 'aenetworks' | ||||
|     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?:www\.)? | ||||
|                         (?P<domain> | ||||
|                             (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com| | ||||
|                             fyi\.tv | ||||
|                         )/ | ||||
|     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id> | ||||
|         shows/[^/]+/season-\d+/episode-\d+| | ||||
|         (?: | ||||
|                             shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})| | ||||
|                             movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?| | ||||
|                             specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)| | ||||
|                             collections/[^/]+/(?P<collection_display_id>[^/]+) | ||||
|                         ) | ||||
|                     ''' | ||||
|             (?:movie|special)s/[^/]+| | ||||
|             (?:shows/[^/]+/)?videos | ||||
|         )/[^/?#&]+ | ||||
|     )''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', | ||||
|         'info_dict': { | ||||
| @@ -91,22 +126,23 @@ class AENetworksIE(AENetworksBaseIE): | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/shows/ancient-aliens/season-1', | ||||
|         'info_dict': { | ||||
|             'id': '71889446852', | ||||
|         }, | ||||
|         'playlist_mincount': 5, | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/atlanta-plastic', | ||||
|         'info_dict': { | ||||
|             'id': 'SERIES4317', | ||||
|             'title': 'Atlanta Plastic', | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|         'skip': 'This video is only available for users of participating TV providers.', | ||||
|     }, { | ||||
|         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', | ||||
|         'only_matching': True | ||||
|         'info_dict': { | ||||
|             'id': '600587331957', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Inlawful Entry', | ||||
|             'description': 'md5:57c12115a2b384d883fe64ca50529e08', | ||||
|             'timestamp': 1452634428, | ||||
|             'upload_date': '20160112', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }, { | ||||
|         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', | ||||
|         'only_matching': True | ||||
| @@ -117,78 +153,125 @@ class AENetworksIE(AENetworksBaseIE): | ||||
|         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us', | ||||
|         'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/videos/history-of-valentines-day', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|     _DOMAIN_TO_REQUESTOR_ID = { | ||||
|         'history.com': 'HISTORY', | ||||
|         'aetv.com': 'AETV', | ||||
|         'mylifetime.com': 'LIFETIME', | ||||
|         'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB', | ||||
|         'fyi.tv': 'FYI', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups() | ||||
|         display_id = show_path or movie_display_id or special_display_id or collection_display_id | ||||
|         webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers()) | ||||
|         if show_path: | ||||
|             url_parts = show_path.split('/') | ||||
|             url_parts_len = len(url_parts) | ||||
|             if url_parts_len == 1: | ||||
|                 entries = [] | ||||
|                 for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): | ||||
|                     entries.append(self.url_result( | ||||
|                         compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) | ||||
|                 if entries: | ||||
|                     return self.playlist_result( | ||||
|                         entries, self._html_search_meta('aetn:SeriesId', webpage), | ||||
|                         self._html_search_meta('aetn:SeriesTitle', webpage)) | ||||
|                 else: | ||||
|                     # single season | ||||
|                     url_parts_len = 2 | ||||
|             if url_parts_len == 2: | ||||
|                 entries = [] | ||||
|                 for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage): | ||||
|                     episode_attributes = extract_attributes(episode_item) | ||||
|                     episode_url = compat_urlparse.urljoin( | ||||
|                         url, episode_attributes['data-canonical']) | ||||
|                     entries.append(self.url_result( | ||||
|                         episode_url, 'AENetworks', | ||||
|                         episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id'))) | ||||
|                 return self.playlist_result( | ||||
|                     entries, self._html_search_meta('aetn:SeasonId', webpage)) | ||||
|         domain, canonical = re.match(self._VALID_URL, url).groups() | ||||
|         return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) | ||||
|  | ||||
|         video_id = self._html_search_meta('aetn:VideoID', webpage) | ||||
|         media_url = self._search_regex( | ||||
|             [r"media_url\s*=\s*'(?P<url>[^']+)'", | ||||
|              r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)', | ||||
|              r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'], | ||||
|             webpage, 'video url', group='url') | ||||
|         theplatform_metadata = self._download_theplatform_metadata(self._search_regex( | ||||
|             r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) | ||||
|         info = self._parse_theplatform_metadata(theplatform_metadata) | ||||
|         auth = None | ||||
|         if theplatform_metadata.get('AETN$isBehindWall'): | ||||
|             requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] | ||||
|             resource = self._get_mvpd_resource( | ||||
|                 requestor_id, theplatform_metadata['title'], | ||||
|                 theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), | ||||
|                 theplatform_metadata['ratings'][0]['rating']) | ||||
|             auth = self._extract_mvpd_auth( | ||||
|                 url, video_id, requestor_id, resource) | ||||
|         info.update(self._search_json_ld(webpage, video_id, fatal=False)) | ||||
|         info.update(self._extract_aen_smil(media_url, video_id, auth)) | ||||
|         return info | ||||
|  | ||||
| class AENetworksListBaseIE(AENetworksBaseIE): | ||||
|     def _call_api(self, resource, slug, brand, fields): | ||||
|         return self._download_json( | ||||
|             'https://yoga.appsvcs.aetnd.com/graphql', | ||||
|             slug, query={'brand': brand}, data=urlencode_postdata({ | ||||
|                 'query': '''{ | ||||
|   %s(slug: "%s") { | ||||
|     %s | ||||
|   } | ||||
| }''' % (resource, slug, fields), | ||||
|             }))['data'][resource] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         domain, slug = re.match(self._VALID_URL, url).groups() | ||||
|         _, brand = self._DOMAIN_MAP[domain] | ||||
|         playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) | ||||
|         base_url = 'http://watch.%s' % domain | ||||
|  | ||||
|         entries = [] | ||||
|         for item in (playlist.get(self._ITEMS_KEY) or []): | ||||
|             doc = self._get_doc(item) | ||||
|             canonical = doc.get('canonical') | ||||
|             if not canonical: | ||||
|                 continue | ||||
|             entries.append(self.url_result( | ||||
|                 base_url + canonical, AENetworksIE.ie_key(), doc.get('id'))) | ||||
|  | ||||
|         description = None | ||||
|         if self._PLAYLIST_DESCRIPTION_KEY: | ||||
|             description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, playlist.get('id'), | ||||
|             playlist.get(self._PLAYLIST_TITLE_KEY), description) | ||||
|  | ||||
|  | ||||
| class AENetworksCollectionIE(AENetworksListBaseIE): | ||||
|     IE_NAME = 'aenetworks:collection' | ||||
|     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://watch.historyvault.com/list/america-the-story-of-us', | ||||
|         'info_dict': { | ||||
|             'id': '282', | ||||
|             'title': 'America The Story of Us', | ||||
|         }, | ||||
|         'playlist_mincount': 12, | ||||
|     }, { | ||||
|         'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'https://www.historyvault.com/collections/mysteryquest', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|     _RESOURCE = 'list' | ||||
|     _ITEMS_KEY = 'items' | ||||
|     _PLAYLIST_TITLE_KEY = 'display_title' | ||||
|     _PLAYLIST_DESCRIPTION_KEY = None | ||||
|     _FIELDS = '''id | ||||
|     display_title | ||||
|     items { | ||||
|       ... on ListVideoItem { | ||||
|         doc { | ||||
|           canonical | ||||
|           id | ||||
|         } | ||||
|       } | ||||
|     }''' | ||||
|  | ||||
|     def _get_doc(self, item): | ||||
|         return item.get('doc') or {} | ||||
|  | ||||
|  | ||||
| class AENetworksShowIE(AENetworksListBaseIE): | ||||
|     IE_NAME = 'aenetworks:show' | ||||
|     _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/shows/ancient-aliens', | ||||
|         'info_dict': { | ||||
|             'id': 'SH012427480000', | ||||
|             'title': 'Ancient Aliens', | ||||
|             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f', | ||||
|         }, | ||||
|         'playlist_mincount': 168, | ||||
|     }] | ||||
|     _RESOURCE = 'series' | ||||
|     _ITEMS_KEY = 'episodes' | ||||
|     _PLAYLIST_TITLE_KEY = 'title' | ||||
|     _PLAYLIST_DESCRIPTION_KEY = 'description' | ||||
|     _FIELDS = '''description | ||||
|     id | ||||
|     title | ||||
|     episodes { | ||||
|       canonical | ||||
|       id | ||||
|     }''' | ||||
|  | ||||
|     def _get_doc(self, item): | ||||
|         return item | ||||
|  | ||||
|  | ||||
| class HistoryTopicIE(AENetworksBaseIE): | ||||
| @@ -204,6 +287,7 @@ class HistoryTopicIE(AENetworksBaseIE): | ||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||
|             'timestamp': 1375819729, | ||||
|             'upload_date': '20130806', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
| @@ -212,36 +296,47 @@ class HistoryTopicIE(AENetworksBaseIE): | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }] | ||||
|  | ||||
|     def theplatform_url_result(self, theplatform_url, video_id, query): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': smuggle_url( | ||||
|                 update_url_query(theplatform_url, query), | ||||
|                 { | ||||
|                     'sig': { | ||||
|                         'key': self._THEPLATFORM_KEY, | ||||
|                         'secret': self._THEPLATFORM_SECRET, | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             'http://www.history.com/videos/' + display_id, | ||||
|             AENetworksIE.ie_key()) | ||||
|  | ||||
|  | ||||
| class HistoryPlayerIE(AENetworksBaseIE): | ||||
|     IE_NAME = 'history:player' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)' | ||||
|     _TESTS = [] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         domain, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         return self._extract_aetn_info(domain, 'id', video_id, url) | ||||
|  | ||||
|  | ||||
| class BiographyIE(AENetworksBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808', | ||||
|         'info_dict': { | ||||
|             'id': '30322987', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Vincent Van Gogh - Full Episode', | ||||
|             'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.', | ||||
|             'timestamp': 1311970571, | ||||
|             'upload_date': '20110729', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         }, | ||||
|                     'force_smil_url': True | ||||
|                 }), | ||||
|             'ie_key': 'ThePlatform', | ||||
|         } | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_id = self._search_regex( | ||||
|             r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid') | ||||
|         result = self._download_json( | ||||
|             'https://feeds.video.aetnd.com/api/v2/history/videos', | ||||
|             video_id, query={'filter[id]': video_id})['results'][0] | ||||
|         title = result['title'] | ||||
|         info = self._extract_aen_smil(result['publicUrl'], video_id) | ||||
|         info.update({ | ||||
|             'title': title, | ||||
|             'description': result.get('description'), | ||||
|             'duration': int_or_none(result.get('duration')), | ||||
|             'timestamp': int_or_none(result.get('added'), 1000), | ||||
|         }) | ||||
|         return info | ||||
|         player_url = self._search_regex( | ||||
|             r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL, | ||||
|             webpage, 'player URL') | ||||
|         return self.url_result(player_url, HistoryPlayerIE.ie_key()) | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
| @@ -11,25 +13,22 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class AMCNetworksIE(ThePlatformIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', | ||||
|         'md5': '', | ||||
|         'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631', | ||||
|         'info_dict': { | ||||
|             'id': 's3MX01Nl4vPH', | ||||
|             'id': '4Lq1dzOnZGt0', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Maron - Season 4 - Step 1', | ||||
|             'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.', | ||||
|             'age_limit': 17, | ||||
|             'upload_date': '20160505', | ||||
|             'timestamp': 1462468831, | ||||
|             'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner", | ||||
|             'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.", | ||||
|             'upload_date': '20201120', | ||||
|             'timestamp': 1605904350, | ||||
|             'uploader': 'AMCN', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Requires TV provider accounts', | ||||
|     }, { | ||||
|         'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', | ||||
|         'only_matching': True, | ||||
| @@ -55,32 +54,34 @@ class AMCNetworksIE(ThePlatformIE): | ||||
|         'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _REQUESTOR_ID_MAP = { | ||||
|         'amc': 'AMC', | ||||
|         'bbcamerica': 'BBCA', | ||||
|         'ifc': 'IFC', | ||||
|         'sundancetv': 'SUNDANCE', | ||||
|         'wetv': 'WETV', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         site, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         requestor_id = self._REQUESTOR_ID_MAP[site] | ||||
|         properties = self._download_json( | ||||
|             'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id), | ||||
|             display_id)['data']['properties'] | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|             'manifest': 'm3u', | ||||
|         } | ||||
|         media_url = self._search_regex( | ||||
|             r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', | ||||
|             webpage, 'media url') | ||||
|         theplatform_metadata = self._download_theplatform_metadata(self._search_regex( | ||||
|             r'link\.theplatform\.com/s/([^?]+)', | ||||
|             media_url, 'theplatform_path'), display_id) | ||||
|         tp_path = 'M_UwQC/media/' + properties['videoPid'] | ||||
|         media_url = 'https://link.theplatform.com/s/' + tp_path | ||||
|         theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id) | ||||
|         info = self._parse_theplatform_metadata(theplatform_metadata) | ||||
|         video_id = theplatform_metadata['pid'] | ||||
|         title = theplatform_metadata['title'] | ||||
|         rating = try_get( | ||||
|             theplatform_metadata, lambda x: x['ratings'][0]['rating']) | ||||
|         auth_required = self._search_regex( | ||||
|             r'window\.authRequired\s*=\s*(true|false);', | ||||
|             webpage, 'auth required') | ||||
|         if auth_required == 'true': | ||||
|             requestor_id = self._search_regex( | ||||
|                 r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', | ||||
|                 webpage, 'requestor id') | ||||
|         video_category = properties.get('videoCategory') | ||||
|         if video_category and video_category.endswith('-Auth'): | ||||
|             resource = self._get_mvpd_resource( | ||||
|                 requestor_id, title, video_id, rating) | ||||
|             query['auth'] = self._extract_mvpd_auth( | ||||
|   | ||||
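Put together, the rewritten flow first fetches the properties blob from content-delivery-gw, builds the theplatform media path from its videoPid, and still performs TV-provider (MVPD) authentication when the video category ends in '-Auth'. A rough sketch with placeholder values (the URL templates and the '-Auth' convention come from the code above; the pid, path and category are hypothetical):

    requestor_id = 'BBCA'   # _REQUESTOR_ID_MAP['bbcamerica']
    display_id = 'shows/some-show/videos/some-episode'    # hypothetical watch path
    properties_url = ('https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/'
                      '%s/url/%s' % (requestor_id.lower(), display_id))
    video_pid = 'XXXXXXXXXXXX'                 # placeholder for properties['videoPid']
    video_category = 'Full Episode-Auth'       # placeholder for properties.get('videoCategory')
    media_url = 'https://link.theplatform.com/s/M_UwQC/media/' + video_pid
    needs_tv_provider_auth = video_category.endswith('-Auth')
    print(properties_url)
    print(media_url, needs_tv_provider_auth)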
| @@ -1,33 +1,33 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AmericasTestKitchenIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers', | ||||
|         'md5': 'b861c3e365ac38ad319cfd509c30577f', | ||||
|         'info_dict': { | ||||
|             'id': '5b400b9ee338f922cb06450c', | ||||
|             'title': 'Weeknight Japanese Suppers', | ||||
|             'title': 'Japanese Suppers', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8', | ||||
|             'description': 'md5:64e606bfee910627efc4b5f050de92b3', | ||||
|             'thumbnail': r're:^https?://', | ||||
|             'timestamp': 1523664000, | ||||
|             'upload_date': '20180414', | ||||
|             'release_date': '20180414', | ||||
|             'release_date': '20180410', | ||||
|             'series': "America's Test Kitchen", | ||||
|             'season_number': 18, | ||||
|             'episode': 'Weeknight Japanese Suppers', | ||||
|             'episode': 'Japanese Suppers', | ||||
|             'episode_number': 15, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -36,47 +36,31 @@ class AmericasTestKitchenIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         resource_type, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         is_episode = resource_type == 'episode' | ||||
|         if is_episode: | ||||
|             resource_type = 'episodes' | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>', | ||||
|                 webpage, 'initial context'), | ||||
|             video_id, js_to_json) | ||||
|  | ||||
|         ep_data = try_get( | ||||
|             video_data, | ||||
|             (lambda x: x['episodeDetail']['content']['data'], | ||||
|              lambda x: x['videoDetail']['content']['data']), dict) | ||||
|         ep_meta = ep_data.get('full_video', {}) | ||||
|  | ||||
|         zype_id = ep_data.get('zype_id') or ep_meta['zype_id'] | ||||
|  | ||||
|         title = ep_data.get('title') or ep_meta.get('title') | ||||
|         description = clean_html(ep_meta.get('episode_description') or ep_data.get( | ||||
|             'description') or ep_meta.get('description')) | ||||
|         thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url']) | ||||
|         release_date = unified_strdate(ep_data.get('aired_at')) | ||||
|  | ||||
|         season_number = int_or_none(ep_meta.get('season_number')) | ||||
|         episode = ep_meta.get('title') | ||||
|         episode_number = int_or_none(ep_meta.get('episode_number')) | ||||
|         resource = self._download_json( | ||||
|             'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) | ||||
|         video = resource['video'] if is_episode else resource | ||||
|         episode = resource if is_episode else resource.get('episode') or {} | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id, | ||||
|             'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], | ||||
|             'ie_key': 'Zype', | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'release_date': release_date, | ||||
|             'series': "America's Test Kitchen", | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|             'description': clean_html(video.get('description')), | ||||
|             'release_date': unified_strdate(video.get('publishDate')), | ||||
|             'series': try_get(episode, lambda x: x['show']['title']), | ||||
|             'episode': episode.get('title'), | ||||
|         } | ||||
|   | ||||
| @@ -116,7 +116,76 @@ class AnvatoIE(InfoExtractor): | ||||
|         'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn', | ||||
|         'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W', | ||||
|         'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ', | ||||
|         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' | ||||
|         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', | ||||
|         'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z', | ||||
|         'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B', | ||||
|         'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj', | ||||
|         'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l', | ||||
|         '04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P', | ||||
|         'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A', | ||||
|         'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V', | ||||
|         'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z', | ||||
|         'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9', | ||||
|         'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e', | ||||
|         'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D', | ||||
|         'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d', | ||||
|         'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ', | ||||
|         'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V', | ||||
|         'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe', | ||||
|         'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP', | ||||
|         '3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV', | ||||
|         'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v', | ||||
|         'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q', | ||||
|         'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV', | ||||
|         'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r', | ||||
|         'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR', | ||||
|         'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0', | ||||
|         'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl', | ||||
|         'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923', | ||||
|         '7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P', | ||||
|         '3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa', | ||||
|         '3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V', | ||||
|         'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5', | ||||
|         'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ', | ||||
|         'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye', | ||||
|         'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o', | ||||
|         'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e', | ||||
|         'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z', | ||||
|         'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R', | ||||
|         '7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29', | ||||
|         'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q', | ||||
|         'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp', | ||||
|         'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze', | ||||
|         '5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ', | ||||
|         '70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa', | ||||
|         '26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ', | ||||
|         'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL', | ||||
|         'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo', | ||||
|         'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV', | ||||
|         '3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa', | ||||
|         'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y', | ||||
|         '7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P', | ||||
|         'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO', | ||||
|         'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr', | ||||
|         '5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy', | ||||
|         'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn', | ||||
|         '3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj', | ||||
|         'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29', | ||||
|         'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V', | ||||
|         'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5', | ||||
|         'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy', | ||||
|         'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e', | ||||
|         '5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y', | ||||
|         'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0', | ||||
|         'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy', | ||||
|         'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV', | ||||
|         'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K', | ||||
|         'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23', | ||||
|         'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR', | ||||
|         'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R', | ||||
|         'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ', | ||||
|         'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L', | ||||
|         'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR', | ||||
|     } | ||||
|  | ||||
|     _MCP_TO_ACCESS_KEY_TABLE = { | ||||
| @@ -189,19 +258,17 @@ class AnvatoIE(InfoExtractor): | ||||
|  | ||||
|         video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii') | ||||
|         anvrid = md5_text(time.time() * 1000 * random.random())[:30] | ||||
|         payload = { | ||||
|             'api': { | ||||
|         api = { | ||||
|             'anvrid': anvrid, | ||||
|                 'anvstk': md5_text('%s|%s|%d|%s' % ( | ||||
|                     access_key, anvrid, server_time, | ||||
|                     self._ANVACK_TABLE.get(access_key, self._API_KEY))), | ||||
|             'anvts': server_time, | ||||
|             }, | ||||
|         } | ||||
|         api['anvstk'] = md5_text('%s|%s|%d|%s' % ( | ||||
|             access_key, anvrid, server_time, | ||||
|             self._ANVACK_TABLE.get(access_key, self._API_KEY))) | ||||
|  | ||||
|         return self._download_json( | ||||
|             video_data_url, video_id, transform_source=strip_jsonp, | ||||
|             data=json.dumps(payload).encode('utf-8')) | ||||
|             data=json.dumps({'api': api}).encode('utf-8')) | ||||
|  | ||||
|     def _get_anvato_videos(self, access_key, video_id): | ||||
|         video_data = self._get_video_json(access_key, video_id) | ||||
| @@ -259,7 +326,7 @@ class AnvatoIE(InfoExtractor): | ||||
|             'description': video_data.get('def_description'), | ||||
|             'tags': video_data.get('def_tags', '').split(','), | ||||
|             'categories': video_data.get('categories'), | ||||
|             'thumbnail': video_data.get('thumbnail'), | ||||
|             'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'), | ||||
|             'timestamp': int_or_none(video_data.get( | ||||
|                 'ts_published') or video_data.get('ts_added')), | ||||
|             'uploader': video_data.get('mcp_id'), | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     mimetype2ext, | ||||
| @@ -39,23 +40,15 @@ class AparatIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id, fatal=False) | ||||
|  | ||||
|         if not webpage: | ||||
|             # Note: There is an easier-to-parse configuration at | ||||
|             # http://www.aparat.com/video/video/config/videohash/%video_id | ||||
|             # but the URL in there does not work | ||||
|             webpage = self._download_webpage( | ||||
|                 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, | ||||
|                 video_id) | ||||
|  | ||||
|         options = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)', | ||||
|                 webpage, 'options', group='value'), | ||||
|             video_id) | ||||
|  | ||||
|         player = options['plugins']['sabaPlayerPlugin'] | ||||
|         options = self._parse_json(self._search_regex( | ||||
|             r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for sources in player['multiSRC']: | ||||
|         for sources in (options.get('multiSRC') or []): | ||||
|             for item in sources: | ||||
|                 if not isinstance(item, dict): | ||||
|                     continue | ||||
| @@ -85,11 +78,12 @@ class AparatIE(InfoExtractor): | ||||
|         info = self._search_json_ld(webpage, video_id, default={}) | ||||
|  | ||||
|         if not info.get('title'): | ||||
|             info['title'] = player['title'] | ||||
|             info['title'] = get_element_by_id('videoTitle', webpage) or \ | ||||
|                 self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True) | ||||
|  | ||||
|         return merge_dicts(info, { | ||||
|             'id': video_id, | ||||
|             'thumbnail': url_or_none(options.get('poster')), | ||||
|             'duration': int_or_none(player.get('duration')), | ||||
|             'duration': int_or_none(options.get('duration')), | ||||
|             'formats': formats, | ||||
|         }) | ||||
|   | ||||
youtube_dl/extractor/applepodcasts.py (new file, 61 lines)
| @@ -0,0 +1,61 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_podcast_url, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ApplePodcastsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', | ||||
|         'md5': 'df02e6acb11c10e844946a39e7222b08', | ||||
|         'info_dict': { | ||||
|             'id': '1000482637777', | ||||
|             'ext': 'mp3', | ||||
|             'title': '207 - Whitney Webb Returns', | ||||
|             'description': 'md5:13a73bade02d2e43737751e3987e1399', | ||||
|             'upload_date': '20200705', | ||||
|             'timestamp': 1593921600, | ||||
|             'duration': 6425, | ||||
|             'series': 'The Tim Dillon Show', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         episode_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, episode_id) | ||||
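|         # Hedged note: the episode metadata appears to be embedded in the page as JSON inside the "shoebox-ember-data-store" script tag | ||||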
|         ember_data = self._parse_json(self._search_regex( | ||||
|             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<', | ||||
|             webpage, 'ember data'), episode_id) | ||||
|         episode = ember_data['data']['attributes'] | ||||
|         description = episode.get('description') or {} | ||||
|  | ||||
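|         # Hedged note: the show name seems to be carried in the "included" relationships entry of type media/podcast | ||||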
|         series = None | ||||
|         for inc in (ember_data.get('included') or []): | ||||
|             if inc.get('type') == 'media/podcast': | ||||
|                 series = try_get(inc, lambda x: x['attributes']['name']) | ||||
|  | ||||
|         return { | ||||
|             'id': episode_id, | ||||
|             'title': episode['name'], | ||||
|             'url': clean_podcast_url(episode['assetUrl']), | ||||
|             'description': description.get('standard') or description.get('short'), | ||||
|             'timestamp': parse_iso8601(episode.get('releaseDateTime')), | ||||
|             'duration': int_or_none(episode.get('durationInMilliseconds'), 1000), | ||||
|             'series': series, | ||||
|         } | ||||
youtube_dl/extractor/arcpublishing.py (new file, 174 lines)
| @@ -0,0 +1,174 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ArcPublishingIE(InfoExtractor): | ||||
|     _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' | ||||
|     _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX | ||||
|     _TESTS = [{ | ||||
|         # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ | ||||
|         'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/ | ||||
|         'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.actionnewsjax.com/video/live-stream/ | ||||
|         'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/ | ||||
|         'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/ | ||||
|         'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/ | ||||
|         'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/ | ||||
|         'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/ | ||||
|         'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/ | ||||
|         'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/ | ||||
|         'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/ | ||||
|         'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html | ||||
|         'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
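|     # (org list, API host template) pairs; the org slug fills the %s placeholder | ||||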
|     _POWA_DEFAULTS = [ | ||||
|         (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'), | ||||
|         ([ | ||||
|             'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo', | ||||
|             'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom', | ||||
|             'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek', | ||||
|         ], 'video-api-cdn.%s.arcpublishing.com/api'), | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_urls(webpage): | ||||
|         entries = [] | ||||
|         # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview | ||||
|         for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): | ||||
|             powa = extract_attributes(powa_el) or {} | ||||
|             org = powa.get('data-org') | ||||
|             uuid = powa.get('data-uuid') | ||||
|             if org and uuid: | ||||
|                 entries.append('arcpublishing:%s:%s' % (org, uuid)) | ||||
|         return entries | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         org, uuid = re.match(self._VALID_URL, url).groups() | ||||
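|         # Pick the video API host template for this org, falling back to the generic prod CDN template | ||||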
|         for orgs, tmpl in self._POWA_DEFAULTS: | ||||
|             if org in orgs: | ||||
|                 base_api_tmpl = tmpl | ||||
|                 break | ||||
|         else: | ||||
|             base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api' | ||||
|         if org == 'wapo': | ||||
|             org = 'washpost' | ||||
|         video = self._download_json( | ||||
|             'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org), | ||||
|             uuid, query={'uuid': uuid})[0] | ||||
|         title = video['headlines']['basic'] | ||||
|         is_live = video.get('status') == 'live' | ||||
|  | ||||
|         urls = [] | ||||
|         formats = [] | ||||
|         for s in video.get('streams', []): | ||||
|             s_url = s.get('url') | ||||
|             if not s_url or s_url in urls: | ||||
|                 continue | ||||
|             urls.append(s_url) | ||||
|             stream_type = s.get('stream_type') | ||||
|             if stream_type == 'smil': | ||||
|                 smil_formats = self._extract_smil_formats( | ||||
|                     s_url, uuid, fatal=False) | ||||
|                 for f in smil_formats: | ||||
|                     if f['url'].endswith('/cfx/st'): | ||||
|                         f['app'] = 'cfx/st' | ||||
|                         if not f['play_path'].startswith('mp4:'): | ||||
|                             f['play_path'] = 'mp4:' + f['play_path'] | ||||
|                         if isinstance(f['tbr'], float): | ||||
|                             f['vbr'] = f['tbr'] * 1000 | ||||
|                             del f['tbr'] | ||||
|                             f['format_id'] = 'rtmp-%d' % f['vbr'] | ||||
|                 formats.extend(smil_formats) | ||||
|             elif stream_type in ('ts', 'hls'): | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native', | ||||
|                     m3u8_id='hls', fatal=False) | ||||
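|                 # Skip HLS variant sets where every format is video-only (no audio codec) | ||||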
|                 if all([f.get('acodec') == 'none' for f in m3u8_formats]): | ||||
|                     continue | ||||
|                 for f in m3u8_formats: | ||||
|                     if f.get('acodec') == 'none': | ||||
|                         f['preference'] = -40 | ||||
|                     elif f.get('vcodec') == 'none': | ||||
|                         f['preference'] = -50 | ||||
|                     height = f.get('height') | ||||
|                     if not height: | ||||
|                         continue | ||||
|                     vbr = self._search_regex( | ||||
|                         r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None) | ||||
|                     if vbr: | ||||
|                         f['vbr'] = int(vbr) | ||||
|                 formats.extend(m3u8_formats) | ||||
|             else: | ||||
|                 vbr = int_or_none(s.get('bitrate')) | ||||
|                 formats.append({ | ||||
|                     'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, | ||||
|                     'vbr': vbr, | ||||
|                     'width': int_or_none(s.get('width')), | ||||
|                     'height': int_or_none(s.get('height')), | ||||
|                     'filesize': int_or_none(s.get('filesize')), | ||||
|                     'url': s_url, | ||||
|                     'preference': -1, | ||||
|                 }) | ||||
|         self._sort_formats( | ||||
|             formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id')) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []): | ||||
|             subtitle_url = subtitle.get('url') | ||||
|             if subtitle_url: | ||||
|                 subtitles.setdefault('en', []).append({'url': subtitle_url}) | ||||
|  | ||||
|         return { | ||||
|             'id': uuid, | ||||
|             'title': self._live_title(title) if is_live else title, | ||||
|             'thumbnail': try_get(video, lambda x: x['promo_image']['url']), | ||||
|             'description': try_get(video, lambda x: x['subheadlines']['basic']), | ||||
|             'formats': formats, | ||||
|             'duration': int_or_none(video.get('duration'), 100), | ||||
|             'timestamp': parse_iso8601(video.get('created_date')), | ||||
|             'subtitles': subtitles, | ||||
|             'is_live': is_live, | ||||
|         } | ||||
| @@ -6,13 +6,11 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
|     strip_jsonp, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -20,22 +18,27 @@ class ArkenaIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         https?:// | ||||
|                             (?: | ||||
|                                 video\.arkena\.com/play2/embed/player\?| | ||||
|                                 video\.(?:arkena|qbrick)\.com/play2/embed/player\?| | ||||
|                                 play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+) | ||||
|                             ) | ||||
|                         ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411', | ||||
|         'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', | ||||
|         'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310', | ||||
|         'md5': '97f117754e5f3c020f5f26da4a44ebaf', | ||||
|         'info_dict': { | ||||
|             'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe', | ||||
|             'id': 'd8ab4607-00090107-aab86310', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Big Buck Bunny', | ||||
|             'description': 'Royalty free test video', | ||||
|             'timestamp': 1432816365, | ||||
|             'upload_date': '20150528', | ||||
|             'is_live': False, | ||||
|             'title': 'EM_HT20_117_roslund_v2.mp4', | ||||
|             'timestamp': 1608285912, | ||||
|             'upload_date': '20201218', | ||||
|             'duration': 1429.162667, | ||||
|             'subtitles': { | ||||
|                 'sv': 'count:3', | ||||
|             }, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893', | ||||
|         'only_matching': True, | ||||
| @@ -72,62 +75,89 @@ class ArkenaIE(InfoExtractor): | ||||
|             if not video_id or not account_id: | ||||
|                 raise ExtractorError('Invalid URL', expected=True) | ||||
|  | ||||
|         playlist = self._download_json( | ||||
|             'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_' | ||||
|             % (video_id, account_id), | ||||
|             video_id, transform_source=strip_jsonp)['Playlist'][0] | ||||
|  | ||||
|         media_info = playlist['MediaInfo'] | ||||
|         title = media_info['Title'] | ||||
|         media_files = playlist['MediaFiles'] | ||||
|  | ||||
|         is_live = False | ||||
|         formats = [] | ||||
|         for kind_case, kind_formats in media_files.items(): | ||||
|             kind = kind_case.lower() | ||||
|             for f in kind_formats: | ||||
|                 f_url = f.get('Url') | ||||
|                 if not f_url: | ||||
|                     continue | ||||
|                 is_live = f.get('Live') == 'true' | ||||
|                 exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None)) | ||||
|                 if kind == 'm3u8' or 'm3u8' in exts: | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         f_url, video_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id=kind, fatal=False, live=is_live)) | ||||
|                 elif kind == 'flash' or 'f4m' in exts: | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         f_url, video_id, f4m_id=kind, fatal=False)) | ||||
|                 elif kind == 'dash' or 'mpd' in exts: | ||||
|                     formats.extend(self._extract_mpd_formats( | ||||
|                         f_url, video_id, mpd_id=kind, fatal=False)) | ||||
|                 elif kind == 'silverlight': | ||||
|                     # TODO: process when ism is supported (see | ||||
|                     # https://github.com/ytdl-org/youtube-dl/issues/8118) | ||||
|                     continue | ||||
|                 else: | ||||
|                     tbr = float_or_none(f.get('Bitrate'), 1000) | ||||
|                     formats.append({ | ||||
|                         'url': f_url, | ||||
|                         'format_id': '%s-%d' % (kind, tbr) if tbr else kind, | ||||
|                         'tbr': tbr, | ||||
|         media = self._download_json( | ||||
|             'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), | ||||
|             video_id, query={ | ||||
|                 # https://video.qbrick.com/docs/api/examples/library-api.html | ||||
|                 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|         metadata = media.get('metadata') or {} | ||||
|         title = metadata['title'] | ||||
|  | ||||
|         description = media_info.get('Description') | ||||
|         video_id = media_info.get('VideoId') or video_id | ||||
|         timestamp = parse_iso8601(media_info.get('PublishDate')) | ||||
|         thumbnails = [{ | ||||
|             'url': thumbnail['Url'], | ||||
|             'width': int_or_none(thumbnail.get('Size')), | ||||
|         } for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')] | ||||
|         duration = None | ||||
|         formats = [] | ||||
|         thumbnails = [] | ||||
|         subtitles = {} | ||||
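|         # Renditions describe thumbnails, subtitles, progressive videos and manifest indexes | ||||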
|         for resource in media['asset']['resources']: | ||||
|             for rendition in (resource.get('renditions') or []): | ||||
|                 rendition_type = rendition.get('type') | ||||
|                 for i, link in enumerate(rendition.get('links') or []): | ||||
|                     href = link.get('href') | ||||
|                     if not href: | ||||
|                         continue | ||||
|                     if rendition_type == 'image': | ||||
|                         thumbnails.append({ | ||||
|                             'filesize': int_or_none(rendition.get('size')), | ||||
|                             'height': int_or_none(rendition.get('height')), | ||||
|                             'id': rendition.get('id'), | ||||
|                             'url': href, | ||||
|                             'width': int_or_none(rendition.get('width')), | ||||
|                         }) | ||||
|                     elif rendition_type == 'subtitle': | ||||
|                         subtitles.setdefault(rendition.get('language') or 'en', []).append({ | ||||
|                             'url': href, | ||||
|                         }) | ||||
|                     elif rendition_type == 'video': | ||||
|                         f = { | ||||
|                             'filesize': int_or_none(rendition.get('size')), | ||||
|                             'format_id': rendition.get('id'), | ||||
|                             'url': href, | ||||
|                         } | ||||
|                         video = try_get(rendition, lambda x: x['videos'][i], dict) | ||||
|                         if video: | ||||
|                             if not duration: | ||||
|                                 duration = float_or_none(video.get('duration')) | ||||
|                             f.update({ | ||||
|                                 'height': int_or_none(video.get('height')), | ||||
|                                 'tbr': int_or_none(video.get('bitrate'), 1000), | ||||
|                                 'vcodec': video.get('codec'), | ||||
|                                 'width': int_or_none(video.get('width')), | ||||
|                             }) | ||||
|                             audio = try_get(video, lambda x: x['audios'][0], dict) | ||||
|                             if audio: | ||||
|                                 f.update({ | ||||
|                                     'acodec': audio.get('codec'), | ||||
|                                     'asr': int_or_none(audio.get('sampleRate')), | ||||
|                                 }) | ||||
|                         formats.append(f) | ||||
|                     elif rendition_type == 'index': | ||||
|                         mime_type = link.get('mimeType') | ||||
|                         if mime_type == 'application/smil+xml': | ||||
|                             formats.extend(self._extract_smil_formats( | ||||
|                                 href, video_id, fatal=False)) | ||||
|                         elif mime_type == 'application/x-mpegURL': | ||||
|                             formats.extend(self._extract_m3u8_formats( | ||||
|                                 href, video_id, 'mp4', 'm3u8_native', | ||||
|                                 m3u8_id='hls', fatal=False)) | ||||
|                         elif mime_type == 'application/hds+xml': | ||||
|                             formats.extend(self._extract_f4m_formats( | ||||
|                                 href, video_id, f4m_id='hds', fatal=False)) | ||||
|                         elif mime_type == 'application/dash+xml': | ||||
|                             formats.extend(self._extract_mpd_formats( | ||||
|                                 href, video_id, mpd_id='dash', fatal=False)) | ||||
|                         elif mime_type == 'application/vnd.ms-sstr+xml': | ||||
|                             formats.extend(self._extract_ism_formats( | ||||
|                                 href, video_id, ism_id='mss', fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'timestamp': timestamp, | ||||
|             'is_live': is_live, | ||||
|             'description': metadata.get('description'), | ||||
|             'timestamp': parse_iso8601(media.get('created')), | ||||
|             'thumbnails': thumbnails, | ||||
|             'subtitles': subtitles, | ||||
|             'duration': duration, | ||||
|             'tags': media.get('tags'), | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
| @@ -1,27 +1,91 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import functools | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .kaltura import KalturaIE | ||||
| from ..utils import extract_attributes | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     OnDemandPagedList, | ||||
|     parse_age_limit, | ||||
|     strip_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AsianCrushIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))' | ||||
|     _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE | ||||
| class AsianCrushBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))' | ||||
|     _KALTURA_KEYS = [ | ||||
|         'video_url', 'progressive_url', 'download_url', 'thumbnail_url', | ||||
|         'widescreen_thumbnail_url', 'screencap_widescreen', | ||||
|     ] | ||||
|     _API_SUFFIX = {'retrocrush.tv': '-ott'} | ||||
|  | ||||
|     def _call_api(self, host, endpoint, video_id, query, resource): | ||||
|         return self._download_json( | ||||
|             'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id, | ||||
|             'Downloading %s JSON metadata' % resource, query=query, | ||||
|             headers=self.geo_verification_headers())['objects'] | ||||
|  | ||||
|     def _download_object_data(self, host, object_id, resource): | ||||
|         return self._call_api( | ||||
|             host, 'search', object_id, {'id': object_id}, resource)[0] | ||||
|  | ||||
|     def _get_object_description(self, obj): | ||||
|         return strip_or_none(obj.get('long_description') or obj.get('short_description')) | ||||
|  | ||||
|     def _parse_video_data(self, video): | ||||
|         title = video['name'] | ||||
|  | ||||
|         entry_id, partner_id = [None] * 2 | ||||
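|         # Derive the Kaltura partner and entry ids from any of the known Kaltura URL fields | ||||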
|         for k in self._KALTURA_KEYS: | ||||
|             k_url = video.get(k) | ||||
|             if k_url: | ||||
|                 mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url) | ||||
|                 if mobj: | ||||
|                     partner_id, entry_id = mobj.groups() | ||||
|                     break | ||||
|  | ||||
|         meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or [] | ||||
|         categories = list(filter(None, [c.get('name') for c in meta_categories])) | ||||
|  | ||||
|         show_info = video.get('show_info') or {} | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'kaltura:%s:%s' % (partner_id, entry_id), | ||||
|             'ie_key': KalturaIE.ie_key(), | ||||
|             'id': entry_id, | ||||
|             'title': title, | ||||
|             'description': self._get_object_description(video), | ||||
|             'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')), | ||||
|             'categories': categories, | ||||
|             'series': show_info.get('show_name'), | ||||
|             'season_number': int_or_none(show_info.get('season_num')), | ||||
|             'season_id': show_info.get('season_id'), | ||||
|             'episode_number': int_or_none(show_info.get('episode_num')), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class AsianCrushIE(AsianCrushBaseIE): | ||||
|     _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', | ||||
|         'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt', | ||||
|         'md5': 'c3b740e48d0ba002a42c0b72857beae6', | ||||
|         'info_dict': { | ||||
|             'id': '1_y4tmjm5r', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Women Who Flirt', | ||||
|             'description': 'md5:7e986615808bcfb11756eb503a751487', | ||||
|             'description': 'md5:b65c7e0ae03a85585476a62a186f924c', | ||||
|             'timestamp': 1496936429, | ||||
|             'upload_date': '20170608', | ||||
|             'uploader_id': 'craig@crifkin.com', | ||||
|             'age_limit': 13, | ||||
|             'categories': 'count:5', | ||||
|             'duration': 5812, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', | ||||
| @@ -41,67 +105,35 @@ class AsianCrushIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         host = mobj.group('host') | ||||
|         video_id = mobj.group('id') | ||||
|         host, video_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         if host == 'cocoro.tv': | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         entry_id, partner_id, title = [None] * 3 | ||||
|  | ||||
|         vars = self._parse_json( | ||||
|             self._search_regex( | ||||
|             embed_vars = self._parse_json(self._search_regex( | ||||
|                 r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', | ||||
|                 default='{}'), video_id, fatal=False) | ||||
|         if vars: | ||||
|             entry_id = vars.get('entry_id') | ||||
|             partner_id = vars.get('partner_id') | ||||
|             title = vars.get('vid_label') | ||||
|                 default='{}'), video_id, fatal=False) or {} | ||||
|             video_id = embed_vars.get('entry_id') or video_id | ||||
|  | ||||
|         if not entry_id: | ||||
|             entry_id = self._search_regex( | ||||
|                 r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') | ||||
|  | ||||
|         player = self._download_webpage( | ||||
|             'https://api.%s/embeddedVideoPlayer' % host, video_id, | ||||
|             query={'id': entry_id}) | ||||
|  | ||||
|         kaltura_id = self._search_regex( | ||||
|             r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player, | ||||
|             'kaltura id', group='id') | ||||
|  | ||||
|         if not partner_id: | ||||
|             partner_id = self._search_regex( | ||||
|                 r'/p(?:artner_id)?/(\d+)', player, 'partner id', | ||||
|                 default='513551') | ||||
|  | ||||
|         description = self._html_search_regex( | ||||
|             r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>', | ||||
|             webpage, 'description', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), | ||||
|             'ie_key': KalturaIE.ie_key(), | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         } | ||||
|         video = self._download_object_data(host, video_id, 'video') | ||||
|         return self._parse_video_data(video) | ||||
|  | ||||
|  | ||||
| class AsianCrushPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE | ||||
| class AsianCrushPlaylistIE(AsianCrushBaseIE): | ||||
|     _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', | ||||
|         'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai', | ||||
|         'info_dict': { | ||||
|             'id': '12481', | ||||
|             'title': 'Scholar Who Walks the Night', | ||||
|             'description': 'md5:7addd7c5132a09fd4741152d96cce886', | ||||
|             'id': '6447', | ||||
|             'title': 'Fruity Samurai', | ||||
|             'description': 'md5:7535174487e4a202d3872a7fc8f2f154', | ||||
|         }, | ||||
|         'playlist_count': 20, | ||||
|         'playlist_count': 13, | ||||
|     }, { | ||||
|         'url': 'https://www.yuyutv.com/series/013920s/peep-show/', | ||||
|         'only_matching': True, | ||||
| @@ -111,11 +143,27 @@ class AsianCrushPlaylistIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.retrocrush.tv/series/012355s/true-tears', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _PAGE_SIZE = 1000000000 | ||||
|  | ||||
|     def _fetch_page(self, domain, parent_id, page): | ||||
|         videos = self._call_api( | ||||
|             domain, 'getreferencedobjects', parent_id, { | ||||
|                 'max': self._PAGE_SIZE, | ||||
|                 'object_type': 'video', | ||||
|                 'parent_id': parent_id, | ||||
|                 'start': page * self._PAGE_SIZE, | ||||
|             }, 'page %d' % (page + 1)) | ||||
|         for video in videos: | ||||
|             yield self._parse_video_data(video) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         host, playlist_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         if host == 'cocoro.tv': | ||||
|             webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|             entries = [] | ||||
| @@ -141,5 +189,12 @@ class AsianCrushPlaylistIE(InfoExtractor): | ||||
|             description = self._og_search_description( | ||||
|                 webpage, default=None) or self._html_search_meta( | ||||
|                 'twitter:description', webpage, 'description', fatal=False) | ||||
|         else: | ||||
|             show = self._download_object_data(host, playlist_id, 'show') | ||||
|             title = show.get('name') | ||||
|             description = self._get_object_description(show) | ||||
|             entries = OnDemandPagedList( | ||||
|                 functools.partial(self._fetch_page, host, playlist_id), | ||||
|                 self._PAGE_SIZE) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|   | ||||
| @@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor): | ||||
|     _LOGIN_URL = 'https://account.bbc.com/signin' | ||||
|     _NETRC_MACHINE = 'bbc' | ||||
|  | ||||
|     _MEDIASELECTOR_URLS = [ | ||||
|     _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s' | ||||
|     _MEDIA_SETS = [ | ||||
|         # Provides HQ HLS streams with even better quality than the pc mediaset but fails | ||||
|         # with geolocation in some cases even when it's not geo restricted at all (e.g. | ||||
|         # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable. | ||||
|         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s', | ||||
|         'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s', | ||||
|         'iptv-all', | ||||
|         'pc', | ||||
|     ] | ||||
|  | ||||
|     _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection' | ||||
|     _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist' | ||||
|  | ||||
|     _NAMESPACES = ( | ||||
|         _MEDIASELECTION_NS, | ||||
|         _EMP_PLAYLIST_NS, | ||||
|     ) | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.bbc.co.uk/programmes/b039g8p7', | ||||
| @@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor): | ||||
|             'only_matching': True, | ||||
|         }] | ||||
|  | ||||
|     _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' | ||||
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None: | ||||
| @@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor): | ||||
|     def _extract_items(self, playlist): | ||||
|         return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) | ||||
|  | ||||
|     def _findall_ns(self, element, xpath): | ||||
|         elements = [] | ||||
|         for ns in self._NAMESPACES: | ||||
|             elements.extend(element.findall(xpath % ns)) | ||||
|         return elements | ||||
|  | ||||
|     def _extract_medias(self, media_selection): | ||||
|         error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS) | ||||
|         if error is None: | ||||
|             media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS) | ||||
|         if error is not None: | ||||
|             raise BBCCoUkIE.MediaSelectionError(error.get('id')) | ||||
|         return self._findall_ns(media_selection, './{%s}media') | ||||
|         error = media_selection.get('result') | ||||
|         if error: | ||||
|             raise BBCCoUkIE.MediaSelectionError(error) | ||||
|         return media_selection.get('media') or [] | ||||
|  | ||||
|     def _extract_connections(self, media): | ||||
|         return self._findall_ns(media, './{%s}connection') | ||||
|         return media.get('connection') or [] | ||||
|  | ||||
|     def _get_subtitles(self, media, programme_id): | ||||
|         subtitles = {} | ||||
| @@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                 cc_url, programme_id, 'Downloading captions', fatal=False) | ||||
|             if not isinstance(captions, compat_etree_Element): | ||||
|                 continue | ||||
|             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') | ||||
|             subtitles[lang] = [ | ||||
|             subtitles['en'] = [ | ||||
|                 { | ||||
|                     'url': connection.get('href'), | ||||
|                     'ext': 'ttml', | ||||
|                 }, | ||||
|             ] | ||||
|             break | ||||
|         return subtitles | ||||
|  | ||||
|     def _raise_extractor_error(self, media_selection_error): | ||||
| @@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor): | ||||
|  | ||||
|     def _download_media_selector(self, programme_id): | ||||
|         last_exception = None | ||||
|         for mediaselector_url in self._MEDIASELECTOR_URLS: | ||||
|         for media_set in self._MEDIA_SETS: | ||||
|             try: | ||||
|                 return self._download_media_selector_url( | ||||
|                     mediaselector_url % programme_id, programme_id) | ||||
|                     self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id) | ||||
|             except BBCCoUkIE.MediaSelectionError as e: | ||||
|                 if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): | ||||
|                     last_exception = e | ||||
| @@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor): | ||||
|         self._raise_extractor_error(last_exception) | ||||
|  | ||||
|     def _download_media_selector_url(self, url, programme_id=None): | ||||
|         media_selection = self._download_xml( | ||||
|             url, programme_id, 'Downloading media selection XML', | ||||
|         media_selection = self._download_json( | ||||
|             url, programme_id, 'Downloading media selection JSON', | ||||
|             expected_status=(403, 404)) | ||||
|         return self._process_media_selector(media_selection, programme_id) | ||||
|  | ||||
| @@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor): | ||||
|             if kind in ('video', 'audio'): | ||||
|                 bitrate = int_or_none(media.get('bitrate')) | ||||
|                 encoding = media.get('encoding') | ||||
|                 service = media.get('service') | ||||
|                 width = int_or_none(media.get('width')) | ||||
|                 height = int_or_none(media.get('height')) | ||||
|                 file_size = int_or_none(media.get('media_file_size')) | ||||
| @@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                     supplier = connection.get('supplier') | ||||
|                     transfer_format = connection.get('transferFormat') | ||||
|                     format_id = supplier or conn_kind or protocol | ||||
|                     if service: | ||||
|                         format_id = '%s_%s' % (service, format_id) | ||||
|                     # ASX playlist | ||||
|                     if supplier == 'asx': | ||||
|                         for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): | ||||
| @@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
|                             href, programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
|                             m3u8_id=format_id, fatal=False)) | ||||
| -                        if re.search(self._USP_RE, href): | ||||
| -                            usp_formats = self._extract_m3u8_formats( | ||||
| -                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), | ||||
| -                                programme_id, ext='mp4', entry_protocol='m3u8_native', | ||||
| -                                m3u8_id=format_id, fatal=False) | ||||
| -                            for f in usp_formats: | ||||
| -                                if f.get('height') and f['height'] > 720: | ||||
| -                                    continue | ||||
| -                                formats.append(f) | ||||
|                     elif transfer_format == 'hds': | ||||
|                         formats.extend(self._extract_f4m_formats( | ||||
|                             href, programme_id, f4m_id=format_id, fatal=False)) | ||||
|                     else: | ||||
| -                        if not service and not supplier and bitrate: | ||||
| +                        if not supplier and bitrate: | ||||
|                             format_id += '-%d' % bitrate | ||||
|                         fmt = { | ||||
|                             'format_id': format_id, | ||||
| @@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, group_id, 'Downloading video page') | ||||
|  | ||||
|         error = self._search_regex( | ||||
| -            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<', | ||||
| +            r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<', | ||||
|             webpage, 'error', default=None) | ||||
|         if error: | ||||
|             raise ExtractorError(error, expected=True) | ||||
| @@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE): | ||||
|     IE_DESC = 'BBC' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)' | ||||
|  | ||||
| -    _MEDIASELECTOR_URLS = [ | ||||
| -        # Provides HQ HLS streams but fails with geolocation in some cases when it's | ||||
| -        # even not geo restricted at all | ||||
| -        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s', | ||||
| -        # Provides more formats, namely direct mp4 links, but fails on some videos with | ||||
| -        # notukerror for non UK (?) users (e.g. | ||||
| -        # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) | ||||
| -        'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s', | ||||
| -        # Provides fewer formats, but works everywhere for everybody (hopefully) | ||||
| -        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s', | ||||
| +    _MEDIA_SETS = [ | ||||
| +        'mobile-tablet-main', | ||||
| +        'pc', | ||||
|     ] | ||||
|  | ||||
|     _TESTS = [{ | ||||
|   | ||||
youtube_dl/extractor/beampro.py | 194 lines (deleted)
| @@ -1,194 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     compat_str, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BeamProBaseIE(InfoExtractor): | ||||
|     _API_BASE = 'https://mixer.com/api/v1' | ||||
|     _RATINGS = {'family': 0, 'teen': 13, '18+': 18} | ||||
|  | ||||
|     def _extract_channel_info(self, chan): | ||||
|         user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) | ||||
|         return { | ||||
|             'uploader': chan.get('token') or try_get( | ||||
|                 chan, lambda x: x['user']['username'], compat_str), | ||||
|             'uploader_id': compat_str(user_id) if user_id else None, | ||||
|             'age_limit': self._RATINGS.get(chan.get('audience')), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class BeamProLiveIE(BeamProBaseIE): | ||||
|     IE_NAME = 'Mixer:live' | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://mixer.com/niterhayven', | ||||
|         'info_dict': { | ||||
|             'id': '261562', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Introducing The Witcher 3 //  The Grind Starts Now!', | ||||
|             'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', | ||||
|             'thumbnail': r're:https://.*\.jpg$', | ||||
|             'timestamp': 1483477281, | ||||
|             'upload_date': '20170103', | ||||
|             'uploader': 'niterhayven', | ||||
|             'uploader_id': '373396', | ||||
|             'age_limit': 18, | ||||
|             'is_live': True, | ||||
|             'view_count': int, | ||||
|         }, | ||||
|         'skip': 'niterhayven is offline', | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_name = self._match_id(url) | ||||
|  | ||||
|         chan = self._download_json( | ||||
|             '%s/channels/%s' % (self._API_BASE, channel_name), channel_name) | ||||
|  | ||||
|         if chan.get('online') is False: | ||||
|             raise ExtractorError( | ||||
|                 '{0} is offline'.format(channel_name), expected=True) | ||||
|  | ||||
|         channel_id = chan['id'] | ||||
|  | ||||
|         def manifest_url(kind): | ||||
|             return self._MANIFEST_URL_TEMPLATE % (channel_id, kind) | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls', | ||||
|             fatal=False) | ||||
|         formats.extend(self._extract_smil_formats( | ||||
|             manifest_url('smil'), channel_name, fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = { | ||||
|             'id': compat_str(chan.get('id') or channel_name), | ||||
|             'title': self._live_title(chan.get('name') or channel_name), | ||||
|             'description': clean_html(chan.get('description')), | ||||
|             'thumbnail': try_get( | ||||
|                 chan, lambda x: x['thumbnail']['url'], compat_str), | ||||
|             'timestamp': parse_iso8601(chan.get('updatedAt')), | ||||
|             'is_live': True, | ||||
|             'view_count': int_or_none(chan.get('viewersTotal')), | ||||
|             'formats': formats, | ||||
|         } | ||||
|         info.update(self._extract_channel_info(chan)) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class BeamProVodIE(BeamProBaseIE): | ||||
|     IE_NAME = 'Mixer:vod' | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://mixer.com/willow8714?vod=2259830', | ||||
|         'md5': 'b2431e6e8347dc92ebafb565d368b76b', | ||||
|         'info_dict': { | ||||
|             'id': '2259830', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'willow8714\'s Channel', | ||||
|             'duration': 6828.15, | ||||
|             'thumbnail': r're:https://.*source\.png$', | ||||
|             'timestamp': 1494046474, | ||||
|             'upload_date': '20170506', | ||||
|             'uploader': 'willow8714', | ||||
|             'uploader_id': '6085379', | ||||
|             'age_limit': 13, | ||||
|             'view_count': int, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_format(vod, vod_type): | ||||
|         if not vod.get('baseUrl'): | ||||
|             return [] | ||||
|  | ||||
|         if vod_type == 'hls': | ||||
|             filename, protocol = 'manifest.m3u8', 'm3u8_native' | ||||
|         elif vod_type == 'raw': | ||||
|             filename, protocol = 'source.mp4', 'https' | ||||
|         else: | ||||
|             assert False | ||||
|  | ||||
|         data = vod.get('data') if isinstance(vod.get('data'), dict) else {} | ||||
|  | ||||
|         format_id = [vod_type] | ||||
|         if isinstance(data.get('Height'), compat_str): | ||||
|             format_id.append('%sp' % data['Height']) | ||||
|  | ||||
|         return [{ | ||||
|             'url': urljoin(vod['baseUrl'], filename), | ||||
|             'format_id': '-'.join(format_id), | ||||
|             'ext': 'mp4', | ||||
|             'protocol': protocol, | ||||
|             'width': int_or_none(data.get('Width')), | ||||
|             'height': int_or_none(data.get('Height')), | ||||
|             'fps': int_or_none(data.get('Fps')), | ||||
|             'tbr': int_or_none(data.get('Bitrate'), 1000), | ||||
|         }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         vod_id = self._match_id(url) | ||||
|  | ||||
|         vod_info = self._download_json( | ||||
|             '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id) | ||||
|  | ||||
|         state = vod_info.get('state') | ||||
|         if state != 'AVAILABLE': | ||||
|             raise ExtractorError( | ||||
|                 'VOD %s is not available (state: %s)' % (vod_id, state), | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         thumbnail_url = None | ||||
|  | ||||
|         for vod in vod_info['vods']: | ||||
|             vod_type = vod.get('format') | ||||
|             if vod_type in ('hls', 'raw'): | ||||
|                 formats.extend(self._extract_format(vod, vod_type)) | ||||
|             elif vod_type == 'thumbnail': | ||||
|                 thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = { | ||||
|             'id': vod_id, | ||||
|             'title': vod_info.get('name') or vod_id, | ||||
|             'duration': float_or_none(vod_info.get('duration')), | ||||
|             'thumbnail': thumbnail_url, | ||||
|             'timestamp': parse_iso8601(vod_info.get('createdAt')), | ||||
|             'view_count': int_or_none(vod_info.get('viewsTotal')), | ||||
|             'formats': formats, | ||||
|         } | ||||
|         info.update(self._extract_channel_info(vod_info.get('channel') or {})) | ||||
|  | ||||
|         return info | ||||
							
								
								
									
youtube_dl/extractor/bfmtv.py | 103 lines (new file)
							| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import extract_attributes | ||||
|  | ||||
|  | ||||
| class BFMTVBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/' | ||||
|     _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html' | ||||
|     _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)' | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' | ||||
|  | ||||
|     def _brightcove_url_result(self, video_id, video_block): | ||||
|         account_id = video_block.get('accountid') or '876450612001' | ||||
|         player_id = video_block.get('playerid') or 'I2qBTln4u' | ||||
|         return self.url_result( | ||||
|             self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), | ||||
|             'BrightcoveNew', video_id) | ||||
|  | ||||
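
For reference, the shared _brightcove_url_result helper above only expands Brightcove's standard player URL template; a minimal sketch of that expansion, using the fallback account and player ids hard-coded in the base class (real pages supply their own values via the video_block attributes):

BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

def brightcove_url(video_id, account_id='876450612001', player_id='I2qBTln4u'):
    # Same expansion as BFMTVBaseIE._brightcove_url_result, minus the url_result wrapping.
    return BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)

print(brightcove_url('6196747868001'))
# http://players.brightcove.net/876450612001/I2qBTln4u_default/index.html?videoId=6196747868001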
|  | ||||
| class BFMTVIE(BFMTVBaseIE): | ||||
|     IE_NAME = 'bfmtv' | ||||
|     _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html', | ||||
|         'info_dict': { | ||||
|             'id': '6196747868001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"', | ||||
|             'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.', | ||||
|             'uploader_id': '876450610001', | ||||
|             'upload_date': '20201002', | ||||
|             'timestamp': 1601629620, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         bfmtv_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, bfmtv_id) | ||||
|         video_block = extract_attributes(self._search_regex( | ||||
|             self._VIDEO_BLOCK_REGEX, webpage, 'video block')) | ||||
|         return self._brightcove_url_result(video_block['videoid'], video_block) | ||||
|  | ||||
|  | ||||
| class BFMTVLiveIE(BFMTVIE): | ||||
|     IE_NAME = 'bfmtv:live' | ||||
|     _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bfmtv.com/en-direct/', | ||||
|         'info_dict': { | ||||
|             'id': '5615950982001', | ||||
|             'ext': 'mp4', | ||||
|             'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', | ||||
|             'uploader_id': '876450610001', | ||||
|             'upload_date': '20171018', | ||||
|             'timestamp': 1508329950, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.bfmtv.com/economie/en-direct/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class BFMTVArticleIE(BFMTVBaseIE): | ||||
|     IE_NAME = 'bfmtv:article' | ||||
|     _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html', | ||||
|         'info_dict': { | ||||
|             'id': '202101060198', | ||||
|             'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"', | ||||
|             'description': 'md5:947974089c303d3ac6196670ae262843', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     }, { | ||||
|         'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         bfmtv_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, bfmtv_id) | ||||
|  | ||||
|         entries = [] | ||||
|         for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage): | ||||
|             video_block = extract_attributes(video_block_el) | ||||
|             video_id = video_block.get('videoid') | ||||
|             if not video_id: | ||||
|                 continue | ||||
|             entries.append(self._brightcove_url_result(video_id, video_block)) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, bfmtv_id, self._og_search_title(webpage, fatal=False), | ||||
|             self._html_search_meta(['og:description', 'description'], webpage)) | ||||
							
								
								
									
youtube_dl/extractor/bibeltv.py | 30 lines (new file)
							| @@ -0,0 +1,30 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class BibelTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch', | ||||
|         'md5': '252f908192d611de038b8504b08bf97f', | ||||
|         'info_dict': { | ||||
|             'id': 'ref:329703', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sprachkurs in Malaiisch', | ||||
|             'description': 'md5:3e9f197d29ee164714e67351cf737dfe', | ||||
|             'timestamp': 1608316701, | ||||
|             'uploader_id': '5840105145001', | ||||
|             'upload_date': '20201218', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         crn_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew') | ||||
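
A quick illustration of the delegation above: the CRN id taken from the URL is substituted straight into the Brightcove reference-id template defined in the class.

BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
print(BRIGHTCOVE_URL_TEMPLATE % '329703')
# http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:329703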
							
								
								
									
youtube_dl/extractor/bongacams.py | 60 lines (new file)
							| @@ -0,0 +1,60 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BongaCamsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://de.bongacams.com/azumi-8', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://cn.bongacams.com/azumi-8', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         host = mobj.group('host') | ||||
|         channel_id = mobj.group('id') | ||||
|  | ||||
|         amf = self._download_json( | ||||
|             'https://%s/tools/amf.php' % host, channel_id, | ||||
|             data=urlencode_postdata(( | ||||
|                 ('method', 'getRoomData'), | ||||
|                 ('args[]', channel_id), | ||||
|                 ('args[]', 'false'), | ||||
|             )), headers={'X-Requested-With': 'XMLHttpRequest'}) | ||||
|  | ||||
|         server_url = amf['localData']['videoServerUrl'] | ||||
|  | ||||
|         uploader_id = try_get( | ||||
|             amf, lambda x: x['performerData']['username'], compat_str) or channel_id | ||||
|         uploader = try_get( | ||||
|             amf, lambda x: x['performerData']['displayName'], compat_str) | ||||
|         like_count = int_or_none(try_get( | ||||
|             amf, lambda x: x['performerData']['loversCount'])) | ||||
|  | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), | ||||
|             channel_id, 'mp4', m3u8_id='hls', live=True) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': channel_id, | ||||
|             'title': self._live_title(uploader or uploader_id), | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'like_count': like_count, | ||||
|             'age_limit': 18, | ||||
|             'is_live': True, | ||||
|             'formats': formats, | ||||
|         } | ||||
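
Outside the extractor, the same two-step lookup can be sketched roughly as below; the amf.php endpoint, parameters and playlist path are the ones used in the code above, while the channel name and the exact response handling are only illustrative assumptions.

import requests

host, channel_id = 'de.bongacams.com', 'azumi-8'
amf = requests.post(
    'https://%s/tools/amf.php' % host,
    data=[('method', 'getRoomData'), ('args[]', channel_id), ('args[]', 'false')],
    headers={'X-Requested-With': 'XMLHttpRequest'}).json()
# The HLS playlist lives on the per-room video server, keyed by the performer name.
playlist_url = '%s/hls/stream_%s/playlist.m3u8' % (
    amf['localData']['videoServerUrl'],
    amf['performerData']['username'])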
| @@ -28,6 +28,7 @@ from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     unsmuggle_url, | ||||
|     UnsupportedError, | ||||
| @@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|     def _parse_brightcove_metadata(self, json_data, video_id, headers={}): | ||||
|         title = json_data['name'].strip() | ||||
|  | ||||
| +        num_drm_sources = 0 | ||||
|         formats = [] | ||||
| -        for source in json_data.get('sources', []): | ||||
| +        sources = json_data.get('sources') or [] | ||||
| +        for source in sources: | ||||
|             container = source.get('container') | ||||
|             ext = mimetype2ext(source.get('type')) | ||||
|             src = source.get('src') | ||||
|             # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object | ||||
| -            if ext == 'ism' or container == 'WVM' or source.get('key_systems'): | ||||
| +            if container == 'WVM' or source.get('key_systems'): | ||||
| +                num_drm_sources += 1 | ||||
|                 continue | ||||
| +            elif ext == 'ism': | ||||
| +                continue | ||||
|             elif ext == 'm3u8' or container == 'M2TS': | ||||
|                 if not src: | ||||
| @@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|                         'format_id': build_format_id('rtmp'), | ||||
|                     }) | ||||
|                 formats.append(f) | ||||
| -        if not formats: | ||||
| -            # for sonyliv.com DRM protected videos | ||||
| -            s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl') | ||||
| -            if s3_source_url: | ||||
| -                formats.append({ | ||||
| -                    'url': s3_source_url, | ||||
| -                    'format_id': 'source', | ||||
| -                }) | ||||
|  | ||||
| -        errors = json_data.get('errors') | ||||
| -        if not formats and errors: | ||||
| +        if not formats: | ||||
| +            errors = json_data.get('errors') | ||||
| +            if errors: | ||||
|                 error = errors[0] | ||||
|                 raise ExtractorError( | ||||
|                     error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) | ||||
| +            if sources and num_drm_sources == len(sources): | ||||
| +                raise ExtractorError('This video is DRM protected.', expected=True) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -600,11 +601,14 @@ class BrightcoveNewIE(AdobePassIE): | ||||
|         store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) | ||||
|  | ||||
|         def extract_policy_key(): | ||||
| +            base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed) | ||||
| +            config = self._download_json( | ||||
| +                base_url + 'config.json', video_id, fatal=False) or {} | ||||
| +            policy_key = try_get( | ||||
| +                config, lambda x: x['video_cloud']['policy_key']) | ||||
| +            if not policy_key: | ||||
|                 webpage = self._download_webpage( | ||||
| -                'http://players.brightcove.net/%s/%s_%s/index.min.js' | ||||
| -                % (account_id, player_id, embed), video_id) | ||||
| - | ||||
| -            policy_key = None | ||||
| +                    base_url + 'index.min.js', video_id) | ||||
|  | ||||
|                 catalog = self._search_regex( | ||||
|                     r'catalog\(({.+?})\);', webpage, 'catalog', default=None) | ||||
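
The new lookup order above (the player's config.json first, index.min.js only as a fallback) can be sketched outside the extractor roughly like this; the fallback regex below is only illustrative, the real code keeps the existing catalog/policyKey parsing.

import re
import requests

def fetch_policy_key(account_id, player_id, embed='default'):
    base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
    config = requests.get(base_url + 'config.json').json()
    policy_key = (config.get('video_cloud') or {}).get('policy_key')
    if not policy_key:
        # Fall back to scraping the player JavaScript (illustrative pattern only).
        js = requests.get(base_url + 'index.min.js').text
        m = re.search(r'policyKey\s*:\s*["\']([^"\']+)', js)
        policy_key = m and m.group(1)
    return policy_key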
|   | ||||
| @@ -7,12 +7,12 @@ from .common import InfoExtractor | ||||
| from .gigya import GigyaBaseIE | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     strip_or_none, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     parse_iso8601, | ||||
|     str_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
| @@ -37,6 +37,7 @@ class CanvasIE(InfoExtractor): | ||||
|         'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _GEO_BYPASS = False | ||||
|     _HLS_ENTRY_PROTOCOLS_MAP = { | ||||
|         'HLS': 'm3u8_native', | ||||
|         'HLS_AES': 'm3u8', | ||||
| @@ -47,6 +48,8 @@ class CanvasIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         site_id, video_id = mobj.group('site_id'), mobj.group('id') | ||||
|  | ||||
|         data = None | ||||
|         if site_id != 'vrtvideo': | ||||
|             # Old API endpoint, serves more formats but may fail for some videos | ||||
|             data = self._download_json( | ||||
|                 'https://mediazone.vrt.be/api/v1/%s/assets/%s' | ||||
| @@ -55,21 +58,24 @@ class CanvasIE(InfoExtractor): | ||||
|  | ||||
|         # New API endpoint | ||||
|         if not data: | ||||
| +            headers = self.geo_verification_headers() | ||||
| +            headers.update({'Content-Type': 'application/json'}) | ||||
|             token = self._download_json( | ||||
|                 '%s/tokens' % self._REST_API_BASE, video_id, | ||||
| -                'Downloading token', data=b'', | ||||
| -                headers={'Content-Type': 'application/json'})['vrtPlayerToken'] | ||||
| +                'Downloading token', data=b'', headers=headers)['vrtPlayerToken'] | ||||
|             data = self._download_json( | ||||
|                 '%s/videos/%s' % (self._REST_API_BASE, video_id), | ||||
| -                video_id, 'Downloading video JSON', fatal=False, query={ | ||||
| +                video_id, 'Downloading video JSON', query={ | ||||
|                     'vrtPlayerToken': token, | ||||
|                     'client': '%s@PROD' % site_id, | ||||
|                 }, expected_status=400) | ||||
| -            message = data.get('message') | ||||
| -            if message and not data.get('title'): | ||||
| -                if data.get('code') == 'AUTHENTICATION_REQUIRED': | ||||
| -                    self.raise_login_required(message) | ||||
| -                raise ExtractorError(message, expected=True) | ||||
| +            if not data.get('title'): | ||||
| +                code = data.get('code') | ||||
| +                if code == 'AUTHENTICATION_REQUIRED': | ||||
| +                    self.raise_login_required() | ||||
| +                elif code == 'INVALID_LOCATION': | ||||
| +                    self.raise_geo_restricted(countries=['BE']) | ||||
| +                raise ExtractorError(data.get('message') or code, expected=True) | ||||
|  | ||||
|         title = data['title'] | ||||
|         description = data.get('description') | ||||
| @@ -205,20 +211,24 @@ class CanvasEenIE(InfoExtractor): | ||||
|  | ||||
| class VrtNUIE(GigyaBaseIE): | ||||
|     IE_DESC = 'VrtNU.be' | ||||
| -    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
| +    _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         # Available via old API endpoint | ||||
| -        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', | ||||
| +        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/', | ||||
|         'info_dict': { | ||||
| -            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', | ||||
| +            'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', | ||||
|             'ext': 'mp4', | ||||
| -            'title': 'De zwarte weduwe', | ||||
| -            'description': 'md5:db1227b0f318c849ba5eab1fef895ee4', | ||||
| +            'title': 'Postbus X - Aflevering 1 (Seizoen 1989)', | ||||
| +            'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7', | ||||
|             'duration': 1457.04, | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
| -            'season': 'Season 1', | ||||
| -            'season_number': 1, | ||||
| +            'series': 'Postbus X', | ||||
| +            'season': 'Seizoen 1989', | ||||
| +            'season_number': 1989, | ||||
| +            'episode': 'De zwarte weduwe', | ||||
|             'episode_number': 1, | ||||
| +            'timestamp': 1595822400, | ||||
| +            'upload_date': '20200727', | ||||
|         }, | ||||
|         'skip': 'This video is only available for registered users', | ||||
|         'params': { | ||||
| @@ -300,69 +310,25 @@ class VrtNUIE(GigyaBaseIE): | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
| -        webpage, urlh = self._download_webpage_handle(url, display_id) | ||||
| +        webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         attrs = extract_attributes(self._search_regex( | ||||
|             r'(<nui-media[^>]+>)', webpage, 'media element')) | ||||
|         video_id = attrs['videoid'] | ||||
|         publication_id = attrs.get('publicationid') | ||||
|         if publication_id: | ||||
|             video_id = publication_id + '$' + video_id | ||||
|  | ||||
|         page = (self._parse_json(self._search_regex( | ||||
|             r'digitalData\s*=\s*({.+?});', webpage, 'digial data', | ||||
|             default='{}'), video_id, fatal=False) or {}).get('page') or {} | ||||
|  | ||||
|         info = self._search_json_ld(webpage, display_id, default={}) | ||||
|  | ||||
|         # title is optional here since it may be extracted by extractor | ||||
|         # that is delegated from here | ||||
|         title = strip_or_none(self._html_search_regex( | ||||
|             r'(?ms)<h1 class="content__heading">(.+?)</h1>', | ||||
|             webpage, 'title', default=None)) | ||||
|  | ||||
|         description = self._html_search_regex( | ||||
|             r'(?ms)<div class="content__description">(.+?)</div>', | ||||
|             webpage, 'description', default=None) | ||||
|  | ||||
|         season = self._html_search_regex( | ||||
|             [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s* | ||||
|                     <span>seizoen\ (.+?)</span>\s* | ||||
|                 </div>''', | ||||
|              r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'], | ||||
|             webpage, 'season', default=None) | ||||
|  | ||||
|         season_number = int_or_none(season) | ||||
|  | ||||
|         episode_number = int_or_none(self._html_search_regex( | ||||
|             r'''(?xms)<div\ class="content__episode">\s* | ||||
|                     <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span> | ||||
|                 </div>''', | ||||
|             webpage, 'episode_number', default=None)) | ||||
|  | ||||
|         release_date = parse_iso8601(self._html_search_regex( | ||||
|             r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"', | ||||
|             webpage, 'release_date', default=None)) | ||||
|  | ||||
|         # If there's a ? or a # in the URL, remove them and everything after | ||||
|         clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/') | ||||
|         securevideo_url = clean_url + '.mssecurevideo.json' | ||||
|  | ||||
|         try: | ||||
|             video = self._download_json(securevideo_url, display_id) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: | ||||
|                 self.raise_login_required() | ||||
|             raise | ||||
|  | ||||
|         # We are dealing with a '../<show>.relevant' URL | ||||
|         redirect_url = video.get('url') | ||||
|         if redirect_url: | ||||
|             return self.url_result(self._proto_relative_url(redirect_url, 'https:')) | ||||
|  | ||||
|         # There is only one entry, but with an unknown key, so just get | ||||
|         # the first one | ||||
|         video_id = list(video.values())[0].get('videoid') | ||||
|  | ||||
|         return merge_dicts(info, { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, | ||||
|             'ie_key': CanvasIE.ie_key(), | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
| -            'title': title, | ||||
| -            'description': description, | ||||
| -            'season': season, | ||||
| -            'season_number': season_number, | ||||
| -            'episode_number': episode_number, | ||||
| -            'release_date': release_date, | ||||
| +            'season_number': int_or_none(page.get('episode_season')), | ||||
|         }) | ||||
|   | ||||
| @@ -11,7 +11,47 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class CBSLocalIE(AnvatoIE): | ||||
| -    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)' | ||||
| +    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/' | ||||
| +    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', | ||||
|         'info_dict': { | ||||
|             'id': '3580809', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'A Very Blue Anniversary', | ||||
|             'description': 'CBS2’s Cindy Hsu has more.', | ||||
|             'thumbnail': 're:^https?://.*', | ||||
|             'timestamp': int, | ||||
|             'upload_date': r're:^\d{8}$', | ||||
|             'uploader': 'CBS', | ||||
|             'subtitles': { | ||||
|                 'en': 'mincount:5', | ||||
|             }, | ||||
|             'categories': [ | ||||
|                 'Stations\\Spoken Word\\WCBSTV', | ||||
|                 'Syndication\\AOL', | ||||
|                 'Syndication\\MSN', | ||||
|                 'Syndication\\NDN', | ||||
|                 'Syndication\\Yahoo', | ||||
|                 'Content\\News', | ||||
|                 'Content\\News\\Local News', | ||||
|             ], | ||||
|             'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mcp_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id) | ||||
|  | ||||
|  | ||||
| class CBSLocalArticleIE(AnvatoIE): | ||||
|     _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # Anvato backend | ||||
| @@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', | ||||
|         'info_dict': { | ||||
|             'id': '3580809', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'A Very Blue Anniversary', | ||||
|             'description': 'CBS2’s Cindy Hsu has more.', | ||||
|             'thumbnail': 're:^https?://.*', | ||||
|             'timestamp': int, | ||||
|             'upload_date': r're:^\d{8}$', | ||||
|             'uploader': 'CBS', | ||||
|             'subtitles': { | ||||
|                 'en': 'mincount:5', | ||||
|             }, | ||||
|             'categories': [ | ||||
|                 'Stations\\Spoken Word\\WCBSTV', | ||||
|                 'Syndication\\AOL', | ||||
|                 'Syndication\\MSN', | ||||
|                 'Syndication\\NDN', | ||||
|                 'Syndication\\Yahoo', | ||||
|                 'Content\\News', | ||||
|                 'Content\\News\\Local News', | ||||
|             ], | ||||
|             'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE): | ||||
|             config['data_src'] % path, page_title, { | ||||
|                 'default': { | ||||
|                     'media_src': config['media_src'], | ||||
| -                } | ||||
| +                }, | ||||
| +                'f4m': { | ||||
| +                    'host': 'cnn-vh.akamaihd.net', | ||||
| +                }, | ||||
|             }) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -336,8 +336,8 @@ class InfoExtractor(object): | ||||
|     object, each element of which is a valid dictionary by this specification. | ||||
|  | ||||
|     Additionally, playlists can have "id", "title", "description", "uploader", | ||||
|     "uploader_id", "uploader_url" attributes with the same semantics as videos | ||||
|     (see above). | ||||
|     "uploader_id", "uploader_url", "duration" attributes with the same semantics | ||||
|     as videos (see above). | ||||
|  | ||||
|  | ||||
|     _type "multi_video" indicates that there are multiple videos that | ||||
| @@ -1237,8 +1237,16 @@ class InfoExtractor(object): | ||||
|             'ViewAction': 'view', | ||||
|         } | ||||
|  | ||||
|         def extract_interaction_type(e): | ||||
|             interaction_type = e.get('interactionType') | ||||
|             if isinstance(interaction_type, dict): | ||||
|                 interaction_type = interaction_type.get('@type') | ||||
|             return str_or_none(interaction_type) | ||||
|  | ||||
|         def extract_interaction_statistic(e): | ||||
|             interaction_statistic = e.get('interactionStatistic') | ||||
|             if isinstance(interaction_statistic, dict): | ||||
|                 interaction_statistic = [interaction_statistic] | ||||
|             if not isinstance(interaction_statistic, list): | ||||
|                 return | ||||
|             for is_e in interaction_statistic: | ||||
| @@ -1246,8 +1254,8 @@ class InfoExtractor(object): | ||||
|                     continue | ||||
|                 if is_e.get('@type') != 'InteractionCounter': | ||||
|                     continue | ||||
| -                interaction_type = is_e.get('interactionType') | ||||
| -                if not isinstance(interaction_type, compat_str): | ||||
| +                interaction_type = extract_interaction_type(is_e) | ||||
| +                if not interaction_type: | ||||
|                     continue | ||||
|                 # For interaction count some sites provide string instead of | ||||
|                 # an integer (as per spec) with non digit characters (e.g. ",") | ||||
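
The helper added above exists because schema.org allows interactionType to be either a plain string or a nested typed object; a stripped-down version (without the str_or_none coercion used in the extractor) behaves like this:

def extract_interaction_type(e):
    interaction_type = e.get('interactionType')
    if isinstance(interaction_type, dict):
        interaction_type = interaction_type.get('@type')
    return interaction_type

# Both forms normalise to the same value:
print(extract_interaction_type({'interactionType': 'https://schema.org/WatchAction'}))
print(extract_interaction_type({'interactionType': {'@type': 'https://schema.org/WatchAction'}}))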
| @@ -2513,16 +2521,18 @@ class InfoExtractor(object): | ||||
|         # amp-video and amp-audio are very similar to their HTML5 counterparts | ||||
|         # so we wll include them right here (see | ||||
|         # https://www.ampproject.org/docs/reference/components/amp-video) | ||||
| -        media_tags = [(media_tag, media_type, '') | ||||
| -                      for media_tag, media_type | ||||
| -                      in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)] | ||||
| +        # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/ | ||||
| +        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)' | ||||
| +        media_tags = [(media_tag, media_tag_name, media_type, '') | ||||
| +                      for media_tag, media_tag_name, media_type | ||||
| +                      in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)] | ||||
|         media_tags.extend(re.findall( | ||||
|             # We only allow video|audio followed by a whitespace or '>'. | ||||
|             # Allowing more characters may end up in significant slow down (see | ||||
|             # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL: | ||||
|             # http://www.porntrex.com/maps/videositemap.xml). | ||||
| -            r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage)) | ||||
| -        for media_tag, media_type, media_content in media_tags: | ||||
| +            r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage)) | ||||
| +        for media_tag, _, media_type, media_content in media_tags: | ||||
|             media_info = { | ||||
|                 'formats': [], | ||||
|                 'subtitles': {}, | ||||
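
A quick check of the widened tag-name pattern introduced above, showing that a Delight VR dl8-video element is now picked up alongside plain and AMP tags (the sample markup is made up):

import re

_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
webpage = '<dl8-video src="/clip.mp4" format="STEREO_180_LR"/>'
print(re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage))
# [('<dl8-video src="/clip.mp4" format="STEREO_180_LR"/>', 'dl8-video', 'video')]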
| @@ -2595,6 +2605,13 @@ class InfoExtractor(object): | ||||
|         return entries | ||||
|  | ||||
|     def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): | ||||
|         signed = 'hdnea=' in manifest_url | ||||
|         if not signed: | ||||
|             # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html | ||||
|             manifest_url = re.sub( | ||||
|                 r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?', | ||||
|                 '', manifest_url).strip('?') | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         hdcore_sign = 'hdcore=3.7.0' | ||||
| @@ -2620,7 +2637,7 @@ class InfoExtractor(object): | ||||
|         formats.extend(m3u8_formats) | ||||
|  | ||||
|         http_host = hosts.get('http') | ||||
| -        if http_host and m3u8_formats and 'hdnea=' not in m3u8_url: | ||||
| +        if http_host and m3u8_formats and not signed: | ||||
|             REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+' | ||||
|             qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',') | ||||
|             qualities_length = len(qualities) | ||||
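
The effect of the new parameter stripping for unsigned Akamai manifest URLs, sketched on a made-up URL (the substitution is the one added above; signed URLs containing hdnea= are left untouched):

import re

manifest_url = 'https://example.akamaihd.net/i/event/stream_,300,600,1200,.mp4.csmil/master.m3u8?b=300-1200&__a__=off&__b__=450'
if 'hdnea=' not in manifest_url:
    manifest_url = re.sub(
        r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
        '', manifest_url).strip('?')
print(manifest_url)
# https://example.akamaihd.net/i/event/stream_,300,600,1200,.mp4.csmil/master.m3u8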
|   | ||||
| @@ -8,11 +8,14 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     extract_attributes, | ||||
|     find_xpath_attr, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_class, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     merge_dicts, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     str_to_int, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| from .senateisvp import SenateISVPIE | ||||
| @@ -116,8 +119,30 @@ class CSpanIE(InfoExtractor): | ||||
|                 jwsetup, video_id, require_title=False, m3u8_id='hls', | ||||
|                 base_url=url) | ||||
|             add_referer(info['formats']) | ||||
| +            for subtitles in info['subtitles'].values(): | ||||
| +                for subtitle in subtitles: | ||||
| +                    ext = determine_ext(subtitle['url']) | ||||
| +                    if ext == 'php': | ||||
| +                        ext = 'vtt' | ||||
| +                    subtitle['ext'] = ext | ||||
|             ld_info = self._search_json_ld(webpage, video_id, default={}) | ||||
| -            return merge_dicts(info, ld_info) | ||||
| +            title = get_element_by_class('video-page-title', webpage) or \ | ||||
| +                self._og_search_title(webpage) | ||||
| +            description = get_element_by_attribute('itemprop', 'description', webpage) or \ | ||||
| +                self._html_search_meta(['og:description', 'description'], webpage) | ||||
| +            return merge_dicts(info, ld_info, { | ||||
| +                'title': title, | ||||
| +                'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage), | ||||
| +                'description': description, | ||||
| +                'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)), | ||||
| +                'location': get_element_by_attribute('itemprop', 'contentLocation', webpage), | ||||
| +                'duration': int_or_none(self._search_regex( | ||||
| +                    r'jwsetup\.seclength\s*=\s*(\d+);', | ||||
| +                    webpage, 'duration', fatal=False)), | ||||
| +                'view_count': str_to_int(self._search_regex( | ||||
| +                    r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>", | ||||
| +                    webpage, 'views', fatal=False)), | ||||
| +            }) | ||||
|  | ||||
|         # Obsolete | ||||
|         # We first look for clipid, because clipprog always appears before | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/ctv.py | 52 lines (new file)
							| @@ -0,0 +1,52 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', | ||||
|         'info_dict': { | ||||
|             'id': '2102249', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Wednesday, December 23, 2020', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', | ||||
|             'timestamp': 1608732000, | ||||
|             'upload_date': '20201223', | ||||
|             'series': 'Your Morning', | ||||
|             'season': '2020-2021', | ||||
|             'season_number': 5, | ||||
|             'episode_number': 88, | ||||
|             'tags': ['Your Morning'], | ||||
|             'categories': ['Talk Show'], | ||||
|             'duration': 7467.126, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         content = self._download_json( | ||||
|             'https://www.ctv.ca/space-graphql/graphql', display_id, query={ | ||||
|                 'query': '''{ | ||||
|   resolvedPath(path: "/%s") { | ||||
|     lastSegment { | ||||
|       content { | ||||
|         ... on AxisContent { | ||||
|           axisId | ||||
|           videoPlayerDestCode | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }''' % display_id, | ||||
|             })['data']['resolvedPath']['lastSegment']['content'] | ||||
|         video_id = content['axisId'] | ||||
|         return self.url_result( | ||||
|             '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id), | ||||
|             'NineCNineMedia', video_id) | ||||
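
The extractor above resolves the page path through CTV's GraphQL endpoint and then hands off to the NineCNineMedia extractor; roughly the same lookup can be sketched with requests (endpoint, query and response shape are taken from the code above, the path is just the test URL's path):

import requests

path = 'shows/your-morning/wednesday-december-23-2020-s5e88'
query = '{ resolvedPath(path: "/%s") { lastSegment { content { ... on AxisContent { axisId videoPlayerDestCode } } } } }' % path
data = requests.get(
    'https://www.ctv.ca/space-graphql/graphql', params={'query': query}).json()
content = data['data']['resolvedPath']['lastSegment']['content']
print('9c9media:%s:%s' % (content['videoPlayerDestCode'], content['axisId']))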
| @@ -17,7 +17,12 @@ from ..utils import ( | ||||
| class DPlayIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (?P<domain> | ||||
| -            (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))| | ||||
| +            (?:www\.)?(?P<host>d | ||||
| +                (?: | ||||
| +                    play\.(?P<country>dk|fi|jp|se|no)| | ||||
| +                    iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no) | ||||
| +                ) | ||||
| +            )| | ||||
|             (?P<subdomain_country>es|it)\.dplay\.com | ||||
|         )/[^/]+/(?P<id>[^/]+/[^/?#]+)''' | ||||
|  | ||||
| @@ -126,6 +131,24 @@ class DPlayIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.dplay.jp/video/gold-rush/24086', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _get_disco_api_info(self, url, display_id, disco_host, realm, country): | ||||
| @@ -241,7 +264,7 @@ class DPlayIE(InfoExtractor): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         display_id = mobj.group('id') | ||||
|         domain = mobj.group('domain').lstrip('www.') | ||||
| -        country = mobj.group('country') or mobj.group('subdomain_country') | ||||
| -        host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com' | ||||
| +        country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') | ||||
| +        host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' | ||||
|         return self._get_disco_api_info( | ||||
|             url, display_id, host, 'dplay' + country, country) | ||||
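
A small illustration of the updated host selection: any matched domain that starts with a 'd' (dplay.* as well as discoveryplus.*) keeps a per-domain disco-api host, while the es/it dplay.com sites still go through eu2-prod.disco-api.com (domains shown after the www. prefix has been stripped):

for domain in ('dplay.se', 'discoveryplus.it', 'es.dplay.com'):
    host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
    print(domain, '->', host)
# dplay.se -> disco-api.dplay.se
# discoveryplus.it -> disco-api.discoveryplus.it
# es.dplay.com -> eu2-prod.disco-api.com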
|   | ||||
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class EpornerIE(InfoExtractor): | ||||
| -    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' | ||||
| +    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', | ||||
|         'md5': '39d486f046212d8e1b911c52ab4691f8', | ||||
| @@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor): | ||||
|         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
| -        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', | ||||
| +        'url': 'http://www.eporner.com/embed/3YRUtzMcWn0', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
| @@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor): | ||||
|         video_id = self._match_id(urlh.geturl()) | ||||
|  | ||||
|         hash = self._search_regex( | ||||
| -            r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash') | ||||
| +            r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash') | ||||
|  | ||||
|         title = self._og_search_title(webpage, default=None) or self._html_search_regex( | ||||
|             r'<title>(.+?) - EPORNER', webpage, 'title') | ||||
| @@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor): | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, default=None)) | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'id="cinemaviews">\s*([0-9,]+)\s*<small>views', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|             r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)', | ||||
|             webpage, 'view count', default=None)) | ||||
|  | ||||
|         return merge_dicts(json_ld, { | ||||
|             'id': video_id, | ||||
|   | ||||
youtube_dl/extractor/everyonesmixtape.py | 77 lines (deleted)
| @@ -1,77 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     sanitized_Request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EveryonesMixtapeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', | ||||
|         'info_dict': { | ||||
|             'id': '5bfseWNmlds', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)", | ||||
|             'uploader': 'FKR.TV', | ||||
|             'uploader_id': 'frenchkissrecords', | ||||
|             'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", | ||||
|             'upload_date': '20081015' | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # This is simply YouTube | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi', | ||||
|         'info_dict': { | ||||
|             'id': 'm7m0jJAbMQi', | ||||
|             'title': 'Driving', | ||||
|         }, | ||||
|         'playlist_count': 24 | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|  | ||||
|         pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id | ||||
|         pllist_req = sanitized_Request(pllist_url) | ||||
|         pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') | ||||
|  | ||||
|         playlist_list = self._download_json( | ||||
|             pllist_req, playlist_id, note='Downloading playlist metadata') | ||||
|         try: | ||||
|             playlist_no = next(playlist['id'] | ||||
|                                for playlist in playlist_list | ||||
|                                if playlist['code'] == playlist_id) | ||||
|         except StopIteration: | ||||
|             raise ExtractorError('Playlist id not found') | ||||
|  | ||||
|         pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no | ||||
|         pl_req = sanitized_Request(pl_url) | ||||
|         pl_req.add_header('X-Requested-With', 'XMLHttpRequest') | ||||
|         playlist = self._download_json( | ||||
|             pl_req, playlist_id, note='Downloading playlist info') | ||||
|  | ||||
|         entries = [{ | ||||
|             '_type': 'url', | ||||
|             'url': t['url'], | ||||
|             'title': t['title'], | ||||
|         } for t in playlist['tracks']] | ||||
|  | ||||
|         if mobj.group('songnr'): | ||||
|             songnr = int(mobj.group('songnr')) - 1 | ||||
|             return entries[songnr] | ||||
|  | ||||
|         playlist_title = playlist['mixData']['name'] | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': playlist_id, | ||||
|             'title': playlist_title, | ||||
|             'entries': entries, | ||||
|         } | ||||
| @@ -30,7 +30,11 @@ from .adobetv import ( | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aenetworks import ( | ||||
|     AENetworksIE, | ||||
|     AENetworksCollectionIE, | ||||
|     AENetworksShowIE, | ||||
|     HistoryTopicIE, | ||||
|     HistoryPlayerIE, | ||||
|     BiographyIE, | ||||
| ) | ||||
| from .afreecatv import AfreecaTVIE | ||||
| from .airmozilla import AirMozillaIE | ||||
| @@ -51,7 +55,9 @@ from .appletrailers import ( | ||||
|     AppleTrailersIE, | ||||
|     AppleTrailersSectionIE, | ||||
| ) | ||||
| from .applepodcasts import ApplePodcastsIE | ||||
| from .archiveorg import ArchiveOrgIE | ||||
| from .arcpublishing import ArcPublishingIE | ||||
| from .arkena import ArkenaIE | ||||
| from .ard import ( | ||||
|     ARDBetaMediathekIE, | ||||
| @@ -89,16 +95,18 @@ from .bbc import ( | ||||
|     BBCCoUkPlaylistIE, | ||||
|     BBCIE, | ||||
| ) | ||||
| from .beampro import ( | ||||
|     BeamProLiveIE, | ||||
|     BeamProVodIE, | ||||
| ) | ||||
| from .beeg import BeegIE | ||||
| from .behindkink import BehindKinkIE | ||||
| from .bellmedia import BellMediaIE | ||||
| from .beatport import BeatportIE | ||||
| from .bet import BetIE | ||||
| from .bfi import BFIPlayerIE | ||||
| from .bfmtv import ( | ||||
|     BFMTVIE, | ||||
|     BFMTVLiveIE, | ||||
|     BFMTVArticleIE, | ||||
| ) | ||||
| from .bibeltv import BibelTVIE | ||||
| from .bigflix import BigflixIE | ||||
| from .bild import BildIE | ||||
| from .bilibili import ( | ||||
| @@ -121,6 +129,7 @@ from .bleacherreport import ( | ||||
| from .blinkx import BlinkxIE | ||||
| from .bloomberg import BloombergIE | ||||
| from .bokecc import BokeCCIE | ||||
| from .bongacams import BongaCamsIE | ||||
| from .bostonglobe import BostonGlobeIE | ||||
| from .box import BoxIE | ||||
| from .bpb import BpbIE | ||||
| @@ -165,7 +174,10 @@ from .cbc import ( | ||||
|     CBCOlympicsIE, | ||||
| ) | ||||
| from .cbs import CBSIE | ||||
| from .cbslocal import CBSLocalIE | ||||
| from .cbslocal import ( | ||||
|     CBSLocalIE, | ||||
|     CBSLocalArticleIE, | ||||
| ) | ||||
| from .cbsinteractive import CBSInteractiveIE | ||||
| from .cbsnews import ( | ||||
|     CBSNewsEmbedIE, | ||||
| @@ -243,6 +255,7 @@ from .crunchyroll import ( | ||||
| ) | ||||
| from .cspan import CSpanIE | ||||
| from .ctsnews import CtsNewsIE | ||||
| from .ctv import CTVIE | ||||
| from .ctvnews import CTVNewsIE | ||||
| from .cultureunplugged import CultureUnpluggedIE | ||||
| from .curiositystream import ( | ||||
| @@ -329,7 +342,6 @@ from .espn import ( | ||||
| ) | ||||
| from .esri import EsriVideoIE | ||||
| from .europa import EuropaIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| from .expotv import ExpoTVIE | ||||
| from .expressen import ExpressenIE | ||||
| from .extremetube import ExtremeTubeIE | ||||
| @@ -397,7 +409,6 @@ from .fujitv import FujiTVFODPlus7IE | ||||
| from .funimation import FunimationIE | ||||
| from .funk import FunkIE | ||||
| from .fusion import FusionIE | ||||
| from .fxnetworks import FXNetworksIE | ||||
| from .gaia import GaiaIE | ||||
| from .gameinformer import GameInformerIE | ||||
| from .gamespot import GameSpotIE | ||||
| @@ -418,7 +429,10 @@ from .go import GoIE | ||||
| from .godtube import GodTubeIE | ||||
| from .golem import GolemIE | ||||
| from .googledrive import GoogleDriveIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlepodcasts import ( | ||||
|     GooglePodcastsIE, | ||||
|     GooglePodcastsFeedIE, | ||||
| ) | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .goshgay import GoshgayIE | ||||
| from .gputechconf import GPUTechConfIE | ||||
| @@ -459,6 +473,10 @@ from .ign import ( | ||||
|     OneUPIE, | ||||
|     PCMagIE, | ||||
| ) | ||||
| from .iheart import ( | ||||
|     IHeartRadioIE, | ||||
|     IHeartRadioPodcastIE, | ||||
| ) | ||||
| from .imdb import ( | ||||
|     ImdbIE, | ||||
|     ImdbListIE | ||||
| @@ -503,13 +521,15 @@ from .joj import JojIE | ||||
| from .jwplatform import JWPlatformIE | ||||
| from .kakao import KakaoIE | ||||
| from .kaltura import KalturaIE | ||||
| from .kanalplay import KanalPlayIE | ||||
| from .kankan import KankanIE | ||||
| from .karaoketv import KaraoketvIE | ||||
| from .karrierevideos import KarriereVideosIE | ||||
| from .keezmovies import KeezMoviesIE | ||||
| from .ketnet import KetnetIE | ||||
| from .khanacademy import KhanAcademyIE | ||||
| from .khanacademy import ( | ||||
|     KhanAcademyIE, | ||||
|     KhanAcademyUnitIE, | ||||
| ) | ||||
| from .kickstarter import KickStarterIE | ||||
| from .kinja import KinjaEmbedIE | ||||
| from .kinopoisk import KinoPoiskIE | ||||
| @@ -532,7 +552,10 @@ from .laola1tv import ( | ||||
|     EHFTVIE, | ||||
|     ITTFIE, | ||||
| ) | ||||
| from .lbry import LBRYIE | ||||
| from .lbry import ( | ||||
|     LBRYIE, | ||||
|     LBRYChannelIE, | ||||
| ) | ||||
| from .lci import LCIIE | ||||
| from .lcp import ( | ||||
|     LcpPlayIE, | ||||
| @@ -678,9 +701,15 @@ from .nationalgeographic import ( | ||||
|     NationalGeographicTVIE, | ||||
| ) | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nba import ( | ||||
|     NBAWatchEmbedIE, | ||||
|     NBAWatchIE, | ||||
|     NBAWatchCollectionIE, | ||||
|     NBAEmbedIE, | ||||
|     NBAIE, | ||||
|     NBAChannelIE, | ||||
| ) | ||||
| from .nbc import ( | ||||
|     CSNNEIE, | ||||
|     NBCIE, | ||||
|     NBCNewsIE, | ||||
|     NBCOlympicsIE, | ||||
| @@ -723,8 +752,14 @@ from .nexx import ( | ||||
|     NexxIE, | ||||
|     NexxEmbedIE, | ||||
| ) | ||||
| from .nfl import NFLIE | ||||
| from .nhk import NhkVodIE | ||||
| from .nfl import ( | ||||
|     NFLIE, | ||||
|     NFLArticleIE, | ||||
| ) | ||||
| from .nhk import ( | ||||
|     NhkVodIE, | ||||
|     NhkVodProgramIE, | ||||
| ) | ||||
| from .nhl import NHLIE | ||||
| from .nick import ( | ||||
|     NickIE, | ||||
| @@ -740,7 +775,6 @@ from .ninenow import NineNowIE | ||||
| from .nintendo import NintendoIE | ||||
| from .njpwworld import NJPWWorldIE | ||||
| from .nobelprize import NobelPrizeIE | ||||
| from .noco import NocoIE | ||||
| from .nonktube import NonkTubeIE | ||||
| from .noovo import NoovoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| @@ -773,6 +807,7 @@ from .nrk import ( | ||||
|     NRKSkoleIE, | ||||
|     NRKTVIE, | ||||
|     NRKTVDirekteIE, | ||||
|     NRKRadioPodkastIE, | ||||
|     NRKTVEpisodeIE, | ||||
|     NRKTVEpisodesIE, | ||||
|     NRKTVSeasonIE, | ||||
| @@ -1036,16 +1071,11 @@ from .skynewsarabia import ( | ||||
| from .sky import ( | ||||
|     SkyNewsIE, | ||||
|     SkySportsIE, | ||||
|     SkySportsNewsIE, | ||||
| ) | ||||
| from .slideshare import SlideshareIE | ||||
| from .slideslive import SlidesLiveIE | ||||
| from .slutload import SlutloadIE | ||||
| from .smotri import ( | ||||
|     SmotriIE, | ||||
|     SmotriCommunityIE, | ||||
|     SmotriUserIE, | ||||
|     SmotriBroadcastIE, | ||||
| ) | ||||
| from .snotr import SnotrIE | ||||
| from .sohu import SohuIE | ||||
| from .sonyliv import SonyLIVIE | ||||
| @@ -1079,7 +1109,10 @@ from .spike import ( | ||||
|     BellatorIE, | ||||
|     ParamountNetworkIE, | ||||
| ) | ||||
| from .stitcher import StitcherIE | ||||
| from .stitcher import ( | ||||
|     StitcherIE, | ||||
|     StitcherShowIE, | ||||
| ) | ||||
| from .sport5 import Sport5IE | ||||
| from .sportbox import SportBoxIE | ||||
| from .sportdeutschland import SportDeutschlandIE | ||||
| @@ -1123,7 +1156,6 @@ from .tagesschau import ( | ||||
|     TagesschauIE, | ||||
| ) | ||||
| from .tass import TassIE | ||||
| from .tastytrade import TastyTradeIE | ||||
| from .tbs import TBSIE | ||||
| from .tdslifeway import TDSLifewayIE | ||||
| from .teachable import ( | ||||
| @@ -1150,6 +1182,7 @@ from .telequebec import ( | ||||
|     TeleQuebecSquatIE, | ||||
|     TeleQuebecEmissionIE, | ||||
|     TeleQuebecLiveIE, | ||||
|     TeleQuebecVideoIE, | ||||
| ) | ||||
| from .teletask import TeleTaskIE | ||||
| from .telewebion import TelewebionIE | ||||
| @@ -1222,6 +1255,10 @@ from .tv2dk import ( | ||||
| from .tv2hu import TV2HuIE | ||||
| from .tv4 import TV4IE | ||||
| from .tv5mondeplus import TV5MondePlusIE | ||||
| from .tv5unis import ( | ||||
|     TV5UnisVideoIE, | ||||
|     TV5UnisIE, | ||||
| ) | ||||
| from .tva import ( | ||||
|     TVAIE, | ||||
|     QubIE, | ||||
| @@ -1411,7 +1448,10 @@ from .vshare import VShareIE | ||||
| from .medialaan import MedialaanIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
| from .vvvvid import VVVVIDIE | ||||
| from .vvvvid import ( | ||||
|     VVVVIDIE, | ||||
|     VVVVIDShowIE, | ||||
| ) | ||||
| from .vyborymos import VyboryMosIE | ||||
| from .vzaar import VzaarIE | ||||
| from .wakanim import WakanimIE | ||||
| @@ -1442,7 +1482,10 @@ from .weibo import ( | ||||
|     WeiboMobileIE | ||||
| ) | ||||
| from .weiqitv import WeiqiTVIE | ||||
| from .wistia import WistiaIE | ||||
| from .wistia import ( | ||||
|     WistiaIE, | ||||
|     WistiaPlaylistIE, | ||||
| ) | ||||
| from .worldstarhiphop import WorldStarHipHopIE | ||||
| from .wsj import ( | ||||
|     WSJIE, | ||||
| @@ -1520,11 +1563,11 @@ from .youtube import ( | ||||
|     YoutubeSubscriptionsIE, | ||||
|     YoutubeTruncatedIDIE, | ||||
|     YoutubeTruncatedURLIE, | ||||
|     YoutubeYtBeIE, | ||||
|     YoutubeYtUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zapiks import ZapiksIE | ||||
| from .zaq1 import Zaq1IE | ||||
| from .zattoo import ( | ||||
|     BBVTVIE, | ||||
|     EinsUndEinsTVIE, | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| @@ -8,6 +9,7 @@ from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_etree_fromstring, | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urllib_parse_unquote_plus, | ||||
| @@ -16,14 +18,17 @@ from ..utils import ( | ||||
|     clean_html, | ||||
|     error_to_compat_str, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     get_element_by_id, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     limit_length, | ||||
|     parse_count, | ||||
|     qualities, | ||||
|     sanitized_Request, | ||||
|     try_get, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -39,11 +44,13 @@ class FacebookIE(InfoExtractor): | ||||
|                                 photo\.php| | ||||
|                                 video\.php| | ||||
|                                 video/embed| | ||||
|                                 story\.php | ||||
|                                 story\.php| | ||||
|                                 watch(?:/live)?/? | ||||
|                             )\?(?:.*?)(?:v|video_id|story_fbid)=| | ||||
|                             [^/]+/videos/(?:[^/]+/)?| | ||||
|                             [^/]+/posts/| | ||||
|                             groups/[^/]+/permalink/ | ||||
|                             groups/[^/]+/permalink/| | ||||
|                             watchparty/ | ||||
|                         )| | ||||
|                     facebook: | ||||
|                 ) | ||||
| @@ -54,8 +61,6 @@ class FacebookIE(InfoExtractor): | ||||
|     _NETRC_MACHINE = 'facebook' | ||||
|     IE_NAME = 'facebook' | ||||
|  | ||||
|     _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' | ||||
|  | ||||
|     _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' | ||||
|     _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' | ||||
|  | ||||
| @@ -72,6 +77,7 @@ class FacebookIE(InfoExtractor): | ||||
|         }, | ||||
|         'skip': 'Requires logging in', | ||||
|     }, { | ||||
|         # data.video | ||||
|         'url': 'https://www.facebook.com/video.php?v=274175099429670', | ||||
|         'info_dict': { | ||||
|             'id': '274175099429670', | ||||
| @@ -133,6 +139,7 @@ class FacebookIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         # have 1080P, but only up to 720p in swf params | ||||
|         # data.video.story.attachments[].media | ||||
|         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', | ||||
|         'md5': '9571fae53d4165bbbadb17a94651dcdc', | ||||
|         'info_dict': { | ||||
| @@ -147,6 +154,7 @@ class FacebookIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall | ||||
|         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||
|         'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', | ||||
|         'info_dict': { | ||||
|             'id': '1417995061575415', | ||||
| @@ -174,6 +182,7 @@ class FacebookIE(InfoExtractor): | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||
|         'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', | ||||
|         'info_dict': { | ||||
|             'id': '1396382447100162', | ||||
| @@ -193,18 +202,23 @@ class FacebookIE(InfoExtractor): | ||||
|         'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.mediaset.currMedia.edges | ||||
|         'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.video.story.attachments[].media | ||||
|         'url': 'facebook:544765982287235', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||
|         'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.video.creation_story.attachments[].media | ||||
|         'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.video | ||||
|         'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
| @@ -212,6 +226,7 @@ class FacebookIE(InfoExtractor): | ||||
|         'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.video | ||||
|         'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/', | ||||
|         'info_dict': { | ||||
|             'id': '359649331226507', | ||||
| @@ -222,7 +237,64 @@ class FacebookIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media | ||||
|         'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', | ||||
|         'info_dict': { | ||||
|             'id': '106560053808006', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     }, { | ||||
|         # data.video.story.attachments[].media | ||||
|         'url': 'https://www.facebook.com/watch/?v=647537299265662', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media | ||||
|         'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271', | ||||
|         'info_dict': { | ||||
|             'id': '10157667649866271', | ||||
|         }, | ||||
|         'playlist_count': 3, | ||||
|     }, { | ||||
|         # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media | ||||
|         'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330', | ||||
|         'info_dict': { | ||||
|             'id': '117576630041613', | ||||
|             'ext': 'mp4', | ||||
|             # TODO: title can be extracted from video page | ||||
|             'title': 'Facebook video #117576630041613', | ||||
|             'uploader_id': '189393014416438', | ||||
|             'upload_date': '20201123', | ||||
|             'timestamp': 1606162592, | ||||
|         }, | ||||
|         'skip': 'Requires logging in', | ||||
|     }, { | ||||
|         # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media | ||||
|         'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/', | ||||
|         'info_dict': { | ||||
|             'id': '211567722618337', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Facebook video #211567722618337', | ||||
|             'uploader_id': '127875227654254', | ||||
|             'upload_date': '20161122', | ||||
|             'timestamp': 1479793574, | ||||
|         }, | ||||
|     }, { | ||||
|         # data.video.creation_story.attachments[].media | ||||
|         'url': 'https://www.facebook.com/watch/live/?v=1823658634322275', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.facebook.com/watchparty/211641140192478', | ||||
|         'info_dict': { | ||||
|             'id': '211641140192478', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'skip': 'Requires logging in', | ||||
|     }] | ||||
|     _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' | ||||
|     _api_config = { | ||||
|         'graphURI': '/api/graphql/' | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_urls(webpage): | ||||
| @@ -305,23 +377,24 @@ class FacebookIE(InfoExtractor): | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _extract_from_url(self, url, video_id, fatal_if_no_video=True): | ||||
|         req = sanitized_Request(url) | ||||
|         req.add_header('User-Agent', self._CHROME_USER_AGENT) | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|     def _extract_from_url(self, url, video_id): | ||||
|         webpage = self._download_webpage( | ||||
|             url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id) | ||||
|  | ||||
|         video_data = None | ||||
|  | ||||
|         def extract_video_data(instances): | ||||
|             video_data = [] | ||||
|             for item in instances: | ||||
|                 if item[1][0] == 'VideoConfig': | ||||
|                 if try_get(item, lambda x: x[1][0]) == 'VideoConfig': | ||||
|                     video_item = item[2][0] | ||||
|                     if video_item.get('video_id'): | ||||
|                         return video_item['videoData'] | ||||
|                         video_data.append(video_item['videoData']) | ||||
|             return video_data | ||||
|  | ||||
|         server_js_data = self._parse_json(self._search_regex( | ||||
|             r'handleServerJS\(({.+})(?:\);|,")', webpage, | ||||
|             'server js data', default='{}'), video_id, fatal=False) | ||||
|             [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'], | ||||
|             webpage, 'server js data', default='{}'), video_id, fatal=False) | ||||
|  | ||||
|         if server_js_data: | ||||
|             video_data = extract_video_data(server_js_data.get('instances', [])) | ||||
| @@ -331,17 +404,118 @@ class FacebookIE(InfoExtractor): | ||||
|                 return extract_video_data(try_get( | ||||
|                     js_data, lambda x: x['jsmods']['instances'], list) or []) | ||||
|  | ||||
|         def extract_dash_manifest(video, formats): | ||||
|             dash_manifest = video.get('dash_manifest') | ||||
|             if dash_manifest: | ||||
|                 formats.extend(self._parse_mpd_formats( | ||||
|                     compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) | ||||
|  | ||||
|         def process_formats(formats): | ||||
|             # Downloads with browser's User-Agent are rate limited. Working around | ||||
|             # with non-browser User-Agent. | ||||
|             for f in formats: | ||||
|                 f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|         def extract_relay_data(_filter): | ||||
|             return self._parse_json(self._search_regex( | ||||
|                 r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter, | ||||
|                 webpage, 'replay data', default='{}'), video_id, fatal=False) or {} | ||||
|  | ||||
|         def extract_relay_prefetched_data(_filter): | ||||
|             replay_data = extract_relay_data(_filter) | ||||
|             for require in (replay_data.get('require') or []): | ||||
|                 if require[0] == 'RelayPrefetchedStreamCache': | ||||
|                     return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} | ||||
|  | ||||
|         if not video_data: | ||||
|             server_js_data = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)', | ||||
|                     webpage, 'js data', default='{}'), | ||||
|                 video_id, transform_source=js_to_json, fatal=False) | ||||
|             server_js_data = self._parse_json(self._search_regex([ | ||||
|                 r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX, | ||||
|                 r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX | ||||
|             ], webpage, 'js data', default='{}'), video_id, js_to_json, False) | ||||
|             video_data = extract_from_jsmods_instances(server_js_data) | ||||
|  | ||||
|         if not video_data: | ||||
|             if not fatal_if_no_video: | ||||
|                 return webpage, False | ||||
|             data = extract_relay_prefetched_data( | ||||
|                 r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"') | ||||
|             if data: | ||||
|                 entries = [] | ||||
|  | ||||
|                 def parse_graphql_video(video): | ||||
|                     formats = [] | ||||
|                     q = qualities(['sd', 'hd']) | ||||
|                     for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]: | ||||
|                         playable_url = video.get('playable_url' + suffix) | ||||
|                         if not playable_url: | ||||
|                             continue | ||||
|                         formats.append({ | ||||
|                             'format_id': format_id, | ||||
|                             'quality': q(format_id), | ||||
|                             'url': playable_url, | ||||
|                         }) | ||||
|                     extract_dash_manifest(video, formats) | ||||
|                     process_formats(formats) | ||||
|                     v_id = video.get('videoId') or video.get('id') or video_id | ||||
|                     info = { | ||||
|                         'id': v_id, | ||||
|                         'formats': formats, | ||||
|                         'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']), | ||||
|                         'uploader_id': try_get(video, lambda x: x['owner']['id']), | ||||
|                         'timestamp': int_or_none(video.get('publish_time')), | ||||
|                         'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), | ||||
|                     } | ||||
|                     description = try_get(video, lambda x: x['savable_description']['text']) | ||||
|                     title = video.get('name') | ||||
|                     if title: | ||||
|                         info.update({ | ||||
|                             'title': title, | ||||
|                             'description': description, | ||||
|                         }) | ||||
|                     else: | ||||
|                         info['title'] = description or 'Facebook video #%s' % v_id | ||||
|                     entries.append(info) | ||||
|  | ||||
|                 def parse_attachment(attachment, key='media'): | ||||
|                     media = attachment.get(key) or {} | ||||
|                     if media.get('__typename') == 'Video': | ||||
|                         return parse_graphql_video(media) | ||||
|  | ||||
|                 nodes = data.get('nodes') or [] | ||||
|                 node = data.get('node') or {} | ||||
|                 if not nodes and node: | ||||
|                     nodes.append(node) | ||||
|                 for node in nodes: | ||||
|                     story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {} | ||||
|                     attachments = try_get(story, [ | ||||
|                         lambda x: x['attached_story']['attachments'], | ||||
|                         lambda x: x['attachments'] | ||||
|                     ], list) or [] | ||||
|                     for attachment in attachments: | ||||
|                         attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict) | ||||
|                         ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or [] | ||||
|                         for n in ns: | ||||
|                             parse_attachment(n) | ||||
|                         parse_attachment(attachment) | ||||
|  | ||||
|                 edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or [] | ||||
|                 for edge in edges: | ||||
|                     parse_attachment(edge, key='node') | ||||
|  | ||||
|                 video = data.get('video') or {} | ||||
|                 if video: | ||||
|                     attachments = try_get(video, [ | ||||
|                         lambda x: x['story']['attachments'], | ||||
|                         lambda x: x['creation_story']['attachments'] | ||||
|                     ], list) or [] | ||||
|                     for attachment in attachments: | ||||
|                         parse_attachment(attachment) | ||||
|                     if not entries: | ||||
|                         parse_graphql_video(video) | ||||
|  | ||||
|                 return self.playlist_result(entries, video_id) | ||||
|  | ||||
|         if not video_data: | ||||
|             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) | ||||
|             if m_msg is not None: | ||||
|                 raise ExtractorError( | ||||
| @@ -350,6 +524,43 @@ class FacebookIE(InfoExtractor): | ||||
|             elif '>You must log in to continue' in webpage: | ||||
|                 self.raise_login_required() | ||||
|  | ||||
|         if not video_data and '/watchparty/' in url: | ||||
|             post_data = { | ||||
|                 'doc_id': 3731964053542869, | ||||
|                 'variables': json.dumps({ | ||||
|                     'livingRoomID': video_id, | ||||
|                 }), | ||||
|             } | ||||
|  | ||||
|             prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{') | ||||
|             if prefetched_data: | ||||
|                 lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict) | ||||
|                 if lsd: | ||||
|                     post_data[lsd['name']] = lsd['value'] | ||||
|  | ||||
|             relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,') | ||||
|             for define in (relay_data.get('define') or []): | ||||
|                 if define[0] == 'RelayAPIConfigDefaults': | ||||
|                     self._api_config = define[2] | ||||
|  | ||||
|             living_room = self._download_json( | ||||
|                 urljoin(url, self._api_config['graphURI']), video_id, | ||||
|                 data=urlencode_postdata(post_data))['data']['living_room'] | ||||
|  | ||||
|             entries = [] | ||||
|             for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []): | ||||
|                 video = try_get(edge, lambda x: x['node']['video']) or {} | ||||
|                 v_id = video.get('id') | ||||
|                 if not v_id: | ||||
|                     continue | ||||
|                 v_id = compat_str(v_id) | ||||
|                 entries.append(self.url_result( | ||||
|                     self._VIDEO_PAGE_TEMPLATE % v_id, | ||||
|                     self.ie_key(), v_id, video.get('name'))) | ||||
|  | ||||
|             return self.playlist_result(entries, video_id) | ||||
|  | ||||
|         if not video_data: | ||||
|             # Video info not in first request, do a secondary request using | ||||
|             # tahoe player specific URL | ||||
|             tahoe_data = self._download_webpage( | ||||
| @@ -379,8 +590,19 @@ class FacebookIE(InfoExtractor): | ||||
|         if not video_data: | ||||
|             raise ExtractorError('Cannot parse data') | ||||
|  | ||||
|         subtitles = {} | ||||
|         if len(video_data) > 1: | ||||
|             entries = [] | ||||
|             for v in video_data: | ||||
|                 video_url = v[0].get('video_url') | ||||
|                 if not video_url: | ||||
|                     continue | ||||
|                 entries.append(self.url_result(urljoin( | ||||
|                     url, video_url), self.ie_key(), v[0].get('video_id'))) | ||||
|             return self.playlist_result(entries, video_id) | ||||
|         video_data = video_data[0] | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         for f in video_data: | ||||
|             format_id = f['stream_type'] | ||||
|             if f and isinstance(f, dict): | ||||
| @@ -399,22 +621,14 @@ class FacebookIE(InfoExtractor): | ||||
|                             'url': src, | ||||
|                             'preference': preference, | ||||
|                         }) | ||||
|             dash_manifest = f[0].get('dash_manifest') | ||||
|             if dash_manifest: | ||||
|                 formats.extend(self._parse_mpd_formats( | ||||
|                     compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)))) | ||||
|             extract_dash_manifest(f[0], formats) | ||||
|             subtitles_src = f[0].get('subtitles_src') | ||||
|             if subtitles_src: | ||||
|                 subtitles.setdefault('en', []).append({'url': subtitles_src}) | ||||
|         if not formats: | ||||
|             raise ExtractorError('Cannot find video formats') | ||||
|  | ||||
|         # Downloads with browser's User-Agent are rate limited. Working around | ||||
|         # with non-browser User-Agent. | ||||
|         for f in formats: | ||||
|             f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         process_formats(formats) | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, | ||||
| @@ -454,35 +668,13 @@ class FacebookIE(InfoExtractor): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|         return webpage, info_dict | ||||
|         return info_dict | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url | ||||
|         webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False) | ||||
|  | ||||
|         if info_dict: | ||||
|             return info_dict | ||||
|  | ||||
|         if '/posts/' in url: | ||||
|             video_id_json = self._search_regex( | ||||
|                 r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids', | ||||
|                 default='') | ||||
|             if video_id_json: | ||||
|                 entries = [ | ||||
|                     self.url_result('facebook:%s' % vid, FacebookIE.ie_key()) | ||||
|                     for vid in self._parse_json(video_id_json, video_id)] | ||||
|                 return self.playlist_result(entries, video_id) | ||||
|  | ||||
|             # Single Video? | ||||
|             video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id') | ||||
|             return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) | ||||
|         else: | ||||
|             _, info_dict = self._extract_from_url( | ||||
|                 self._VIDEO_PAGE_TEMPLATE % video_id, | ||||
|                 video_id, fatal_if_no_video=True) | ||||
|             return info_dict | ||||
|         return self._extract_from_url(real_url, video_id) | ||||
|  | ||||
|  | ||||
| class FacebookPluginsVideoIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,77 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .adobepass import AdobePassIE | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FXNetworksIE(AdobePassIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.fxnetworks.com/video/1032565827847', | ||||
|         'md5': '8d99b97b4aa7a202f55b6ed47ea7e703', | ||||
|         'info_dict': { | ||||
|             'id': 'dRzwHC_MMqIv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'First Look: Better Things - Season 2', | ||||
|             'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.', | ||||
|             'age_limit': 14, | ||||
|             'uploader': 'NEWA-FNG-FX', | ||||
|             'upload_date': '20170825', | ||||
|             'timestamp': 1503686274, | ||||
|             'episode_number': 0, | ||||
|             'season_number': 2, | ||||
|             'series': 'Better Things', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }, { | ||||
|         'url': 'http://www.simpsonsworld.com/video/716094019682', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         if 'The content you are trying to access is not available in your region.' in webpage: | ||||
|             self.raise_geo_restricted() | ||||
|         video_data = extract_attributes(self._search_regex( | ||||
|             r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')) | ||||
|         player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) | ||||
|         release_url = video_data['rel'] | ||||
|         title = video_data['data-title'] | ||||
|         rating = video_data.get('data-rating') | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|         } | ||||
|         if player_type == 'movies': | ||||
|             query.update({ | ||||
|                 'manifest': 'm3u', | ||||
|             }) | ||||
|         else: | ||||
|             query.update({ | ||||
|                 'switch': 'http', | ||||
|             }) | ||||
|         if video_data.get('data-req-auth') == '1': | ||||
|             resource = self._get_mvpd_resource( | ||||
|                 video_data['data-channel'], title, | ||||
|                 video_data.get('data-guid'), rating) | ||||
|             query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), | ||||
|             'series': video_data.get('data-show-title'), | ||||
|             'episode_number': int_or_none(video_data.get('data-episode')), | ||||
|             'season_number': int_or_none(video_data.get('data-season')), | ||||
|             'thumbnail': video_data.get('data-large-thumb'), | ||||
|             'age_limit': parse_age_limit(rating), | ||||
|             'ie_key': 'ThePlatform', | ||||
|         } | ||||
| @@ -20,19 +20,24 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     HEADRequest, | ||||
|     int_or_none, | ||||
|     is_html, | ||||
|     js_to_json, | ||||
|     KNOWN_EXTENSIONS, | ||||
|     merge_dicts, | ||||
|     mimetype2ext, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
|     sanitized_Request, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     unsmuggle_url, | ||||
|     UnsupportedError, | ||||
|     url_or_none, | ||||
|     xpath_attr, | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
| from .commonprotocols import RtmpIE | ||||
| from .brightcove import ( | ||||
| @@ -48,7 +53,6 @@ from .ooyala import OoyalaIE | ||||
| from .rutv import RUTVIE | ||||
| from .tvc import TVCIE | ||||
| from .sportbox import SportBoxIE | ||||
| from .smotri import SmotriIE | ||||
| from .myvi import MyviIE | ||||
| from .condenast import CondeNastIE | ||||
| from .udn import UDNEmbedIE | ||||
| @@ -63,7 +67,10 @@ from .tube8 import Tube8IE | ||||
| from .mofosex import MofosexEmbedIE | ||||
| from .spankwire import SpankwireIE | ||||
| from .youporn import YouPornIE | ||||
| from .vimeo import VimeoIE | ||||
| from .vimeo import ( | ||||
|     VimeoIE, | ||||
|     VHXEmbedIE, | ||||
| ) | ||||
| from .dailymotion import DailymotionIE | ||||
| from .dailymail import DailyMailIE | ||||
| from .onionstudios import OnionStudiosIE | ||||
| @@ -120,6 +127,7 @@ from .expressen import ExpressenIE | ||||
| from .zype import ZypeIE | ||||
| from .odnoklassniki import OdnoklassnikiIE | ||||
| from .kinja import KinjaEmbedIE | ||||
| from .arcpublishing import ArcPublishingIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -198,11 +206,48 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', | ||||
|             'info_dict': { | ||||
|                 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', | ||||
|                 'ext': 'm4v', | ||||
|                 'upload_date': '20150228', | ||||
|                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', | ||||
|             } | ||||
|                 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', | ||||
|                 'title': 'MSNBC Rachel Maddow (video)', | ||||
|                 'description': 're:.*her unique approach to storytelling.*', | ||||
|             }, | ||||
|             'playlist': [{ | ||||
|                 'info_dict': { | ||||
|                     'ext': 'mov', | ||||
|                     'id': 'pdv_maddow_netcast_mov-12-04-2020-224335', | ||||
|                     'title': 're:MSNBC Rachel Maddow', | ||||
|                     'description': 're:.*her unique approach to storytelling.*', | ||||
|                     'timestamp': int, | ||||
|                     'upload_date': compat_str, | ||||
|                     'duration': float, | ||||
|                 }, | ||||
|             }], | ||||
|         }, | ||||
|         # RSS feed with item with description and thumbnails | ||||
|         { | ||||
|             'url': 'https://anchor.fm/s/dd00e14/podcast/rss', | ||||
|             'info_dict': { | ||||
|                 'id': 'https://anchor.fm/s/dd00e14/podcast/rss', | ||||
|                 'title': 're:.*100% Hydrogen.*', | ||||
|                 'description': 're:.*In this episode.*', | ||||
|             }, | ||||
|             'playlist': [{ | ||||
|                 'info_dict': { | ||||
|                     'ext': 'm4a', | ||||
|                     'id': 'c1c879525ce2cb640b344507e682c36d', | ||||
|                     'title': 're:Hydrogen!', | ||||
|                     'description': 're:.*In this episode we are going.*', | ||||
|                     'timestamp': 1567977776, | ||||
|                     'upload_date': '20190908', | ||||
|                     'duration': 459, | ||||
|                     'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                     'episode_number': 1, | ||||
|                     'season_number': 1, | ||||
|                     'age_limit': 0, | ||||
|                 }, | ||||
|             }], | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # RSS feed with enclosures and unsupported link URLs | ||||
|         { | ||||
| @@ -1983,22 +2028,6 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': [SpringboardPlatformIE.ie_key()], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU', | ||||
|             'info_dict': { | ||||
|                 'id': 'uPDB5I9wfp8', | ||||
|                 'ext': 'webm', | ||||
|                 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', | ||||
|                 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', | ||||
|                 'upload_date': '20160219', | ||||
|                 'uploader': 'Pocoyo - Português (BR)', | ||||
|                 'uploader_id': 'PocoyoBrazil', | ||||
|             }, | ||||
|             'add_ie': [YoutubeIE.ie_key()], | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html', | ||||
|             'info_dict': { | ||||
| @@ -2103,23 +2132,23 @@ class GenericIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # Zype embed | ||||
|             'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', | ||||
|             'info_dict': { | ||||
|                 'id': '5b400b834b32992a310622b9', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Smoky Barbecue Favorites', | ||||
|                 'thumbnail': r're:^https?://.*\.jpe?g', | ||||
|                 'description': 'md5:5ff01e76316bd8d46508af26dc86023b', | ||||
|                 'upload_date': '20170909', | ||||
|                 'timestamp': 1504915200, | ||||
|             }, | ||||
|             'add_ie': [ZypeIE.ie_key()], | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # { | ||||
|         #     # Zype embed | ||||
|         #     'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', | ||||
|         #     'info_dict': { | ||||
|         #         'id': '5b400b834b32992a310622b9', | ||||
|         #         'ext': 'mp4', | ||||
|         #         'title': 'Smoky Barbecue Favorites', | ||||
|         #         'thumbnail': r're:^https?://.*\.jpe?g', | ||||
|         #         'description': 'md5:5ff01e76316bd8d46508af26dc86023b', | ||||
|         #         'upload_date': '20170909', | ||||
|         #         'timestamp': 1504915200, | ||||
|         #     }, | ||||
|         #     'add_ie': [ZypeIE.ie_key()], | ||||
|         #     'params': { | ||||
|         #         'skip_download': True, | ||||
|         #     }, | ||||
|         # }, | ||||
|         { | ||||
|             # videojs embed | ||||
|             'url': 'https://video.sibnet.ru/shell.php?videoid=3422904', | ||||
| @@ -2168,7 +2197,32 @@ class GenericIE(InfoExtractor): | ||||
|         #     'params': { | ||||
|         #         'force_generic_extractor': True, | ||||
|         #     }, | ||||
|         # } | ||||
|         # }, | ||||
|         { | ||||
|             # VHX Embed | ||||
|             'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy', | ||||
|             'info_dict': { | ||||
|                 'id': '858208', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Untitled', | ||||
|                 'uploader_id': 'user80538407', | ||||
|                 'uploader': 'OTT Videos', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # ArcPublishing PoWa video player | ||||
|             'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/', | ||||
|             'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3', | ||||
|             'info_dict': { | ||||
|                 'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Senate candidates wave to voters on Anchorage streets', | ||||
|                 'description': 'md5:91f51a6511f090617353dc720318b20e', | ||||
|                 'timestamp': 1604378735, | ||||
|                 'upload_date': '20201103', | ||||
|                 'duration': 1581, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -2180,6 +2234,10 @@ class GenericIE(InfoExtractor): | ||||
|         playlist_desc_el = doc.find('./channel/description') | ||||
|         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd', | ||||
|         } | ||||
|  | ||||
|         entries = [] | ||||
|         for it in doc.findall('./channel/item'): | ||||
|             next_url = None | ||||
| @@ -2195,10 +2253,33 @@ class GenericIE(InfoExtractor): | ||||
|             if not next_url: | ||||
|                 continue | ||||
|  | ||||
|             def itunes(key): | ||||
|                 return xpath_text( | ||||
|                     it, xpath_with_ns('./itunes:%s' % key, NS_MAP), | ||||
|                     default=None) | ||||
|  | ||||
|             duration = itunes('duration') | ||||
|             explicit = (itunes('explicit') or '').lower() | ||||
|             if explicit in ('true', 'yes'): | ||||
|                 age_limit = 18 | ||||
|             elif explicit in ('false', 'no'): | ||||
|                 age_limit = 0 | ||||
|             else: | ||||
|                 age_limit = None | ||||
|  | ||||
|             entries.append({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': next_url, | ||||
|                 'title': it.find('title').text, | ||||
|                 'description': xpath_text(it, 'description', default=None), | ||||
|                 'timestamp': unified_timestamp( | ||||
|                     xpath_text(it, 'pubDate', default=None)), | ||||
|                 'duration': int_or_none(duration) or parse_duration(duration), | ||||
|                 'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')), | ||||
|                 'episode': itunes('title'), | ||||
|                 'episode_number': int_or_none(itunes('episode')), | ||||
|                 'season_number': int_or_none(itunes('season')), | ||||
|                 'age_limit': age_limit, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
| @@ -2318,7 +2399,7 @@ class GenericIE(InfoExtractor): | ||||
|         info_dict = { | ||||
|             'id': video_id, | ||||
|             'title': self._generic_title(url), | ||||
|             'upload_date': unified_strdate(head_response.headers.get('Last-Modified')) | ||||
|             'timestamp': unified_timestamp(head_response.headers.get('Last-Modified')) | ||||
|         } | ||||
|  | ||||
|         # Check for direct link to a video | ||||
| @@ -2424,7 +2505,9 @@ class GenericIE(InfoExtractor): | ||||
|         # Sometimes embedded video player is hidden behind percent encoding | ||||
|         # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448) | ||||
|         # Unescaping the whole page allows to handle those cases in a generic way | ||||
|         webpage = compat_urllib_parse_unquote(webpage) | ||||
|         # FIXME: unescaping the whole page may break URLs, commenting out for now. | ||||
|         # There probably should be a second run of generic extractor on unescaped webpage. | ||||
|         # webpage = compat_urllib_parse_unquote(webpage) | ||||
|  | ||||
|         # Unescape squarespace embeds to be detected by generic extractor, | ||||
|         # see https://github.com/ytdl-org/youtube-dl/issues/21294 | ||||
| @@ -2506,6 +2589,10 @@ class GenericIE(InfoExtractor): | ||||
|         if tp_urls: | ||||
|             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') | ||||
|  | ||||
|         arc_urls = ArcPublishingIE._extract_urls(webpage) | ||||
|         if arc_urls: | ||||
|             return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded rtl.nl player | ||||
|         matches = re.findall( | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', | ||||
| @@ -2517,6 +2604,10 @@ class GenericIE(InfoExtractor): | ||||
|         if vimeo_urls: | ||||
|             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()) | ||||
|  | ||||
|         vhx_url = VHXEmbedIE._extract_url(webpage) | ||||
|         if vhx_url: | ||||
|             return self.url_result(vhx_url, VHXEmbedIE.ie_key()) | ||||
|  | ||||
|         vid_me_embed_url = self._search_regex( | ||||
|             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', | ||||
|             webpage, 'vid.me embed', default=None) | ||||
| @@ -2772,11 +2863,6 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url')) | ||||
|  | ||||
|         # Look for embedded smotri.com player | ||||
|         smotri_url = SmotriIE._extract_url(webpage) | ||||
|         if smotri_url: | ||||
|             return self.url_result(smotri_url, 'Smotri') | ||||
|  | ||||
|         # Look for embedded Myvi.ru player | ||||
|         myvi_url = MyviIE._extract_url(webpage) | ||||
|         if myvi_url: | ||||
|   | ||||
| @@ -38,13 +38,17 @@ class GoIE(AdobePassIE): | ||||
|         'disneynow': { | ||||
|             'brand': '011', | ||||
|             'resource_id': 'Disney', | ||||
|         } | ||||
|         }, | ||||
|         'fxnow.fxnetworks': { | ||||
|             'brand': '025', | ||||
|             'requestor_id': 'dtci', | ||||
|         }, | ||||
|     } | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:(?P<sub_domain>%s)\.)?go| | ||||
|                             (?P<sub_domain_2>abc|freeform|disneynow) | ||||
|                             (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks) | ||||
|                         )\.com/ | ||||
|                         (?: | ||||
|                             (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)| | ||||
| @@ -99,6 +103,19 @@ class GoIE(AdobePassIE): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841', | ||||
|         'info_dict': { | ||||
|             'id': 'VDKA12782841', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'First Look: Better Things - Season 2', | ||||
|             'description': 'md5:fa73584a95761c605d9d54904e35b407', | ||||
|         }, | ||||
|         'params': { | ||||
|             'geo_bypass_ip_block': '3.244.239.0/24', | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', | ||||
|         'only_matching': True, | ||||
|   | ||||
| @@ -1,73 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import codecs | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
| class GooglePlusIE(InfoExtractor): | ||||
|     IE_DESC = 'Google Plus' | ||||
|     _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' | ||||
|     IE_NAME = 'plus.google' | ||||
|     _TEST = { | ||||
|         'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', | ||||
|         'info_dict': { | ||||
|             'id': 'ZButuJc6CtH', | ||||
|             'ext': 'flv', | ||||
|             'title': '嘆きの天使 降臨', | ||||
|             'upload_date': '20120613', | ||||
|             'uploader': '井上ヨシマサ', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         # Step 1, Retrieve post webpage to extract further information | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading entry webpage') | ||||
|  | ||||
|         title = self._og_search_description(webpage).splitlines()[0] | ||||
|         upload_date = unified_strdate(self._html_search_regex( | ||||
|             r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*> | ||||
|                     ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', | ||||
|             webpage, 'upload date', fatal=False, flags=re.VERBOSE)) | ||||
|         uploader = self._html_search_regex( | ||||
|             r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False) | ||||
|  | ||||
|         # Step 2, Simulate clicking the image box to launch video | ||||
|         DOMAIN = 'https://plus.google.com/' | ||||
|         video_page = self._search_regex( | ||||
|             r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN), | ||||
|             webpage, 'video page URL') | ||||
|         if not video_page.startswith(DOMAIN): | ||||
|             video_page = DOMAIN + video_page | ||||
|  | ||||
|         webpage = self._download_webpage(video_page, video_id, 'Downloading video page') | ||||
|  | ||||
|         def unicode_escape(s): | ||||
|             decoder = codecs.getdecoder('unicode_escape') | ||||
|             return re.sub( | ||||
|                 r'\\u[0-9a-fA-F]{4,}', | ||||
|                 lambda m: decoder(m.group(0))[0], | ||||
|                 s) | ||||
|  | ||||
|         # Extract video links all sizes | ||||
|         formats = [{ | ||||
|             'url': unicode_escape(video_url), | ||||
|             'ext': 'flv', | ||||
|             'width': int(width), | ||||
|             'height': int(height), | ||||
|         } for width, height, video_url in re.findall( | ||||
|             r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
youtube_dl/extractor/googlepodcasts.py (new file, 88 lines)
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_podcast_url, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GooglePodcastsBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/' | ||||
|  | ||||
|     def _batch_execute(self, func_id, video_id, params): | ||||
|         return json.loads(self._download_json( | ||||
|             'https://podcasts.google.com/_/PodcastsUi/data/batchexecute', | ||||
|             video_id, data=urlencode_postdata({ | ||||
|                 'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]), | ||||
|             }), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2]) | ||||
|  | ||||
|     def _extract_episode(self, episode): | ||||
|         return { | ||||
|             'id': episode[4][3], | ||||
|             'title': episode[8], | ||||
|             'url': clean_podcast_url(episode[13]), | ||||
|             'thumbnail': episode[2], | ||||
|             'description': episode[9], | ||||
|             'creator': try_get(episode, lambda x: x[14]), | ||||
|             'timestamp': int_or_none(episode[11]), | ||||
|             'duration': int_or_none(episode[12]), | ||||
|             'series': episode[1], | ||||
|         } | ||||
|  | ||||
|  | ||||
| class GooglePodcastsIE(GooglePodcastsBaseIE): | ||||
|     IE_NAME = 'google:podcasts' | ||||
|     _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh', | ||||
|         'md5': 'fa56b2ee8bd0703e27e42d4b104c4766', | ||||
|         'info_dict': { | ||||
|             'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'WWDTM New Year 2021', | ||||
|             'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.', | ||||
|             'upload_date': '20210102', | ||||
|             'timestamp': 1609606800, | ||||
|             'duration': 2901, | ||||
|             'series': "Wait Wait... Don't Tell Me!", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups() | ||||
|         episode = self._batch_execute( | ||||
|             'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1] | ||||
|         return self._extract_episode(episode) | ||||
|  | ||||
|  | ||||
| class GooglePodcastsFeedIE(GooglePodcastsBaseIE): | ||||
|     IE_NAME = 'google:podcasts:feed' | ||||
|     _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)' | ||||
|     _TEST = { | ||||
|         'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA', | ||||
|         'info_dict': { | ||||
|             'title': "Wait Wait... Don't Tell Me!", | ||||
|             'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.", | ||||
|         }, | ||||
|         'playlist_mincount': 20, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         b64_feed_url = self._match_id(url) | ||||
|         data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url]) | ||||
|  | ||||
|         entries = [] | ||||
|         for episode in (try_get(data, lambda x: x[1][0]) or []): | ||||
|             entries.append(self._extract_episode(episode)) | ||||
|  | ||||
|         feed = try_get(data, lambda x: x[3]) or [] | ||||
|         return self.playlist_result( | ||||
|             entries, playlist_title=try_get(feed, lambda x: x[0]), | ||||
|             playlist_description=try_get(feed, lambda x: x[2])) | ||||
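
Both classes above go through _batch_execute, which posts a form-encoded f.req field wrapping the RPC id ('oNjqVe' for a single episode, 'ncqJEe' for a whole feed) and its JSON-encoded params, then strips the anti-JSON prefix from the response before parsing it twice. A rough standalone equivalent, assuming the endpoint accepts a plain form POST with no extra headers:

import json
import re
from urllib.parse import urlencode
from urllib.request import Request, urlopen

def batch_execute(func_id, params):
    # Same framing as _batch_execute: f.req is a triply nested list of
    # [rpc_id, json-encoded params, None, '1'].
    data = urlencode({
        'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
    }).encode()
    req = Request('https://podcasts.google.com/_/PodcastsUi/data/batchexecute', data=data)
    body = urlopen(req).read().decode('utf-8')
    # Drop the anti-JSON prefix, keep the outermost array; the RPC result is
    # itself a JSON string sitting at [0][2].
    envelope = json.loads(re.search(r'(?s)(\[.+\])', body).group(1))
    return json.loads(envelope[0][2])

# episode = batch_execute('oNjqVe', [b64_feed_url, b64_guid])[1]
# feed_data = batch_execute('ncqJEe', [b64_feed_url])
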
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import hmac | ||||
| import json | ||||
| import re | ||||
| import time | ||||
| import uuid | ||||
| @@ -25,43 +26,50 @@ from ..utils import ( | ||||
| class HotStarBaseIE(InfoExtractor): | ||||
|     _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' | ||||
|  | ||||
|     def _call_api_impl(self, path, video_id, query): | ||||
|     def _call_api_impl(self, path, video_id, headers, query, data=None): | ||||
|         st = int(time.time()) | ||||
|         exp = st + 6000 | ||||
|         auth = 'st=%d~exp=%d~acl=/*' % (st, exp) | ||||
|         auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() | ||||
|         response = self._download_json( | ||||
|             'https://api.hotstar.com/' + path, video_id, headers={ | ||||
|                 'hotstarauth': auth, | ||||
|         h = {'hotstarauth': auth} | ||||
|         h.update(headers) | ||||
|         return self._download_json( | ||||
|             'https://api.hotstar.com/' + path, | ||||
|             video_id, headers=h, query=query, data=data) | ||||
|  | ||||
|     def _call_api(self, path, video_id, query_name='contentId'): | ||||
|         response = self._call_api_impl(path, video_id, { | ||||
|             'x-country-code': 'IN', | ||||
|             'x-platform-code': 'JIO', | ||||
|             }, query=query) | ||||
|         }, { | ||||
|             query_name: video_id, | ||||
|             'tas': 10000, | ||||
|         }) | ||||
|         if response['statusCode'] != 'OK': | ||||
|             raise ExtractorError( | ||||
|                 response['body']['message'], expected=True) | ||||
|         return response['body']['results'] | ||||
|  | ||||
|     def _call_api(self, path, video_id, query_name='contentId'): | ||||
|         return self._call_api_impl(path, video_id, { | ||||
|             query_name: video_id, | ||||
|             'tas': 10000, | ||||
|         }) | ||||
|  | ||||
|     def _call_api_v2(self, path, video_id): | ||||
|     def _call_api_v2(self, path, video_id, headers, query=None, data=None): | ||||
|         h = {'X-Request-Id': compat_str(uuid.uuid4())} | ||||
|         h.update(headers) | ||||
|         try: | ||||
|             return self._call_api_impl( | ||||
|             '%s/in/contents/%s' % (path, video_id), video_id, { | ||||
|                 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash', | ||||
|                 'client': 'mweb', | ||||
|                 'clientVersion': '6.18.0', | ||||
|                 'deviceId': compat_str(uuid.uuid4()), | ||||
|                 'osName': 'Windows', | ||||
|                 'osVersion': '10', | ||||
|             }) | ||||
|                 path, video_id, h, query, data) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 if e.cause.code == 402: | ||||
|                     self.raise_login_required() | ||||
|                 message = self._parse_json(e.cause.read().decode(), video_id)['message'] | ||||
|                 if message in ('Content not available in region', 'Country is not supported'): | ||||
|                     raise self.raise_geo_restricted(message) | ||||
|                 raise ExtractorError(message) | ||||
|             raise e | ||||
|  | ||||
|  | ||||
| class HotStarIE(HotStarBaseIE): | ||||
|     IE_NAME = 'hotstar' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})' | ||||
|     _TESTS = [{ | ||||
|         # contentData | ||||
|         'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', | ||||
| @@ -92,8 +100,13 @@ class HotStarIE(HotStarBaseIE): | ||||
|         # only available via api v2 | ||||
|         'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _GEO_BYPASS = False | ||||
|     _DEVICE_ID = None | ||||
|     _USER_TOKEN = None | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -121,7 +134,30 @@ class HotStarIE(HotStarBaseIE): | ||||
|         headers = {'Referer': url} | ||||
|         formats = [] | ||||
|         geo_restricted = False | ||||
|         playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets'] | ||||
|  | ||||
|         if not self._USER_TOKEN: | ||||
|             self._DEVICE_ID = compat_str(uuid.uuid4()) | ||||
|             self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, { | ||||
|                 'X-HS-Platform': 'PCTV', | ||||
|                 'Content-Type': 'application/json', | ||||
|             }, data=json.dumps({ | ||||
|                 'device_ids': [{ | ||||
|                     'id': self._DEVICE_ID, | ||||
|                     'type': 'device_id', | ||||
|                 }], | ||||
|             }).encode())['user_identity'] | ||||
|  | ||||
|         playback_sets = self._call_api_v2( | ||||
|             'play/v2/playback/content/' + video_id, video_id, { | ||||
|                 'X-HS-Platform': 'web', | ||||
|                 'X-HS-AppVersion': '6.99.1', | ||||
|                 'X-HS-UserToken': self._USER_TOKEN, | ||||
|             }, query={ | ||||
|                 'device-id': self._DEVICE_ID, | ||||
|                 'desired-config': 'encryption:plain', | ||||
|                 'os-name': 'Windows', | ||||
|                 'os-version': '10', | ||||
|             })['data']['playBackSets'] | ||||
|         for playback_set in playback_sets: | ||||
|             if not isinstance(playback_set, dict): | ||||
|                 continue | ||||
| @@ -163,19 +199,22 @@ class HotStarIE(HotStarBaseIE): | ||||
|         for f in formats: | ||||
|             f.setdefault('http_headers', {}).update(headers) | ||||
|  | ||||
|         image = try_get(video_data, lambda x: x['image']['h'], compat_str) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None, | ||||
|             'description': video_data.get('description'), | ||||
|             'duration': int_or_none(video_data.get('duration')), | ||||
|             'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), | ||||
|             'formats': formats, | ||||
|             'channel': video_data.get('channelName'), | ||||
|             'channel_id': video_data.get('channelId'), | ||||
|             'channel_id': str_or_none(video_data.get('channelId')), | ||||
|             'series': video_data.get('showName'), | ||||
|             'season': video_data.get('seasonName'), | ||||
|             'season_number': int_or_none(video_data.get('seasonNo')), | ||||
|             'season_id': video_data.get('seasonId'), | ||||
|             'season_id': str_or_none(video_data.get('seasonId')), | ||||
|             'episode': title, | ||||
|             'episode_number': int_or_none(video_data.get('episodeNo')), | ||||
|         } | ||||
| @@ -183,7 +222,7 @@ class HotStarIE(HotStarBaseIE): | ||||
|  | ||||
| class HotStarPlaylistIE(HotStarBaseIE): | ||||
|     IE_NAME = 'hotstar:playlist' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', | ||||
|         'info_dict': { | ||||
| @@ -193,6 +232,9 @@ class HotStarPlaylistIE(HotStarBaseIE): | ||||
|     }, { | ||||
|         'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
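
Every api.hotstar.com request above is signed with a hotstarauth header: an Akamai-style token of the form st=<now>~exp=<now+6000>~acl=/*, followed by an HMAC-SHA256 of that same string under the hard-coded key. The token construction from _call_api_impl in isolation:

import hashlib
import hmac
import time

AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'

def hotstar_auth():
    # st/exp bound the token's validity window; the hmac covers the whole
    # st=...~exp=...~acl=/* prefix.
    st = int(time.time())
    exp = st + 6000
    auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
    auth += '~hmac=' + hmac.new(
        AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
    return auth

# Sent as the 'hotstarauth' header on every API call.
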
							
								
								
									
97	youtube_dl/extractor/iheart.py	Normal file
							| @@ -0,0 +1,97 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     clean_podcast_url, | ||||
|     int_or_none, | ||||
|     str_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class IHeartRadioBaseIE(InfoExtractor): | ||||
|     def _call_api(self, path, video_id, fatal=True, query=None): | ||||
|         return self._download_json( | ||||
|             'https://api.iheart.com/api/v3/podcast/' + path, | ||||
|             video_id, fatal=fatal, query=query) | ||||
|  | ||||
|     def _extract_episode(self, episode): | ||||
|         return { | ||||
|             'thumbnail': episode.get('imageUrl'), | ||||
|             'description': clean_html(episode.get('description')), | ||||
|             'timestamp': int_or_none(episode.get('startDate'), 1000), | ||||
|             'duration': int_or_none(episode.get('duration')), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class IHeartRadioIE(IHeartRadioBaseIE): | ||||
|     IE_NAME = 'iheartradio' | ||||
|     _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true', | ||||
|         'md5': 'c8609c92c8688dcb69d8541042b8abca', | ||||
|         'info_dict': { | ||||
|             'id': '70346499', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus', | ||||
|             'description': 'md5:96cc7297b3a5a9ebae28643801c96fae', | ||||
|             'timestamp': 1597741200, | ||||
|             'upload_date': '20200818', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         episode_id = self._match_id(url) | ||||
|         episode = self._call_api( | ||||
|             'episodes/' + episode_id, episode_id)['episode'] | ||||
|         info = self._extract_episode(episode) | ||||
|         info.update({ | ||||
|             'id': episode_id, | ||||
|             'title': episode['title'], | ||||
|             'url': clean_podcast_url(episode['mediaUrl']), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class IHeartRadioPodcastIE(IHeartRadioBaseIE): | ||||
|     IE_NAME = 'iheartradio:podcast' | ||||
|     _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/', | ||||
|         'info_dict': { | ||||
|             'id': '30717896', | ||||
|             'title': 'It Could Happen Here', | ||||
|             'description': 'md5:5842117412a967eb0b01f8088eb663e2', | ||||
|         }, | ||||
|         'playlist_mincount': 11, | ||||
|     }, { | ||||
|         'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         podcast_id = self._match_id(url) | ||||
|         path = 'podcasts/' + podcast_id | ||||
|         episodes = self._call_api( | ||||
|             path + '/episodes', podcast_id, query={'limit': 1000000000})['data'] | ||||
|  | ||||
|         entries = [] | ||||
|         for episode in episodes: | ||||
|             episode_id = str_or_none(episode.get('id')) | ||||
|             if not episode_id: | ||||
|                 continue | ||||
|             info = self._extract_episode(episode) | ||||
|             info.update({ | ||||
|                 '_type': 'url', | ||||
|                 'id': episode_id, | ||||
|                 'title': episode.get('title'), | ||||
|                 'url': 'iheartradio:' + episode_id, | ||||
|                 'ie_key': IHeartRadioIE.ie_key(), | ||||
|             }) | ||||
|             entries.append(info) | ||||
|  | ||||
|         podcast = self._call_api(path, podcast_id, False) or {} | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, podcast_id, podcast.get('title'), podcast.get('description')) | ||||
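
The iHeart API reports startDate in milliseconds, which _extract_episode converts by passing a scale of 1000 to int_or_none. A trimmed-down sketch of that helper's behaviour (the real youtube_dl.utils.int_or_none also takes default/invscale arguments):

def int_or_none(v, scale=1):
    # Missing values stay None; otherwise coerce to int and divide by scale.
    if v is None:
        return None
    try:
        return int(v) // scale
    except (TypeError, ValueError):
        return None

print(int_or_none('1597741200000', 1000))  # 1597741200, the timestamp in the test above
print(int_or_none(None, 1000))             # None
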
| @@ -22,7 +22,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class InstagramIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))' | ||||
|     _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', | ||||
|         'md5': '0d2da106a9d2631273e192b372806516', | ||||
| @@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor): | ||||
|             'timestamp': 1371748545, | ||||
|             'upload_date': '20130620', | ||||
|             'uploader_id': 'naomipq', | ||||
|             'uploader': 'Naomi Leonor Phan-Quang', | ||||
|             'uploader': 'B E A U T Y  F O R  A S H E S', | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'comments': list, | ||||
| @@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://www.instagram.com/tv/aye83DjauH/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.instagram.com/reel/CDUMkliABpa/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         (video_url, description, thumbnail, timestamp, uploader, | ||||
|         (media, video_url, description, thumbnail, timestamp, uploader, | ||||
|          uploader_id, like_count, comment_count, comments, height, | ||||
|          width) = [None] * 11 | ||||
|          width) = [None] * 12 | ||||
|  | ||||
|         shared_data = self._parse_json( | ||||
|             self._search_regex( | ||||
| @@ -137,6 +140,18 @@ class InstagramIE(InfoExtractor): | ||||
|                 (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], | ||||
|                  lambda x: x['entry_data']['PostPage'][0]['media']), | ||||
|                 dict) | ||||
|         # _sharedData.entry_data.PostPage is empty when authenticated (see | ||||
|         # https://github.com/ytdl-org/youtube-dl/pull/22880) | ||||
|         if not media: | ||||
|             additional_data = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;', | ||||
|                     webpage, 'additional data', default='{}'), | ||||
|                 video_id, fatal=False) | ||||
|             if additional_data: | ||||
|                 media = try_get( | ||||
|                     additional_data, lambda x: x['graphql']['shortcode_media'], | ||||
|                     dict) | ||||
|         if media: | ||||
|             video_url = media.get('video_url') | ||||
|             height = int_or_none(media.get('dimensions', {}).get('height')) | ||||
| @@ -144,17 +159,23 @@ class InstagramIE(InfoExtractor): | ||||
|             description = try_get( | ||||
|                 media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], | ||||
|                 compat_str) or media.get('caption') | ||||
|                 thumbnail = media.get('display_src') | ||||
|             thumbnail = media.get('display_src') or media.get('display_url') | ||||
|             timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) | ||||
|             uploader = media.get('owner', {}).get('full_name') | ||||
|             uploader_id = media.get('owner', {}).get('username') | ||||
|  | ||||
|                 def get_count(key, kind): | ||||
|                     return int_or_none(try_get( | ||||
|             def get_count(keys, kind): | ||||
|                 if not isinstance(keys, (list, tuple)): | ||||
|                     keys = [keys] | ||||
|                 for key in keys: | ||||
|                     count = int_or_none(try_get( | ||||
|                         media, (lambda x: x['edge_media_%s' % key]['count'], | ||||
|                                 lambda x: x['%ss' % kind]['count']))) | ||||
|                     if count is not None: | ||||
|                         return count | ||||
|             like_count = get_count('preview_like', 'like') | ||||
|                 comment_count = get_count('to_comment', 'comment') | ||||
|             comment_count = get_count( | ||||
|                 ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment') | ||||
|  | ||||
|             comments = [{ | ||||
|                 'author': comment.get('user', {}).get('username'), | ||||
|   | ||||
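
The reworked get_count above now probes several GraphQL edge names before falling back to the legacy counts object. The same helper pulled out on its own, fed an invented media dict:

from youtube_dl.utils import int_or_none, try_get

def get_count(media, keys, kind):
    if not isinstance(keys, (list, tuple)):
        keys = [keys]
    for key in keys:
        # Try the GraphQL edge_media_<key> layout first, then the older
        # <kind>s.count layout.
        count = int_or_none(try_get(
            media, (lambda x: x['edge_media_%s' % key]['count'],
                    lambda x: x['%ss' % kind]['count'])))
        if count is not None:
            return count

media = {'edge_media_to_parent_comment': {'count': 12}}
print(get_count(media, ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment'))  # 12
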
| @@ -1,29 +1,21 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import uuid | ||||
| import xml.etree.ElementTree as etree | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .brightcove import BrightcoveNewIE | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_etree_register_namespace, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     get_element_by_class, | ||||
|     JSON_LD_RE, | ||||
|     merge_dicts, | ||||
|     parse_duration, | ||||
|     smuggle_url, | ||||
|     url_or_none, | ||||
|     xpath_with_ns, | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -31,14 +23,18 @@ class ITVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' | ||||
|     _GEO_COUNTRIES = ['GB'] | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', | ||||
|         'url': 'https://www.itv.com/hub/liar/2a4547a0012', | ||||
|         'info_dict': { | ||||
|             'id': '2a2936a0053', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Home Movie', | ||||
|             'id': '2a4547a0012', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Liar - Series 2 - Episode 6', | ||||
|             'description': 'md5:d0f91536569dec79ea184f0a44cca089', | ||||
|             'series': 'Liar', | ||||
|             'season_number': 2, | ||||
|             'episode_number': 6, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
| @@ -61,139 +57,8 @@ class ITVIE(InfoExtractor): | ||||
|         params = extract_attributes(self._search_regex( | ||||
|             r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) | ||||
|  | ||||
|         ns_map = { | ||||
|             'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/', | ||||
|             'tem': 'http://tempuri.org/', | ||||
|             'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types', | ||||
|             'com': 'http://schemas.itv.com/2009/05/Common', | ||||
|         } | ||||
|         for ns, full_ns in ns_map.items(): | ||||
|             compat_etree_register_namespace(ns, full_ns) | ||||
|  | ||||
|         def _add_ns(name): | ||||
|             return xpath_with_ns(name, ns_map) | ||||
|  | ||||
|         def _add_sub_element(element, name): | ||||
|             return etree.SubElement(element, _add_ns(name)) | ||||
|  | ||||
|         production_id = ( | ||||
|             params.get('data-video-autoplay-id') | ||||
|             or '%s#001' % ( | ||||
|                 params.get('data-video-episode-id') | ||||
|                 or video_id.replace('a', '/'))) | ||||
|  | ||||
|         req_env = etree.Element(_add_ns('soapenv:Envelope')) | ||||
|         _add_sub_element(req_env, 'soapenv:Header') | ||||
|         body = _add_sub_element(req_env, 'soapenv:Body') | ||||
|         get_playlist = _add_sub_element(body, ('tem:GetPlaylist')) | ||||
|         request = _add_sub_element(get_playlist, 'tem:request') | ||||
|         _add_sub_element(request, 'itv:ProductionId').text = production_id | ||||
|         _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper() | ||||
|         vodcrid = _add_sub_element(request, 'itv:Vodcrid') | ||||
|         _add_sub_element(vodcrid, 'com:Id') | ||||
|         _add_sub_element(request, 'itv:Partition') | ||||
|         user_info = _add_sub_element(get_playlist, 'tem:userInfo') | ||||
|         _add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv' | ||||
|         _add_sub_element(user_info, 'itv:DM') | ||||
|         _add_sub_element(user_info, 'itv:RevenueScienceValue') | ||||
|         _add_sub_element(user_info, 'itv:SessionId') | ||||
|         _add_sub_element(user_info, 'itv:SsoToken') | ||||
|         _add_sub_element(user_info, 'itv:UserToken') | ||||
|         site_info = _add_sub_element(get_playlist, 'tem:siteInfo') | ||||
|         _add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None' | ||||
|         _add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV' | ||||
|         _add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any' | ||||
|         _add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO' | ||||
|         _add_sub_element(site_info, 'itv:Category') | ||||
|         _add_sub_element(site_info, 'itv:Platform').text = 'DotCom' | ||||
|         _add_sub_element(site_info, 'itv:Site').text = 'ItvCom' | ||||
|         device_info = _add_sub_element(get_playlist, 'tem:deviceInfo') | ||||
|         _add_sub_element(device_info, 'itv:ScreenSize').text = 'Big' | ||||
|         player_info = _add_sub_element(get_playlist, 'tem:playerInfo') | ||||
|         _add_sub_element(player_info, 'itv:Version').text = '2' | ||||
|  | ||||
|         headers = self.geo_verification_headers() | ||||
|         headers.update({ | ||||
|             'Content-Type': 'text/xml; charset=utf-8', | ||||
|             'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', | ||||
|         }) | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, default={}) | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|  | ||||
|         def extract_subtitle(sub_url): | ||||
|             ext = determine_ext(sub_url, 'ttml') | ||||
|             subtitles.setdefault('en', []).append({ | ||||
|                 'url': sub_url, | ||||
|                 'ext': 'ttml' if ext == 'xml' else ext, | ||||
|             }) | ||||
|  | ||||
|         resp_env = self._download_xml( | ||||
|             params['data-playlist-url'], video_id, | ||||
|             headers=headers, data=etree.tostring(req_env), fatal=False) | ||||
|         if resp_env: | ||||
|             playlist = xpath_element(resp_env, './/Playlist') | ||||
|             if playlist is None: | ||||
|                 fault_code = xpath_text(resp_env, './/faultcode') | ||||
|                 fault_string = xpath_text(resp_env, './/faultstring') | ||||
|                 if fault_code == 'InvalidGeoRegion': | ||||
|                     self.raise_geo_restricted( | ||||
|                         msg=fault_string, countries=self._GEO_COUNTRIES) | ||||
|                 elif fault_code not in ( | ||||
|                         'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): | ||||
|                     raise ExtractorError( | ||||
|                         '%s said: %s' % (self.IE_NAME, fault_string), expected=True) | ||||
|                 info.update({ | ||||
|                     'title': self._og_search_title(webpage), | ||||
|                     'episode_title': params.get('data-video-episode'), | ||||
|                     'series': params.get('data-video-title'), | ||||
|                 }) | ||||
|             else: | ||||
|                 title = xpath_text(playlist, 'EpisodeTitle', default=None) | ||||
|                 info.update({ | ||||
|                     'title': title, | ||||
|                     'episode_title': title, | ||||
|                     'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), | ||||
|                     'series': xpath_text(playlist, 'ProgrammeTitle'), | ||||
|                     'duration': parse_duration(xpath_text(playlist, 'Duration')), | ||||
|                 }) | ||||
|                 video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) | ||||
|                 media_files = xpath_element(video_element, 'MediaFiles', fatal=True) | ||||
|                 rtmp_url = media_files.attrib['base'] | ||||
|  | ||||
|                 for media_file in media_files.findall('MediaFile'): | ||||
|                     play_path = xpath_text(media_file, 'URL') | ||||
|                     if not play_path: | ||||
|                         continue | ||||
|                     tbr = int_or_none(media_file.get('bitrate'), 1000) | ||||
|                     f = { | ||||
|                         'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), | ||||
|                         'play_path': play_path, | ||||
|                         # Providing this swfVfy allows to avoid truncated downloads | ||||
|                         'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', | ||||
|                         'page_url': url, | ||||
|                         'tbr': tbr, | ||||
|                         'ext': 'flv', | ||||
|                     } | ||||
|                     app = self._search_regex( | ||||
|                         'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None) | ||||
|                     if app: | ||||
|                         f.update({ | ||||
|                             'url': rtmp_url.split('?', 1)[0], | ||||
|                             'app': app, | ||||
|                         }) | ||||
|                     else: | ||||
|                         f['url'] = rtmp_url | ||||
|                     formats.append(f) | ||||
|  | ||||
|                 for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): | ||||
|                     if caption_url.text: | ||||
|                         extract_subtitle(caption_url.text) | ||||
|  | ||||
|         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id') | ||||
|         hmac = params.get('data-video-hmac') | ||||
|         if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url): | ||||
|         ios_playlist_url = params.get('data-video-playlist') or params['data-video-id'] | ||||
|         hmac = params['data-video-hmac'] | ||||
|         headers = self.geo_verification_headers() | ||||
|         headers.update({ | ||||
|             'Accept': 'application/vnd.itv.vod.playlist.v2+json', | ||||
| @@ -227,11 +92,12 @@ class ITVIE(InfoExtractor): | ||||
|                     }, | ||||
|                     'platformTag': 'dotcom' | ||||
|                 } | ||||
|                 }).encode(), headers=headers, fatal=False) | ||||
|             if ios_playlist: | ||||
|                 video_data = ios_playlist.get('Playlist', {}).get('Video', {}) | ||||
|             }).encode(), headers=headers) | ||||
|         video_data = ios_playlist['Playlist']['Video'] | ||||
|         ios_base_url = video_data.get('Base') | ||||
|                 for media_file in video_data.get('MediaFiles', []): | ||||
|  | ||||
|         formats = [] | ||||
|         for media_file in (video_data.get('MediaFiles') or []): | ||||
|             href = media_file.get('Href') | ||||
|             if not href: | ||||
|                 continue | ||||
| @@ -246,35 +112,42 @@ class ITVIE(InfoExtractor): | ||||
|                 formats.append({ | ||||
|                     'url': href, | ||||
|                 }) | ||||
|                 subs = video_data.get('Subtitles') | ||||
|                 if isinstance(subs, list): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         subs = video_data.get('Subtitles') or [] | ||||
|         for sub in subs: | ||||
|             if not isinstance(sub, dict): | ||||
|                 continue | ||||
|             href = url_or_none(sub.get('Href')) | ||||
|                         if href: | ||||
|                             extract_subtitle(href) | ||||
|                 if not info.get('duration'): | ||||
|                     info['duration'] = parse_duration(video_data.get('Duration')) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info.update({ | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             if not href: | ||||
|                 continue | ||||
|             subtitles.setdefault('en', []).append({ | ||||
|                 'url': href, | ||||
|                 'ext': determine_ext(href, 'vtt'), | ||||
|             }) | ||||
|  | ||||
|         webpage_info = self._search_json_ld(webpage, video_id, default={}) | ||||
|         if not webpage_info.get('title'): | ||||
|             webpage_info['title'] = self._html_search_regex( | ||||
|                 r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<', | ||||
|                 webpage, 'title', default=None) or self._og_search_title( | ||||
|                 webpage, default=None) or self._html_search_meta( | ||||
|                 'twitter:title', webpage, 'title', | ||||
|                 default=None) or webpage_info['episode'] | ||||
|         info = self._search_json_ld(webpage, video_id, default={}) | ||||
|         if not info: | ||||
|             json_ld = self._parse_json(self._search_regex( | ||||
|                 JSON_LD_RE, webpage, 'JSON-LD', '{}', | ||||
|                 group='json_ld'), video_id, fatal=False) | ||||
|             if json_ld and json_ld.get('@type') == 'BreadcrumbList': | ||||
|                 for ile in (json_ld.get('itemListElement:') or []): | ||||
|                     item = ile.get('item:') or {} | ||||
|                     if item.get('@type') == 'TVEpisode': | ||||
|                         item['@context'] = 'http://schema.org' | ||||
|                         info = self._json_ld(item, video_id, fatal=False) or {} | ||||
|                         break | ||||
|  | ||||
|         return merge_dicts(info, webpage_info) | ||||
|         return merge_dicts({ | ||||
|             'id': video_id, | ||||
|             'title': self._html_search_meta(['og:title', 'twitter:title'], webpage), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'duration': parse_duration(video_data.get('Duration')), | ||||
|             'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)), | ||||
|         }, info) | ||||
|  | ||||
|  | ||||
| class ITVBTCCIE(InfoExtractor): | ||||
|   | ||||
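
When _search_json_ld returns nothing, the reworked extractor re-parses the raw JSON-LD and walks a BreadcrumbList looking for a TVEpisode item. A toy run of that walk, with key names copied from the loop above and sample values invented:

import json

json_ld = json.loads('''{
  "@type": "BreadcrumbList",
  "itemListElement:": [
    {"item:": {"@type": "TVSeries", "name": "Liar"}},
    {"item:": {"@type": "TVEpisode", "name": "Liar - Series 2 - Episode 6"}}
  ]
}''')

episode = None
if json_ld.get('@type') == 'BreadcrumbList':
    for ile in (json_ld.get('itemListElement:') or []):
        item = ile.get('item:') or {}
        if item.get('@type') == 'TVEpisode':
            # The extractor pins @context before handing the item to _json_ld().
            item['@context'] = 'http://schema.org'
            episode = item
            break

print(episode and episode['name'])  # Liar - Series 2 - Episode 6
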
| @@ -1,97 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     srt_subtitles_timecode, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class KanalPlayIE(InfoExtractor): | ||||
|     IE_DESC = 'Kanal 5/9/11 Play' | ||||
|     _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', | ||||
|         'info_dict': { | ||||
|             'id': '3270012277', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Saknar både dusch och avlopp', | ||||
|             'description': 'md5:6023a95832a06059832ae93bc3c7efb7', | ||||
|             'duration': 2636.36, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _fix_subtitles(self, subs): | ||||
|         return '\r\n\r\n'.join( | ||||
|             '%s\r\n%s --> %s\r\n%s' | ||||
|             % ( | ||||
|                 num, | ||||
|                 srt_subtitles_timecode(item['startMillis'] / 1000.0), | ||||
|                 srt_subtitles_timecode(item['endMillis'] / 1000.0), | ||||
|                 item['text'], | ||||
|             ) for num, item in enumerate(subs, 1)) | ||||
|  | ||||
|     def _get_subtitles(self, channel_id, video_id): | ||||
|         subs = self._download_json( | ||||
|             'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), | ||||
|             video_id, 'Downloading subtitles JSON', fatal=False) | ||||
|         return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         channel_id = mobj.group('channel_id') | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id), | ||||
|             video_id) | ||||
|  | ||||
|         reasons_for_no_streams = video.get('reasonsForNoStreams') | ||||
|         if reasons_for_no_streams: | ||||
|             raise ExtractorError( | ||||
|                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)), | ||||
|                 expected=True) | ||||
|  | ||||
|         title = video['title'] | ||||
|         description = video.get('description') | ||||
|         duration = float_or_none(video.get('length'), 1000) | ||||
|         thumbnail = video.get('posterUrl') | ||||
|  | ||||
|         stream_base_url = video['streamBaseUrl'] | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': stream_base_url, | ||||
|             'play_path': stream['source'], | ||||
|             'ext': 'flv', | ||||
|             'tbr': float_or_none(stream.get('bitrate'), 1000), | ||||
|             'rtmp_real_time': True, | ||||
|         } for stream in video['streams']] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         if video.get('hasSubtitle'): | ||||
|             subtitles = self.extract_subtitles(channel_id, video_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
| @@ -2,92 +2,71 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .canvas import CanvasIE | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class KetnetIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', | ||||
|         'md5': '6bdeb65998930251bbd1c510750edba9', | ||||
|         'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook', | ||||
|         'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9', | ||||
|         'info_dict': { | ||||
|             'id': 'zomerse-filmpjes', | ||||
|             'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Gluur mee op de filmset en op Pennenzakkenrock', | ||||
|             'description': 'Gluur mee met Ghost Rockers op de filmset', | ||||
|             'title': 'Nachtwacht - Reeks 3: Aflevering 1', | ||||
|             'description': 'De Nachtwacht krijgt te maken met een parasiet', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     }, { | ||||
|         # mzid in playerConfig instead of sources | ||||
|         'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook', | ||||
|         'md5': '90139b746a0a9bd7bb631283f6e2a64e', | ||||
|         'info_dict': { | ||||
|             'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', | ||||
|             'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Nachtwacht: De Greystook', | ||||
|             'description': 'md5:1db3f5dc4c7109c821261e7512975be7', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 1468.03, | ||||
|             'duration': 1468.02, | ||||
|             'timestamp': 1609225200, | ||||
|             'upload_date': '20201229', | ||||
|             'series': 'Nachtwacht', | ||||
|             'season': 'Reeks 3', | ||||
|             'episode': 'De Greystook', | ||||
|             'episode_number': 1, | ||||
|         }, | ||||
|         'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], | ||||
|     }, { | ||||
|         'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # mzsource, geo restricted to Belgium | ||||
|         'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe', | ||||
|         'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video = self._download_json( | ||||
|             'https://senior-bff.ketnet.be/graphql', display_id, query={ | ||||
|                 'query': '''{ | ||||
|   video(id: "content/ketnet/nl/%s.model.json") { | ||||
|     description | ||||
|     episodeNr | ||||
|     imageUrl | ||||
|     mediaReference | ||||
|     programTitle | ||||
|     publicationDate | ||||
|     seasonTitle | ||||
|     subtitleVideodetail | ||||
|     titleVideodetail | ||||
|   } | ||||
| }''' % display_id, | ||||
|             })['data']['video'] | ||||
|  | ||||
|         config = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, | ||||
|                 'player config'), | ||||
|             video_id) | ||||
|  | ||||
|         mzid = config.get('mzid') | ||||
|         if mzid: | ||||
|             return self.url_result( | ||||
|                 'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid, | ||||
|                 CanvasIE.ie_key(), video_id=mzid) | ||||
|  | ||||
|         title = config['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for source_key in ('', 'mz'): | ||||
|             source = config.get('%ssource' % source_key) | ||||
|             if not isinstance(source, dict): | ||||
|                 continue | ||||
|             for format_id, format_url in source.items(): | ||||
|                 if format_id == 'hls': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         format_url, video_id, 'mp4', | ||||
|                         entry_protocol='m3u8_native', m3u8_id=format_id, | ||||
|                         fatal=False)) | ||||
|                 elif format_id == 'hds': | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         format_url, video_id, f4m_id=format_id, fatal=False)) | ||||
|                 else: | ||||
|                     formats.append({ | ||||
|                         'url': format_url, | ||||
|                         'format_id': format_id, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|         mz_id = compat_urllib_parse_unquote(video['mediaReference']) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': config.get('description'), | ||||
|             'thumbnail': config.get('image'), | ||||
|             'series': config.get('program'), | ||||
|             'episode': config.get('episode'), | ||||
|             'formats': formats, | ||||
|             '_type': 'url_transparent', | ||||
|             'id': mz_id, | ||||
|             'title': video['titleVideodetail'], | ||||
|             'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id, | ||||
|             'thumbnail': video.get('imageUrl'), | ||||
|             'description': video.get('description'), | ||||
|             'timestamp': parse_iso8601(video.get('publicationDate')), | ||||
|             'series': video.get('programTitle'), | ||||
|             'season': video.get('seasonTitle'), | ||||
|             'episode': video.get('subtitleVideodetail'), | ||||
|             'episode_number': int_or_none(video.get('episodeNr')), | ||||
|             'ie_key': CanvasIE.ie_key(), | ||||
|         } | ||||
|   | ||||
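
The GraphQL response's mediaReference comes back URL-encoded; after unquoting, it is the mediazone asset id that the url_transparent result hands to CanvasIE. A minimal sketch, with the encoded value reconstructed from the asset id in the test above:

from urllib.parse import unquote

media_reference = 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f%24vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd'
mz_id = unquote(media_reference)  # '%24' -> '$'
print('https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id)
# https://mediazone.vrt.be/api/v1/ketnet/assets/pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd
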
| @@ -1,82 +1,107 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class KhanAcademyIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])' | ||||
|     IE_NAME = 'KhanAcademy' | ||||
| class KhanAcademyBaseIE(InfoExtractor): | ||||
|     _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.khanacademy.org/video/one-time-pad', | ||||
|         'md5': '7b391cce85e758fb94f763ddc1bbb979', | ||||
|     def _parse_video(self, video): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': video['youtubeId'], | ||||
|             'id': video.get('slug'), | ||||
|             'title': video.get('title'), | ||||
|             'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'), | ||||
|             'duration': int_or_none(video.get('duration')), | ||||
|             'description': video.get('description'), | ||||
|             'ie_key': 'Youtube', | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         component_props = self._parse_json(self._download_json( | ||||
|             'https://www.khanacademy.org/api/internal/graphql', | ||||
|             display_id, query={ | ||||
|                 'hash': 1604303425, | ||||
|                 'variables': json.dumps({ | ||||
|                     'path': display_id, | ||||
|                     'queryParams': '', | ||||
|                 }), | ||||
|             })['data']['contentJson'], display_id)['componentProps'] | ||||
|         return self._parse_component_props(component_props) | ||||
|  | ||||
|  | ||||
| class KhanAcademyIE(KhanAcademyBaseIE): | ||||
|     IE_NAME = 'khanacademy' | ||||
|     _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/') | ||||
|     _TEST = { | ||||
|         'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad', | ||||
|         'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0', | ||||
|         'info_dict': { | ||||
|             'id': 'one-time-pad', | ||||
|             'ext': 'webm', | ||||
|             'id': 'FlIG3TvQCBQ', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The one-time pad', | ||||
|             'description': 'The perfect cipher', | ||||
|             'duration': 176, | ||||
|             'uploader': 'Brit Cruise', | ||||
|             'uploader_id': 'khanacademy', | ||||
|             'upload_date': '20120411', | ||||
|             'timestamp': 1334170113, | ||||
|             'license': 'cc-by-nc-sa', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         'url': 'https://www.khanacademy.org/math/applied-math/cryptography', | ||||
|     } | ||||
|  | ||||
|     def _parse_component_props(self, component_props): | ||||
|         video = component_props['tutorialPageData']['contentModel'] | ||||
|         info = self._parse_video(video) | ||||
|         author_names = video.get('authorNames') | ||||
|         info.update({ | ||||
|             'uploader': ', '.join(author_names) if author_names else None, | ||||
|             'timestamp': parse_iso8601(video.get('dateAdded')), | ||||
|             'license': video.get('kaUserLicense'), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class KhanAcademyUnitIE(KhanAcademyBaseIE): | ||||
|     IE_NAME = 'khanacademy:unit' | ||||
|     _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.khanacademy.org/computing/computer-science/cryptography', | ||||
|         'info_dict': { | ||||
|             'id': 'cryptography', | ||||
|             'title': 'Journey into cryptography', | ||||
|             'title': 'Cryptography', | ||||
|             'description': 'How have humans protected their secret messages through history? What has changed today?', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         video_id = m.group('id') | ||||
|  | ||||
|         if m.group('key') == 'video': | ||||
|             data = self._download_json( | ||||
|                 'http://api.khanacademy.org/api/v1/videos/' + video_id, | ||||
|                 video_id, 'Downloading video info') | ||||
|  | ||||
|             upload_date = unified_strdate(data['date_added']) | ||||
|             uploader = ', '.join(data['author_names']) | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': data['url'], | ||||
|                 'id': video_id, | ||||
|                 'title': data['title'], | ||||
|                 'thumbnail': data['image_url'], | ||||
|                 'duration': data['duration'], | ||||
|                 'description': data['description'], | ||||
|                 'uploader': uploader, | ||||
|                 'upload_date': upload_date, | ||||
|         'playlist_mincount': 31, | ||||
|     } | ||||
|         else: | ||||
|             # topic | ||||
|             data = self._download_json( | ||||
|                 'http://api.khanacademy.org/api/v1/topic/' + video_id, | ||||
|                 video_id, 'Downloading topic info') | ||||
|  | ||||
|             entries = [ | ||||
|                 { | ||||
|                     '_type': 'url', | ||||
|                     'url': c['url'], | ||||
|                     'id': c['id'], | ||||
|                     'title': c['title'], | ||||
|                 } | ||||
|                 for c in data['children'] if c['kind'] in ('Video', 'Topic')] | ||||
|     def _parse_component_props(self, component_props): | ||||
|         curation = component_props['curation'] | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'id': video_id, | ||||
|                 'title': data['title'], | ||||
|                 'description': data['description'], | ||||
|                 'entries': entries, | ||||
|         entries = [] | ||||
|         tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or [] | ||||
|         for tutorial_number, tutorial in enumerate(tutorials, 1): | ||||
|             chapter_info = { | ||||
|                 'chapter': tutorial.get('title'), | ||||
|                 'chapter_number': tutorial_number, | ||||
|                 'chapter_id': tutorial.get('id'), | ||||
|             } | ||||
|             for content_item in (tutorial.get('contentItems') or []): | ||||
|                 if content_item.get('kind') == 'Video': | ||||
|                     info = self._parse_video(content_item) | ||||
|                     info.update(chapter_info) | ||||
|                     entries.append(info) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, curation.get('unit'), curation.get('title'), | ||||
|             curation.get('description')) | ||||
|   | ||||
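
Both page types are now fetched through Khan Academy's internal GraphQL endpoint, keyed by a persisted-query hash with the URL path as the only variable. A sketch of how that request is assembled; the response's data.contentJson is itself a JSON string whose componentProps feed _parse_component_props:

import json
from urllib.parse import urlencode

display_id = 'computing/computer-science/cryptography/crypt/v/one-time-pad'
query = urlencode({
    'hash': 1604303425,
    'variables': json.dumps({'path': display_id, 'queryParams': ''}),
})
# GET request issued by _real_extract (built here by hand for illustration):
print('https://www.khanacademy.org/api/internal/graphql?' + query)
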
| @@ -1,6 +1,7 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import functools | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -10,13 +11,73 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     OnDemandPagedList, | ||||
|     try_get, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LBRYIE(InfoExtractor): | ||||
|     IE_NAME = 'lbry.tv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])' | ||||
| class LBRYBaseIE(InfoExtractor): | ||||
|     _BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/' | ||||
|     _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}' | ||||
|     _OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX | ||||
|     _SUPPORTED_STREAM_TYPES = ['video', 'audio'] | ||||
|  | ||||
|     def _call_api_proxy(self, method, display_id, params, resource): | ||||
|         return self._download_json( | ||||
|             'https://api.lbry.tv/api/v1/proxy', | ||||
|             display_id, 'Downloading %s JSON metadata' % resource, | ||||
|             headers={'Content-Type': 'application/json-rpc'}, | ||||
|             data=json.dumps({ | ||||
|                 'method': method, | ||||
|                 'params': params, | ||||
|             }).encode())['result'] | ||||
|  | ||||
|     def _resolve_url(self, url, display_id, resource): | ||||
|         return self._call_api_proxy( | ||||
|             'resolve', display_id, {'urls': url}, resource)[url] | ||||
|  | ||||
|     def _permanent_url(self, url, claim_name, claim_id): | ||||
|         return urljoin(url, '/%s:%s' % (claim_name, claim_id)) | ||||
|  | ||||
|     def _parse_stream(self, stream, url): | ||||
|         stream_value = stream.get('value') or {} | ||||
|         stream_type = stream_value.get('stream_type') | ||||
|         source = stream_value.get('source') or {} | ||||
|         media = stream_value.get(stream_type) or {} | ||||
|         signing_channel = stream.get('signing_channel') or {} | ||||
|         channel_name = signing_channel.get('name') | ||||
|         channel_claim_id = signing_channel.get('claim_id') | ||||
|         channel_url = None | ||||
|         if channel_name and channel_claim_id: | ||||
|             channel_url = self._permanent_url(url, channel_name, channel_claim_id) | ||||
|  | ||||
|         info = { | ||||
|             'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str), | ||||
|             'description': stream_value.get('description'), | ||||
|             'license': stream_value.get('license'), | ||||
|             'timestamp': int_or_none(stream.get('timestamp')), | ||||
|             'tags': stream_value.get('tags'), | ||||
|             'duration': int_or_none(media.get('duration')), | ||||
|             'channel': try_get(signing_channel, lambda x: x['value']['title']), | ||||
|             'channel_id': channel_claim_id, | ||||
|             'channel_url': channel_url, | ||||
|             'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')), | ||||
|             'filesize': int_or_none(source.get('size')), | ||||
|         } | ||||
|         if stream_type == 'audio': | ||||
|             info['vcodec'] = 'none' | ||||
|         else: | ||||
|             info.update({ | ||||
|                 'width': int_or_none(media.get('width')), | ||||
|                 'height': int_or_none(media.get('height')), | ||||
|             }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class LBRYIE(LBRYBaseIE): | ||||
|     IE_NAME = 'lbry' | ||||
|     _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX) | ||||
|     _TESTS = [{ | ||||
|         # Video | ||||
|         'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', | ||||
| @@ -28,6 +89,8 @@ class LBRYIE(InfoExtractor): | ||||
|             'description': 'md5:f6cb5c704b332d37f5119313c2c98f51', | ||||
|             'timestamp': 1595694354, | ||||
|             'upload_date': '20200725', | ||||
|             'width': 1280, | ||||
|             'height': 720, | ||||
|         } | ||||
|     }, { | ||||
|         # Audio | ||||
| @@ -40,6 +103,12 @@ class LBRYIE(InfoExtractor): | ||||
|             'description': 'md5:661ac4f1db09f31728931d7b88807a61', | ||||
|             'timestamp': 1591312601, | ||||
|             'upload_date': '20200604', | ||||
|             'tags': list, | ||||
|             'duration': 2570, | ||||
|             'channel': 'The LBRY Foundation', | ||||
|             'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212', | ||||
|             'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212', | ||||
|             'vcodec': 'none', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e', | ||||
| @@ -47,45 +116,99 @@ class LBRYIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b", | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://lbry.tv/Episode-1:e7', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://lbry.tv/@LBRYFoundation/Episode-1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _call_api_proxy(self, method, display_id, params): | ||||
|         return self._download_json( | ||||
|             'https://api.lbry.tv/api/v1/proxy', display_id, | ||||
|             headers={'Content-Type': 'application/json-rpc'}, | ||||
|             data=json.dumps({ | ||||
|                 'method': method, | ||||
|                 'params': params, | ||||
|             }).encode())['result'] | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         if display_id.startswith('$/'): | ||||
|             display_id = display_id.split('/', 2)[-1].replace('/', ':') | ||||
|         else: | ||||
|             display_id = display_id.replace(':', '#') | ||||
|         uri = 'lbry://' + display_id | ||||
|         result = self._resolve_url(uri, display_id, 'stream') | ||||
|         result_value = result['value'] | ||||
|         if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES: | ||||
|             raise ExtractorError('Unsupported URL', expected=True) | ||||
|         claim_id = result['claim_id'] | ||||
|         title = result_value['title'] | ||||
|         streaming_url = self._call_api_proxy( | ||||
|             'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url'] | ||||
|         info = self._parse_stream(result, url) | ||||
|         info.update({ | ||||
|             'id': claim_id, | ||||
|             'title': title, | ||||
|             'url': streaming_url, | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class LBRYChannelIE(LBRYBaseIE): | ||||
|     IE_NAME = 'lbry:channel' | ||||
|     _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://lbry.tv/@LBRYFoundation:0', | ||||
|         'info_dict': { | ||||
|             'id': '0ed629d2b9c601300cacf7eabe9da0be79010212', | ||||
|             'title': 'The LBRY Foundation', | ||||
|             'description': 'Channel for the LBRY Foundation. Follow for updates and news.', | ||||
|         }, | ||||
|         'playlist_count': 29, | ||||
|     }, { | ||||
|         'url': 'https://lbry.tv/@LBRYFoundation', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _PAGE_SIZE = 50 | ||||
|  | ||||
|     def _fetch_page(self, claim_id, url, page): | ||||
|         page += 1 | ||||
|         result = self._call_api_proxy( | ||||
|             'claim_search', claim_id, { | ||||
|                 'channel_ids': [claim_id], | ||||
|                 'claim_type': 'stream', | ||||
|                 'no_totals': True, | ||||
|                 'page': page, | ||||
|                 'page_size': self._PAGE_SIZE, | ||||
|                 'stream_types': self._SUPPORTED_STREAM_TYPES, | ||||
|             }, 'page %d' % page) | ||||
|         for item in (result.get('items') or []): | ||||
|             stream_claim_name = item.get('name') | ||||
|             stream_claim_id = item.get('claim_id') | ||||
|             if not (stream_claim_name and stream_claim_id): | ||||
|                 continue | ||||
|  | ||||
|             info = self._parse_stream(item, url) | ||||
|             info.update({ | ||||
|                 '_type': 'url', | ||||
|                 'id': stream_claim_id, | ||||
|                 'title': try_get(item, lambda x: x['value']['title']), | ||||
|                 'url': self._permanent_url(url, stream_claim_name, stream_claim_id), | ||||
|             }) | ||||
|             yield info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url).replace(':', '#') | ||||
|         uri = 'lbry://' + display_id | ||||
|         result = self._call_api_proxy( | ||||
|             'resolve', display_id, {'urls': [uri]})[uri] | ||||
|         result_value = result['value'] | ||||
|         if result_value.get('stream_type') not in ('video', 'audio'): | ||||
|             raise ExtractorError('Unsupported URL', expected=True) | ||||
|         streaming_url = self._call_api_proxy( | ||||
|             'get', display_id, {'uri': uri})['streaming_url'] | ||||
|         source = result_value.get('source') or {} | ||||
|         media = result_value.get('video') or result_value.get('audio') or {} | ||||
|         signing_channel = result_value.get('signing_channel') or {} | ||||
|  | ||||
|         return { | ||||
|             'id': result['claim_id'], | ||||
|             'title': result_value['title'], | ||||
|             'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str), | ||||
|             'description': result_value.get('description'), | ||||
|             'license': result_value.get('license'), | ||||
|             'timestamp': int_or_none(result.get('timestamp')), | ||||
|             'tags': result_value.get('tags'), | ||||
|             'width': int_or_none(media.get('width')), | ||||
|             'height': int_or_none(media.get('height')), | ||||
|             'duration': int_or_none(media.get('duration')), | ||||
|             'channel': signing_channel.get('name'), | ||||
|             'channel_id': signing_channel.get('claim_id'), | ||||
|             'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')), | ||||
|             'filesize': int_or_none(source.get('size')), | ||||
|             'url': streaming_url, | ||||
|         } | ||||
|         result = self._resolve_url( | ||||
|             'lbry://' + display_id, display_id, 'channel') | ||||
|         claim_id = result['claim_id'] | ||||
|         entries = OnDemandPagedList( | ||||
|             functools.partial(self._fetch_page, claim_id, url), | ||||
|             self._PAGE_SIZE) | ||||
|         result_value = result.get('value') or {} | ||||
|         return self.playlist_result( | ||||
|             entries, claim_id, result_value.get('title'), | ||||
|             result_value.get('description')) | ||||
|   | ||||
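As a rough standalone sketch (Python 3 stdlib only), the resolve call issued by _call_api_proxy/_resolve_url above boils down to a single JSON-RPC POST; the example claim URI is taken from the test case above, and the call needs network access.

import json
import urllib.request

def resolve_claim(lbry_uri):
    # Same JSON-RPC shape as LBRYBaseIE._call_api_proxy('resolve', ...)
    req = urllib.request.Request(
        'https://api.lbry.tv/api/v1/proxy',
        data=json.dumps({'method': 'resolve', 'params': {'urls': lbry_uri}}).encode(),
        headers={'Content-Type': 'application/json-rpc'})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)['result'][lbry_uri]

# claim = resolve_claim('lbry://@Mantega#1/First-day-LBRY#1')  # requires network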
| @@ -8,11 +8,15 @@ from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     orderedSet, | ||||
|     unescapeHTML, | ||||
|     js_to_json, | ||||
|     parse_duration, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
| @@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor): | ||||
|                         ) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', | ||||
|         'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675', | ||||
|         'info_dict': { | ||||
|             'id': '1498-2', | ||||
|             'id': '7971-2', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Introduction to the Practitioner's Brief", | ||||
|             'title': 'What Is Data Science', | ||||
|             'description': 'md5:c574a3c20607144fb36cb65bdde76c99', | ||||
|             'timestamp': 1607387907, | ||||
|             'upload_date': '20201208', | ||||
|             'duration': 304, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'id': '154', | ||||
|             'title': 'AWS Certified Cloud Practitioner', | ||||
|             'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', | ||||
|             'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c', | ||||
|             'duration': 28835, | ||||
|         }, | ||||
|         'playlist_count': 41, | ||||
|         'skip': 'Requires Linux Academy account credentials', | ||||
| @@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor): | ||||
|             self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ | ||||
|                 'client_id': self._CLIENT_ID, | ||||
|                 'response_type': 'token id_token', | ||||
|                 'response_mode': 'web_message', | ||||
|                 'redirect_uri': self._ORIGIN_URL, | ||||
|                 'scope': 'openid email user_impersonation profile', | ||||
|                 'audience': self._ORIGIN_URL, | ||||
| @@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor): | ||||
|  | ||||
|         access_token = self._search_regex( | ||||
|             r'access_token=([^=&]+)', urlh.geturl(), | ||||
|             'access token') | ||||
|             'access token', default=None) | ||||
|         if not access_token: | ||||
|             access_token = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page, | ||||
|                     'authorization response'), None, | ||||
|                 transform_source=js_to_json)['response']['access_token'] | ||||
|  | ||||
|         self._download_webpage( | ||||
|             'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' | ||||
| @@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor): | ||||
|  | ||||
|         # course path | ||||
|         if course_id: | ||||
|             entries = [ | ||||
|                 self.url_result( | ||||
|                     urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) | ||||
|                 for lesson_url in orderedSet(re.findall( | ||||
|                     r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', | ||||
|                     webpage))] | ||||
|             title = unescapeHTML(self._html_search_regex( | ||||
|                 (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)', | ||||
|                  r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), | ||||
|                 webpage, 'title', default=None, group='value')) | ||||
|             description = unescapeHTML(self._html_search_regex( | ||||
|                 r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', | ||||
|                 webpage, 'description', default=None, group='value')) | ||||
|             return self.playlist_result(entries, course_id, title, description) | ||||
|             module = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'), | ||||
|                 item_id) | ||||
|             entries = [] | ||||
|             chapter_number = None | ||||
|             chapter = None | ||||
|             chapter_id = None | ||||
|             for item in module['items']: | ||||
|                 if not isinstance(item, dict): | ||||
|                     continue | ||||
|  | ||||
|                 def type_field(key): | ||||
|                     return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower() | ||||
|                 type_fields = (type_field('name'), type_field('slug')) | ||||
|                 # Move to next module section | ||||
|                 if 'section' in type_fields: | ||||
|                     chapter = item.get('course_name') | ||||
|                     chapter_id = item.get('course_module') | ||||
|                     chapter_number = 1 if not chapter_number else chapter_number + 1 | ||||
|                     continue | ||||
|                 # Skip non-lessons | ||||
|                 if 'lesson' not in type_fields: | ||||
|                     continue | ||||
|                 lesson_url = urljoin(url, item.get('url')) | ||||
|                 if not lesson_url: | ||||
|                     continue | ||||
|                 title = item.get('title') or item.get('lesson_name') | ||||
|                 description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text')) | ||||
|                 entries.append({ | ||||
|                     '_type': 'url_transparent', | ||||
|                     'url': lesson_url, | ||||
|                     'ie_key': LinuxAcademyIE.ie_key(), | ||||
|                     'title': title, | ||||
|                     'description': description, | ||||
|                     'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')), | ||||
|                     'duration': parse_duration(item.get('duration')), | ||||
|                     'chapter': chapter, | ||||
|                     'chapter_id': chapter_id, | ||||
|                     'chapter_number': chapter_number, | ||||
|                 }) | ||||
|             return { | ||||
|                 '_type': 'playlist', | ||||
|                 'entries': entries, | ||||
|                 'id': course_id, | ||||
|                 'title': module.get('title'), | ||||
|                 'description': module.get('md_desc') or clean_html(module.get('desc')), | ||||
|                 'duration': parse_duration(module.get('duration')), | ||||
|             } | ||||
|  | ||||
|         # single video path | ||||
|         info = self._extract_jwplayer_data( | ||||
|             webpage, item_id, require_title=False, m3u8_id='hls',) | ||||
|         title = self._search_regex( | ||||
|         m3u8_url = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'), | ||||
|             item_id)[0]['file'] | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native', | ||||
|             m3u8_id='hls') | ||||
|         self._sort_formats(formats) | ||||
|         info = { | ||||
|             'id': item_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         lesson = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 (r'window\.lesson\s*=\s*({.+?})\s*;', | ||||
|                  r'player\.lesson\s*=\s*({.+?})\s*;'), | ||||
|                 webpage, 'lesson', default='{}'), item_id, fatal=False) | ||||
|         if lesson: | ||||
|             info.update({ | ||||
|                 'title': lesson.get('lesson_name'), | ||||
|                 'description': lesson.get('md_desc') or clean_html(lesson.get('desc')), | ||||
|                 'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')), | ||||
|                 'duration': parse_duration(lesson.get('duration')), | ||||
|             }) | ||||
|         if not info.get('title'): | ||||
|             info['title'] = self._search_regex( | ||||
|                 (r'>Lecture\s*:\s*(?P<value>[^<]+)', | ||||
|                  r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, | ||||
|                 'title', group='value') | ||||
|         info.update({ | ||||
|             'id': item_id, | ||||
|             'title': title, | ||||
|         }) | ||||
|         return info | ||||
|   | ||||
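A toy walk-through of the chapter bookkeeping used in the course path above; the items list is invented, so this only illustrates the section/lesson split, not real Linux Academy data.

items = [
    {'type': {'name': 'SECTION', 'slug': 'section'}, 'course_name': 'Getting Started', 'course_module': 10},
    {'type': {'name': 'Lesson', 'slug': 'lesson'}, 'title': 'What Is Data Science'},
    {'type': {'name': 'Lesson', 'slug': 'lesson'}, 'title': 'Course Tools'},
]

chapter = chapter_id = chapter_number = None
lessons = []
for item in items:
    kinds = {(item.get('type') or {}).get(k, '').lower() for k in ('name', 'slug')}
    if 'section' in kinds:  # a module section starts a new chapter
        chapter, chapter_id = item.get('course_name'), item.get('course_module')
        chapter_number = 1 if not chapter_number else chapter_number + 1
        continue
    if 'lesson' not in kinds:  # skip non-lessons
        continue
    lessons.append((chapter_number, chapter, item['title']))

assert lessons == [(1, 'Getting Started', 'What Is Data Science'), (1, 'Getting Started', 'Course Tools')]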
| @@ -2,12 +2,16 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     url_or_none, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
| @@ -16,6 +20,8 @@ class MDRIE(InfoExtractor): | ||||
|     IE_DESC = 'MDR.DE and KiKA' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' | ||||
|  | ||||
|     _GEO_COUNTRIES = ['DE'] | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # MDR regularly deletes its videos | ||||
|         'url': 'http://www.mdr.de/fakt/video189002.html', | ||||
| @@ -66,6 +72,22 @@ class MDRIE(InfoExtractor): | ||||
|             'duration': 3239, | ||||
|             'uploader': 'MITTELDEUTSCHER RUNDFUNK', | ||||
|         }, | ||||
|     }, { | ||||
|         # empty bitrateVideo and bitrateAudio | ||||
|         'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html', | ||||
|         'info_dict': { | ||||
|             'id': '128372', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Der kleine Wichtel kehrt zurück', | ||||
|             'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a', | ||||
|             'duration': 4876, | ||||
|             'timestamp': 1607823300, | ||||
|             'upload_date': '20201213', | ||||
|             'uploader': 'ZDF', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', | ||||
|         'only_matching': True, | ||||
| @@ -91,10 +113,13 @@ class MDRIE(InfoExtractor): | ||||
|  | ||||
|         title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True) | ||||
|  | ||||
|         type_ = xpath_text(doc, './type', default=None) | ||||
|  | ||||
|         formats = [] | ||||
|         processed_urls = [] | ||||
|         for asset in doc.findall('./assets/asset'): | ||||
|             for source in ( | ||||
|                     'download', | ||||
|                     'progressiveDownload', | ||||
|                     'dynamicHttpStreamingRedirector', | ||||
|                     'adaptiveHttpStreamingRedirector'): | ||||
| @@ -102,63 +127,49 @@ class MDRIE(InfoExtractor): | ||||
|                 if url_el is None: | ||||
|                     continue | ||||
|  | ||||
|                 video_url = url_el.text | ||||
|                 if video_url in processed_urls: | ||||
|                 video_url = url_or_none(url_el.text) | ||||
|                 if not video_url or video_url in processed_urls: | ||||
|                     continue | ||||
|  | ||||
|                 processed_urls.append(video_url) | ||||
|  | ||||
|                 vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) | ||||
|                 abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000) | ||||
|  | ||||
|                 ext = determine_ext(url_el.text) | ||||
|                 ext = determine_ext(video_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     url_formats = self._extract_m3u8_formats( | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         video_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                         preference=0, m3u8_id='HLS', fatal=False) | ||||
|                         preference=0, m3u8_id='HLS', fatal=False)) | ||||
|                 elif ext == 'f4m': | ||||
|                     url_formats = self._extract_f4m_formats( | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, | ||||
|                         preference=0, f4m_id='HDS', fatal=False) | ||||
|                         preference=0, f4m_id='HDS', fatal=False)) | ||||
|                 else: | ||||
|                     media_type = xpath_text(asset, './mediaType', 'media type', default='MP4') | ||||
|                     vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) | ||||
|                     abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000) | ||||
|                     filesize = int_or_none(xpath_text(asset, './fileSize', 'file size')) | ||||
|  | ||||
|                     format_id = [media_type] | ||||
|                     if vbr or abr: | ||||
|                         format_id.append(compat_str(vbr or abr)) | ||||
|  | ||||
|                     f = { | ||||
|                         'url': video_url, | ||||
|                         'format_id': '%s-%d' % (media_type, vbr or abr), | ||||
|                         'format_id': '-'.join(format_id), | ||||
|                         'filesize': filesize, | ||||
|                         'abr': abr, | ||||
|                         'preference': 1, | ||||
|                         'vbr': vbr, | ||||
|                     } | ||||
|  | ||||
|                     if vbr: | ||||
|                         width = int_or_none(xpath_text(asset, './frameWidth', 'width')) | ||||
|                         height = int_or_none(xpath_text(asset, './frameHeight', 'height')) | ||||
|                         f.update({ | ||||
|                             'vbr': vbr, | ||||
|                             'width': width, | ||||
|                             'height': height, | ||||
|                             'width': int_or_none(xpath_text(asset, './frameWidth', 'width')), | ||||
|                             'height': int_or_none(xpath_text(asset, './frameHeight', 'height')), | ||||
|                         }) | ||||
|  | ||||
|                     url_formats = [f] | ||||
|                     if type_ == 'audio': | ||||
|                         f['vcodec'] = 'none' | ||||
|  | ||||
|                 if not url_formats: | ||||
|                     continue | ||||
|  | ||||
|                 if not vbr: | ||||
|                     for f in url_formats: | ||||
|                         abr = f.get('tbr') or abr | ||||
|                         if 'tbr' in f: | ||||
|                             del f['tbr'] | ||||
|                         f.update({ | ||||
|                             'abr': abr, | ||||
|                             'vcodec': 'none', | ||||
|                         }) | ||||
|  | ||||
|                 formats.extend(url_formats) | ||||
|                     formats.append(f) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
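The point of the MDR change above is that empty bitrateVideo/bitrateAudio no longer break the format_id; a minimal sketch with a simplified int_or_none and assumed sample values:

def int_or_none(v, scale=1):
    # Simplified version of youtube_dl.utils.int_or_none
    try:
        return int(v) // scale
    except (TypeError, ValueError):
        return None

def build_format_id(media_type, bitrate_video, bitrate_audio):
    vbr = int_or_none(bitrate_video, 1000)
    abr = int_or_none(bitrate_audio, 1000)
    parts = [media_type]
    if vbr or abr:
        parts.append(str(vbr or abr))
    return '-'.join(parts)

assert build_format_id('MP4', '1128000', '128000') == 'MP4-1128'
assert build_format_id('MP4', '', '') == 'MP4'  # the old '%s-%d' % (media_type, vbr or abr) would raise here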
| @@ -1,15 +1,14 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .telecinco import TelecincoIE | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MiTeleIE(InfoExtractor): | ||||
| class MiTeleIE(TelecincoIE): | ||||
|     IE_DESC = 'mitele.es' | ||||
|     _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' | ||||
|  | ||||
| @@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor): | ||||
|             'timestamp': 1471209401, | ||||
|             'upload_date': '20160814', | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         # no explicit title | ||||
|         'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', | ||||
| @@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', | ||||
|         'only_matching': True, | ||||
| @@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor): | ||||
|             r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})', | ||||
|             webpage, 'Pre Player'), display_id)['prePlayer'] | ||||
|         title = pre_player['title'] | ||||
|         video = pre_player['video'] | ||||
|         video_id = video['dataMediaId'] | ||||
|         video_info = self._parse_content(pre_player['video'], url) | ||||
|         content = pre_player.get('content') or {} | ||||
|         info = content.get('info') or {} | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             # for some reason only HLS is supported | ||||
|             'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}), | ||||
|             'id': video_id, | ||||
|         video_info.update({ | ||||
|             'title': title, | ||||
|             'description': info.get('synopsis'), | ||||
|             'series': content.get('title'), | ||||
| @@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor): | ||||
|             'episode': content.get('subtitle'), | ||||
|             'episode_number': int_or_none(info.get('episode_number')), | ||||
|             'duration': int_or_none(info.get('duration')), | ||||
|             'thumbnail': video.get('dataPoster'), | ||||
|             'age_limit': int_or_none(info.get('rating')), | ||||
|             'timestamp': parse_iso8601(pre_player.get('publishedTime')), | ||||
|         } | ||||
|         }) | ||||
|         return video_info | ||||
|   | ||||
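A small self-contained sketch of pulling the prePlayer blob out of the page with the regex used above; the HTML snippet is a made-up stand-in for mitele.es markup.

import json
import re

webpage = 'window.$REACTBASE_STATE.prePlayer_mtweb = {"prePlayer": {"title": "Demo", "video": {"dataMediaId": "abc123"}}}'
pre_player = json.loads(re.search(
    r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
    webpage).group(1))['prePlayer']
assert pre_player['title'] == 'Demo'
assert pre_player['video']['dataMediaId'] == 'abc123'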
| @@ -251,8 +251,13 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE): | ||||
|                 cloudcast_url = cloudcast.get('url') | ||||
|                 if not cloudcast_url: | ||||
|                     continue | ||||
|                 video_id = cloudcast.get('slug') | ||||
|                 if video_id: | ||||
|                     owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str) | ||||
|                     if owner_username: | ||||
|                         video_id = '%s_%s' % (owner_username, video_id) | ||||
|                 entries.append(self.url_result( | ||||
|                     cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug'))) | ||||
|                     cloudcast_url, MixcloudIE.ie_key(), video_id)) | ||||
|  | ||||
|             page_info = items['pageInfo'] | ||||
|             has_next_page = page_info['hasNextPage'] | ||||
| @@ -321,7 +326,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): | ||||
|     _DESCRIPTION_KEY = 'biog' | ||||
|     _ROOT_TYPE = 'user' | ||||
|     _NODE_TEMPLATE = '''slug | ||||
|           url''' | ||||
|           url | ||||
|           owner { username }''' | ||||
|  | ||||
|     def _get_playlist_title(self, title, slug): | ||||
|         return '%s (%s)' % (title, slug) | ||||
| @@ -345,6 +351,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): | ||||
|     _NODE_TEMPLATE = '''cloudcast { | ||||
|             slug | ||||
|             url | ||||
|             owner { username } | ||||
|           }''' | ||||
|  | ||||
|     def _get_cloudcast(self, node): | ||||
|   | ||||
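The Mixcloud playlist change above simply prefixes the slug with the owner's username when it is available; a tiny illustration with an assumed cloudcast dict:

cloudcast = {'slug': 'dr-mix-vol-1', 'url': 'https://www.mixcloud.com/some-dj/dr-mix-vol-1/',
             'owner': {'username': 'some-dj'}}

video_id = cloudcast.get('slug')
owner_username = (cloudcast.get('owner') or {}).get('username')
if video_id and owner_username:
    video_id = '%s_%s' % (owner_username, video_id)

assert video_id == 'some-dj_dr-mix-vol-1'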
| @@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor): | ||||
|         # no keywords | ||||
|         'url': 'http://motherless.com/8B4BBC1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # see https://motherless.com/videos/recent for recent videos with | ||||
|         # uploaded date in "ago" format | ||||
|         'url': 'https://motherless.com/3C3E2CF', | ||||
|         'info_dict': { | ||||
|             'id': '3C3E2CF', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'a/ Hot Teens', | ||||
|             'categories': list, | ||||
|             'upload_date': '20210104', | ||||
|             'uploader_id': 'yonbiw', | ||||
|             'thumbnail': r're:https?://.*\.jpg', | ||||
|             'age_limit': 18, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -85,20 +102,28 @@ class MotherlessIE(InfoExtractor): | ||||
|             or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id) | ||||
|         age_limit = self._rta_search(webpage) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'), | ||||
|             (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'), | ||||
|             webpage, 'view count', fatal=False)) | ||||
|         like_count = str_to_int(self._html_search_regex( | ||||
|             (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'), | ||||
|             (r'>([\d,.]+)\s+Favorites<', | ||||
|              r'<strong>Favorited</strong>\s+([^<]+)<'), | ||||
|             webpage, 'like count', fatal=False)) | ||||
|  | ||||
|         upload_date = self._html_search_regex( | ||||
|             (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', | ||||
|              r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date') | ||||
|         if 'Ago' in upload_date: | ||||
|             days = int(re.search(r'([0-9]+)', upload_date).group(1)) | ||||
|             upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') | ||||
|         else: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|         upload_date = unified_strdate(self._search_regex( | ||||
|             r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage, | ||||
|             'upload date', default=None)) | ||||
|         if not upload_date: | ||||
|             uploaded_ago = self._search_regex( | ||||
|                 r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago', | ||||
|                 default=None) | ||||
|             if uploaded_ago: | ||||
|                 delta = int(uploaded_ago[:-1]) | ||||
|                 _AGO_UNITS = { | ||||
|                     'h': 'hours', | ||||
|                     'd': 'days', | ||||
|                 } | ||||
|                 kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} | ||||
|                 upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d') | ||||
|  | ||||
|         comment_count = webpage.count('class="media-comment-contents"') | ||||
|         uploader_id = self._html_search_regex( | ||||
|   | ||||
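A standalone sketch of the relative "Nh/Nd ago" fallback added above; the page snippet is invented and the output depends on the current UTC date.

import datetime
import re

def upload_date_from_ago(webpage):
    m = re.search(r'>\s*(\d+[hd])\s+[aA]go\b', webpage)
    if not m:
        return None
    uploaded_ago = m.group(1)
    delta = int(uploaded_ago[:-1])
    kwargs = {{'h': 'hours', 'd': 'days'}[uploaded_ago[-1]]: delta}
    return (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')

print(upload_date_from_ago('<span class="count"> 3d Ago </span>'))  # e.g. '20210101'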
| @@ -5,33 +5,137 @@ import re | ||||
|  | ||||
| from .turner import TurnerBaseIE | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urlparse, | ||||
|     compat_parse_qs, | ||||
|     compat_str, | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     OnDemandPagedList, | ||||
|     remove_start, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     update_url_query, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NBAIE(TurnerBaseIE): | ||||
|     _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$' | ||||
| class NBACVPBaseIE(TurnerBaseIE): | ||||
|     def _extract_nba_cvp_info(self, path, video_id, fatal=False): | ||||
|         return self._extract_cvp_info( | ||||
|             'http://secure.nba.com/%s' % path, video_id, { | ||||
|                 'default': { | ||||
|                     'media_src': 'http://nba.cdn.turner.com/nba/big', | ||||
|                 }, | ||||
|                 'm3u8': { | ||||
|                     'media_src': 'http://nbavod-f.akamaihd.net', | ||||
|                 }, | ||||
|             }, fatal=fatal) | ||||
|  | ||||
|  | ||||
| class NBAWatchBaseIE(NBACVPBaseIE): | ||||
|     _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/' | ||||
|  | ||||
|     def _extract_video(self, filter_key, filter_value): | ||||
|         video = self._download_json( | ||||
|             'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch', | ||||
|             filter_value, query={ | ||||
|                 'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName', | ||||
|                 'q': filter_key + ':' + filter_value, | ||||
|                 'wt': 'json', | ||||
|             })['response']['docs'][0] | ||||
|  | ||||
|         video_id = str(video['pid']) | ||||
|         title = video['name'] | ||||
|  | ||||
|         formats = [] | ||||
|         m3u8_url = (self._download_json( | ||||
|             'https://watch.nba.com/service/publishpoint', video_id, query={ | ||||
|                 'type': 'video', | ||||
|                 'format': 'json', | ||||
|                 'id': video_id, | ||||
|             }, headers={ | ||||
|                 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1', | ||||
|             }, fatal=False) or {}).get('path') | ||||
|         if m3u8_url: | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4', | ||||
|                 'm3u8_native', m3u8_id='hls', fatal=False) | ||||
|             formats.extend(m3u8_formats) | ||||
|             for f in m3u8_formats: | ||||
|                 http_f = f.copy() | ||||
|                 http_f.update({ | ||||
|                     'format_id': http_f['format_id'].replace('hls-', 'http-'), | ||||
|                     'protocol': 'http', | ||||
|                     'url': http_f['url'].replace('.m3u8', ''), | ||||
|                 }) | ||||
|                 formats.append(http_f) | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')), | ||||
|             'description': video.get('description'), | ||||
|             'duration': int_or_none(video.get('runtime')), | ||||
|             'timestamp': parse_iso8601(video.get('releaseDate')), | ||||
|             'tags': video.get('tags'), | ||||
|         } | ||||
|  | ||||
|         seo_name = video.get('seoName') | ||||
|         if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name): | ||||
|             base_path = '' | ||||
|             if seo_name.startswith('teams/'): | ||||
|                 base_path += seo_name.split('/')[1] + '/' | ||||
|             base_path += 'video/' | ||||
|             cvp_info = self._extract_nba_cvp_info( | ||||
|                 base_path + seo_name + '.xml', video_id, False) | ||||
|             if cvp_info: | ||||
|                 formats.extend(cvp_info['formats']) | ||||
|                 info = merge_dicts(info, cvp_info) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|         info['formats'] = formats | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class NBAWatchEmbedIE(NBAWatchBaseIE): | ||||
|     IE_NAME = 'nba:watch:embed' | ||||
|     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://watch.nba.com/embed?id=659395', | ||||
|         'md5': 'b7e3f9946595f4ca0a13903ce5edd120', | ||||
|         'info_dict': { | ||||
|             'id': '659395', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Mix clip: More than 7 points of  Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017', | ||||
|             'description': 'Mix clip: More than 7 points of  Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017', | ||||
|             'timestamp': 1492228800, | ||||
|             'upload_date': '20170415', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_video('pid', video_id) | ||||
|  | ||||
|  | ||||
| class NBAWatchIE(NBAWatchBaseIE): | ||||
|     IE_NAME = 'nba:watch' | ||||
|     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', | ||||
|         'md5': '9e7729d3010a9c71506fd1248f74e4f4', | ||||
|         'md5': '9d902940d2a127af3f7f9d2f3dc79c96', | ||||
|         'info_dict': { | ||||
|             'id': '0021200253-okc-bkn-recap', | ||||
|             'id': '70946', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Thunder vs. Nets', | ||||
|             'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', | ||||
|             'duration': 181, | ||||
|             'timestamp': 1354638466, | ||||
|             'timestamp': 1354597200, | ||||
|             'upload_date': '20121204', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', | ||||
|         'only_matching': True, | ||||
| @@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE): | ||||
|         'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', | ||||
|         'md5': 'b2b39b81cf28615ae0c3360a3f9668c4', | ||||
|         'info_dict': { | ||||
|             'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', | ||||
|             'id': '330865', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hawks vs. Cavaliers Game 1', | ||||
|             'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d', | ||||
|             'duration': 228, | ||||
|             'timestamp': 1432134543, | ||||
|             'upload_date': '20150520', | ||||
|         }, | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|     }, { | ||||
|         'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake', | ||||
|         'info_dict': { | ||||
|             'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Practice: Doc Rivers - 2/16/16', | ||||
|             'description': 'Head Coach Doc Rivers addresses the media following practice.', | ||||
|             'upload_date': '20160216', | ||||
|             'timestamp': 1455672000, | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|     }, { | ||||
|         'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', | ||||
|         'info_dict': { | ||||
|             'id': 'timberwolves', | ||||
|             'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins', | ||||
|         }, | ||||
|         'playlist_count': 30, | ||||
|         'params': { | ||||
|             # Download the whole playlist takes too long time | ||||
|             'playlist_items': '1-30', | ||||
|             'timestamp': 1432094400, | ||||
|             'upload_date': '20150521', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', | ||||
|         'info_dict': { | ||||
|             'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins', | ||||
|             'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.', | ||||
|             'upload_date': '20141212', | ||||
|             'timestamp': 1418418600, | ||||
|         }, | ||||
|         'params': { | ||||
|             'noplaylist': True, | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Unable to download f4m manifest'], | ||||
|         'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # only CVP mp4 format available | ||||
|         'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _PAGE_SIZE = 30 | ||||
|  | ||||
|     def _fetch_page(self, team, video_id, page): | ||||
|         search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({ | ||||
|             'type': 'teamvideo', | ||||
|             'start': page * self._PAGE_SIZE + 1, | ||||
|             'npp': (page + 1) * self._PAGE_SIZE + 1, | ||||
|             'sort': 'recent', | ||||
|             'output': 'json', | ||||
|             'site': team, | ||||
|         }) | ||||
|         results = self._download_json( | ||||
|             search_url, video_id, note='Download page %d of playlist data' % page)['results'][0] | ||||
|         for item in results: | ||||
|             yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url'])) | ||||
|  | ||||
|     def _extract_playlist(self, orig_path, video_id, webpage): | ||||
|         team = orig_path.split('/')[0] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0] | ||||
|         if collection_id: | ||||
|             if self._downloader.params.get('noplaylist'): | ||||
|             self.to_screen('Downloading just video because of --no-playlist') | ||||
|             video_path = self._search_regex( | ||||
|                 r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path') | ||||
|             video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path) | ||||
|             return self.url_result(video_url) | ||||
|                 self.to_screen('Downloading just video %s because of --no-playlist' % display_id) | ||||
|             else: | ||||
|                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id) | ||||
|                 return self.url_result( | ||||
|                     'https://www.nba.com/watch/list/collection/' + collection_id, | ||||
|                     NBAWatchCollectionIE.ie_key(), collection_id) | ||||
|         return self._extract_video('seoName', display_id) | ||||
|  | ||||
|         self.to_screen('Downloading playlist - add --no-playlist to just download video') | ||||
|         playlist_title = self._og_search_title(webpage, fatal=False) | ||||
|         entries = OnDemandPagedList( | ||||
|             functools.partial(self._fetch_page, team, video_id), | ||||
|             self._PAGE_SIZE) | ||||
|  | ||||
|         return self.playlist_result(entries, team, playlist_title) | ||||
| class NBAWatchCollectionIE(NBAWatchBaseIE): | ||||
|     IE_NAME = 'nba:watch:collection' | ||||
|     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://watch.nba.com/list/collection/season-preview-2020', | ||||
|         'info_dict': { | ||||
|             'id': 'season-preview-2020', | ||||
|         }, | ||||
|         'playlist_mincount': 43, | ||||
|     }] | ||||
|     _PAGE_SIZE = 100 | ||||
|  | ||||
|     def _fetch_page(self, collection_id, page): | ||||
|         page += 1 | ||||
|         videos = self._download_json( | ||||
|             'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id, | ||||
|             collection_id, 'Downloading page %d JSON metadata' % page, query={ | ||||
|                 'count': self._PAGE_SIZE, | ||||
|                 'page': page, | ||||
|             })['results']['videos'] | ||||
|         for video in videos: | ||||
|             program = video.get('program') or {} | ||||
|             seo_name = program.get('seoName') or program.get('slug') | ||||
|             if not seo_name: | ||||
|                 continue | ||||
|             yield { | ||||
|                 '_type': 'url', | ||||
|                 'id': program.get('id'), | ||||
|                 'title': program.get('title') or video.get('title'), | ||||
|                 'url': 'https://www.nba.com/watch/video/' + seo_name, | ||||
|                 'thumbnail': video.get('image'), | ||||
|                 'description': program.get('description') or video.get('description'), | ||||
|                 'duration': parse_duration(program.get('runtimeHours')), | ||||
|                 'timestamp': parse_iso8601(video.get('releaseDate')), | ||||
|             } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         path, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         orig_path = path | ||||
|         if path.startswith('nba/'): | ||||
|             path = path[3:] | ||||
|         collection_id = self._match_id(url) | ||||
|         entries = OnDemandPagedList( | ||||
|             functools.partial(self._fetch_page, collection_id), | ||||
|             self._PAGE_SIZE) | ||||
|         return self.playlist_result(entries, collection_id) | ||||
|  | ||||
|         if 'video/' not in path: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/') | ||||
|  | ||||
|             if path == '{{id}}': | ||||
|                 return self._extract_playlist(orig_path, video_id, webpage) | ||||
| class NBABaseIE(NBACVPBaseIE): | ||||
|     _VALID_URL_BASE = r'''(?x) | ||||
|         https?://(?:www\.)?nba\.com/ | ||||
|             (?P<team> | ||||
|                 blazers| | ||||
|                 bucks| | ||||
|                 bulls| | ||||
|                 cavaliers| | ||||
|                 celtics| | ||||
|                 clippers| | ||||
|                 grizzlies| | ||||
|                 hawks| | ||||
|                 heat| | ||||
|                 hornets| | ||||
|                 jazz| | ||||
|                 kings| | ||||
|                 knicks| | ||||
|                 lakers| | ||||
|                 magic| | ||||
|                 mavericks| | ||||
|                 nets| | ||||
|                 nuggets| | ||||
|                 pacers| | ||||
|                 pelicans| | ||||
|                 pistons| | ||||
|                 raptors| | ||||
|                 rockets| | ||||
|                 sixers| | ||||
|                 spurs| | ||||
|                 suns| | ||||
|                 thunder| | ||||
|                 timberwolves| | ||||
|                 warriors| | ||||
|                 wizards | ||||
|             ) | ||||
|         (?:/play\#)?/''' | ||||
|     _CHANNEL_PATH_REGEX = r'video/channel|series' | ||||
|  | ||||
|             # See prepareContentId() of pkgCvp.js | ||||
|             if path.startswith('video/teams'): | ||||
|                 path = 'video/channels/proxy/' + path[6:] | ||||
|     def _embed_url_result(self, team, content_id): | ||||
|         return self.url_result(update_url_query( | ||||
|             'https://secure.nba.com/assets/amp/include/video/iframe.html', { | ||||
|                 'contentId': content_id, | ||||
|                 'team': team, | ||||
|             }), NBAEmbedIE.ie_key()) | ||||
|  | ||||
|         return self._extract_cvp_info( | ||||
|             'http://www.nba.com/%s.xml' % path, video_id, { | ||||
|                 'default': { | ||||
|                     'media_src': 'http://nba.cdn.turner.com/nba/big', | ||||
|                 }, | ||||
|                 'm3u8': { | ||||
|                     'media_src': 'http://nbavod-f.akamaihd.net', | ||||
|                 }, | ||||
|     def _call_api(self, team, content_id, query, resource): | ||||
|         return self._download_json( | ||||
|             'https://api.nba.net/2/%s/video,imported_video,wsc/' % team, | ||||
|             content_id, 'Downloading %s JSON metadata' % resource, | ||||
|             query=query, headers={ | ||||
|                 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b', | ||||
|             })['response']['result'] | ||||
|  | ||||
|     def _extract_video(self, video, team, extract_all=True): | ||||
|         video_id = compat_str(video['nid']) | ||||
|         team = video['brand'] | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': video.get('title') or video.get('headline') or video['shortHeadline'], | ||||
|             'description': video.get('description'), | ||||
|             'timestamp': parse_iso8601(video.get('published')), | ||||
|         } | ||||
|  | ||||
|         subtitles = {} | ||||
|         captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {} | ||||
|         for caption_url in captions.values(): | ||||
|             subtitles.setdefault('en', []).append({'url': caption_url}) | ||||
|  | ||||
|         formats = [] | ||||
|         mp4_url = video.get('mp4') | ||||
|         if mp4_url: | ||||
|             formats.append({ | ||||
|                 'url': mp4_url, | ||||
|             }) | ||||
|  | ||||
|         if extract_all: | ||||
|             source_url = video.get('videoSource') | ||||
|             if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'): | ||||
|                 formats.append({ | ||||
|                     'format_id': 'source', | ||||
|                     'url': source_url, | ||||
|                     'preference': 1, | ||||
|                 }) | ||||
|  | ||||
|             m3u8_url = video.get('m3u8') | ||||
|             if m3u8_url: | ||||
|                 if '.akamaihd.net/i/' in m3u8_url: | ||||
|                     formats.extend(self._extract_akamai_formats( | ||||
|                         m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'})) | ||||
|                 else: | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         m3u8_url, video_id, 'mp4', | ||||
|                         'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|             content_xml = video.get('contentXml') | ||||
|             if team and content_xml: | ||||
|                 cvp_info = self._extract_nba_cvp_info( | ||||
|                     team + content_xml, video_id, fatal=False) | ||||
|                 if cvp_info: | ||||
|                     formats.extend(cvp_info['formats']) | ||||
|                     subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles']) | ||||
|                     info = merge_dicts(info, cvp_info) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|         else: | ||||
|             info.update(self._embed_url_result(team, video['videoId'])) | ||||
|  | ||||
|         info.update({ | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         team, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if '/play#/' in url: | ||||
|             display_id = compat_urllib_parse_unquote(display_id) | ||||
|         else: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             display_id = self._search_regex( | ||||
|                 self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id') | ||||
|         return self._extract_url_results(team, display_id) | ||||
|  | ||||
|  | ||||
| class NBAEmbedIE(NBABaseIE): | ||||
|     IE_NAME = 'nba:embed' | ||||
|     _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) | ||||
|         content_id = qs['contentId'][0] | ||||
|         team = qs.get('team', [None])[0] | ||||
|         if not team: | ||||
|             return self.url_result( | ||||
|                 'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key()) | ||||
|         video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0] | ||||
|         return self._extract_video(video, team) | ||||
|  | ||||
|  | ||||
| class NBAIE(NBABaseIE): | ||||
|     IE_NAME = 'nba' | ||||
|     _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774', | ||||
|         'info_dict': { | ||||
|             'id': '45039', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'AND WE BACK.', | ||||
|             'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.', | ||||
|             'duration': 94, | ||||
|             'timestamp': 1607112000, | ||||
|             'upload_date': '20201218', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _CONTENT_ID_REGEX = r'videoID' | ||||
|  | ||||
|     def _extract_url_results(self, team, content_id): | ||||
|         return self._embed_url_result(team, content_id) | ||||
|  | ||||
|  | ||||
| class NBAChannelIE(NBABaseIE): | ||||
|     IE_NAME = 'nba:channel' | ||||
|     _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.nba.com/blazers/video/channel/summer_league', | ||||
|         'info_dict': { | ||||
|             'title': 'Summer League', | ||||
|         }, | ||||
|         'playlist_mincount': 138, | ||||
|     }, { | ||||
|         'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _CONTENT_ID_REGEX = r'videoSubCategory' | ||||
|     _PAGE_SIZE = 100 | ||||
|  | ||||
|     def _fetch_page(self, team, channel, page): | ||||
|         results = self._call_api(team, channel, { | ||||
|             'channels': channel, | ||||
|             'count': self._PAGE_SIZE, | ||||
|             'offset': page * self._PAGE_SIZE, | ||||
|         }, 'page %d' % (page + 1)) | ||||
|         for video in results: | ||||
|             yield self._extract_video(video, team, False) | ||||
|  | ||||
|     def _extract_url_results(self, team, content_id): | ||||
|         entries = OnDemandPagedList( | ||||
|             functools.partial(self._fetch_page, team, content_id), | ||||
|             self._PAGE_SIZE) | ||||
|         return self.playlist_result(entries, playlist_title=content_id) | ||||
|   | ||||
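NBAChannelIE above pages through the team API lazily via OnDemandPagedList. Below is a minimal, self-contained sketch of how that helper defers page fetches; fake_fetch_page and its placeholder entries are assumptions standing in for _fetch_page, not repo code.

    import functools

    from youtube_dl.utils import OnDemandPagedList

    PAGE_SIZE = 100

    def fake_fetch_page(channel, page):
        # Stand-in for NBAChannelIE._fetch_page: a real implementation would
        # call the team API with count=PAGE_SIZE and offset=page * PAGE_SIZE.
        for i in range(PAGE_SIZE):
            yield {'id': '%s-%d' % (channel, page * PAGE_SIZE + i)}

    entries = OnDemandPagedList(
        functools.partial(fake_fetch_page, 'summer_league'), PAGE_SIZE)
    # Only the first page is actually requested to serve this slice:
    print(entries.getslice(0, 3))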
| @@ -158,7 +158,8 @@ class NBCIE(AdobePassIE): | ||||
|  | ||||
|  | ||||
| class NBCSportsVPlayerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' | ||||
|     _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/' | ||||
|     _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', | ||||
| @@ -174,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         iframe_m = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) | ||||
|             r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage) | ||||
|         if iframe_m: | ||||
|             return iframe_m.group('url') | ||||
|  | ||||
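The widened _extract_url regex above also catches <div> embeds that carry the player URL in a data-mpx-src or data-src attribute (see the data-mpx-src / data-src test cases below). A rough, self-contained check against invented markup; the HTML snippets are illustrative only, while the URLs are borrowed from the tests.

    import re

    # Same pattern construction as NBCSportsVPlayerIE._extract_url above.
    VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
    pattern = r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % VALID_URL_BASE

    samples = [
        # classic iframe embed
        '<iframe src="https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI">',
        # newer div embed with data-mpx-src (markup is assumed, not taken from a real page)
        '<div data-mpx-src="https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9">',
    ]
    for html in samples:
        m = re.search(pattern, html)
        print(m and m.group('url'))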
| @@ -192,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NBCSportsIE(InfoExtractor): | ||||
|     # Does not include https because its certificate is invalid | ||||
|     _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         # iframe src | ||||
|         'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', | ||||
|         'info_dict': { | ||||
|             'id': 'PHJSaFWbrTY9', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke', | ||||
|             'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', | ||||
|             'uploader': 'NBCU-SPORTS', | ||||
|             'upload_date': '20150330', | ||||
|             'timestamp': 1427726529, | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         # data-mpx-src | ||||
|         'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # data-src | ||||
|         'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -274,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CSNNEIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', | ||||
|         'info_dict': { | ||||
|             'id': 'yvBLLUgQ8WU0', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.', | ||||
|             'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3', | ||||
|             'timestamp': 1459369979, | ||||
|             'upload_date': '20160330', | ||||
|             'uploader': 'NBCU-SPORTS', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': 'ThePlatform', | ||||
|             'url': self._html_search_meta('twitter:player:stream', webpage), | ||||
|             'display_id': display_id, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(ThePlatformIE): | ||||
|     _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)' | ||||
|  | ||||
|   | ||||
| @@ -4,19 +4,15 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     remove_end, | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     get_element_by_class, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NFLIE(InfoExtractor): | ||||
|     IE_NAME = 'nfl.com' | ||||
|     _VALID_URL = r'''(?x) | ||||
| class NFLBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?P<host> | ||||
|                             (?:www\.)? | ||||
| @@ -34,15 +30,15 @@ class NFLIE(InfoExtractor): | ||||
|                                     houstontexans| | ||||
|                                     colts| | ||||
|                                     jaguars| | ||||
|                                     titansonline| | ||||
|                                     (?:titansonline|tennesseetitans)| | ||||
|                                     denverbroncos| | ||||
|                                     kcchiefs| | ||||
|                                     (?:kc)?chiefs| | ||||
|                                     raiders| | ||||
|                                     chargers| | ||||
|                                     dallascowboys| | ||||
|                                     giants| | ||||
|                                     philadelphiaeagles| | ||||
|                                     redskins| | ||||
|                                     (?:redskins|washingtonfootball)| | ||||
|                                     chicagobears| | ||||
|                                     detroitlions| | ||||
|                                     packers| | ||||
| @@ -52,180 +48,113 @@ class NFLIE(InfoExtractor): | ||||
|                                     neworleanssaints| | ||||
|                                     buccaneers| | ||||
|                                     azcardinals| | ||||
|                                     stlouisrams| | ||||
|                                     (?:stlouis|the)rams| | ||||
|                                     49ers| | ||||
|                                     seahawks | ||||
|                                 )\.com| | ||||
|                                 .+?\.clubs\.nfl\.com | ||||
|                             ) | ||||
|                         )/ | ||||
|                         (?:.+?/)* | ||||
|                         (?P<id>[^/#?&]+) | ||||
|                     ''' | ||||
|     _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})' | ||||
|     _WORKING = False | ||||
|  | ||||
|     def _parse_video_config(self, video_config, display_id): | ||||
|         video_config = self._parse_json(video_config, display_id) | ||||
|         item = video_config['playlist'][0] | ||||
|         mcp_id = item.get('mcpID') | ||||
|         if mcp_id: | ||||
|             info = self.url_result( | ||||
|                 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id, | ||||
|                 'Anvato', mcp_id) | ||||
|         else: | ||||
|             media_id = item.get('id') or item['entityId'] | ||||
|             title = item['title'] | ||||
|             item_url = item['url'] | ||||
|             info = {'id': media_id} | ||||
|             ext = determine_ext(item_url) | ||||
|             if ext == 'm3u8': | ||||
|                 info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4') | ||||
|                 self._sort_formats(info['formats']) | ||||
|             else: | ||||
|                 info['url'] = item_url | ||||
|                 if item.get('audio') is True: | ||||
|                     info['vcodec'] = 'none' | ||||
|             is_live = video_config.get('live') is True | ||||
|             thumbnails = None | ||||
|             image_url = item.get('imageSrc') or item.get('posterImage') | ||||
|             if image_url: | ||||
|                 thumbnails = [{ | ||||
|                     'url': image_url, | ||||
|                     'ext': determine_ext(image_url, 'jpg'), | ||||
|                 }] | ||||
|             info.update({ | ||||
|                 'title': self._live_title(title) if is_live else title, | ||||
|                 'is_live': is_live, | ||||
|                 'description': clean_html(item.get('description')), | ||||
|                 'thumbnails': thumbnails, | ||||
|             }) | ||||
|         return info | ||||
|  | ||||
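_parse_video_config above handles two flavours of the embedded video-config JSON: items that carry an Anvato mcpID, and items that link directly to a media URL. A hedged sketch of both shapes follows; the key names mirror the parser, but every value here is invented for illustration.

    # Illustrative payloads only -- values are made up; key names follow
    # _parse_video_config above.

    # 1) Anvato-backed item: routed to the Anvato extractor via mcpID.
    anvato_style = {
        'playlist': [{
            'mcpID': '899441',
            'title': 'Some highlight',
        }],
    }

    # 2) Directly hosted item: a media URL plus metadata.
    direct_style = {
        'live': False,
        'playlist': [{
            'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
            'title': 'Some podcast episode',
            'url': 'https://example.com/audio.mp3',
            'audio': True,               # makes the parser set vcodec to 'none'
            'description': '<p>Example description</p>',
            'posterImage': 'https://example.com/poster.jpg',
        }],
    }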
|  | ||||
| class NFLIE(NFLBaseIE): | ||||
|     IE_NAME = 'nfl.com' | ||||
|     _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', | ||||
|         'md5': '394ef771ddcd1354f665b471d78ec4c6', | ||||
|         'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14', | ||||
|         'info_dict': { | ||||
|             'id': '0ap3000000398478', | ||||
|             'id': '899441', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Week 3: Redskins vs. Eagles highlights', | ||||
|             'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', | ||||
|             'upload_date': '20140921', | ||||
|             'timestamp': 1411337580, | ||||
|             'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14", | ||||
|             'description': 'md5:85e05a3cc163f8c344340f220521136d', | ||||
|             'upload_date': '20201215', | ||||
|             'timestamp': 1608009755, | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': 'NFL', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|         'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', | ||||
|         'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', | ||||
|         'md5': '6886b32c24b463038c760ceb55a34566', | ||||
|         'info_dict': { | ||||
|             'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'LIVE: Post Game vs. Browns', | ||||
|             'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', | ||||
|             'upload_date': '20131229', | ||||
|             'timestamp': 1388354455, | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown', | ||||
|             'description': 'md5:12ada8ee70e6762658c30e223e095075', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', | ||||
|         'info_dict': { | ||||
|             'id': '0ap3000000467607', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Frustrations flare on the field', | ||||
|             'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.', | ||||
|             'timestamp': 1422850320, | ||||
|             'upload_date': '20150202', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette', | ||||
|         'md5': '4c319e2f625ffd0b481b4382c6fc124c', | ||||
|         'info_dict': { | ||||
|             'id': 'n-238346', | ||||
|             'ext': 'mp4', | ||||
|             'title': '10 Days at Gillette', | ||||
|             'description': 'md5:8cd9cd48fac16de596eadc0b24add951', | ||||
|             'timestamp': 1442618809, | ||||
|             'upload_date': '20150918', | ||||
|         }, | ||||
|     }, { | ||||
|         # lowercase data-contentid | ||||
|         'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7', | ||||
|         'info_dict': { | ||||
|             'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tomlin looks ahead to Ravens on a short week', | ||||
|             'description': 'md5:32f3f7b139f43913181d5cbb24ecad75', | ||||
|             'timestamp': 1443459651, | ||||
|             'upload_date': '20150928', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', | ||||
|         'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a', | ||||
|         'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def prepend_host(host, url): | ||||
|         if not url.startswith('http'): | ||||
|             if not url.startswith('/'): | ||||
|                 url = '/%s' % url | ||||
|             url = 'http://{0:}{1:}'.format(host, url) | ||||
|         return url | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         return self._parse_video_config(self._search_regex( | ||||
|             self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_from_stream(stream, protocol, host, path_prefix='', | ||||
|                            preference=0, note=None): | ||||
|         url = '{protocol:}://{host:}/{prefix:}{path:}'.format( | ||||
|             protocol=protocol, | ||||
|             host=host, | ||||
|             prefix=path_prefix, | ||||
|             path=stream.get('path'), | ||||
|         ) | ||||
|         return { | ||||
|             'url': url, | ||||
|             'vbr': int_or_none(stream.get('rate', 0), 1000), | ||||
|             'preference': preference, | ||||
|             'format_note': note, | ||||
|  | ||||
| class NFLArticleIE(NFLBaseIE): | ||||
|     IE_NAME = 'nfl.com:article' | ||||
|     _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e', | ||||
|         'info_dict': { | ||||
|             'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e', | ||||
|             'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations", | ||||
|         }, | ||||
|         'playlist_count': 4, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id, host = mobj.group('id'), mobj.group('host') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         config_url = NFLIE.prepend_host(host, self._search_regex( | ||||
|             r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1', | ||||
|             webpage, 'config URL', default='static/content/static/config/video/config.json', | ||||
|             group='config')) | ||||
|         # For articles, the id in the url is not the video id | ||||
|         video_id = self._search_regex( | ||||
|             r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1', | ||||
|             webpage, 'video id', default=video_id, group='id') | ||||
|         config = self._download_json(config_url, video_id, 'Downloading player config') | ||||
|         url_template = NFLIE.prepend_host( | ||||
|             host, '{contentURLTemplate:}'.format(**config)) | ||||
|         video_data = self._download_json( | ||||
|             url_template.format(id=video_id), video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         cdn_data = video_data.get('cdnData', {}) | ||||
|         streams = cdn_data.get('bitrateInfo', []) | ||||
|         if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM': | ||||
|             parts = compat_urllib_parse_urlparse(cdn_data.get('uri')) | ||||
|             protocol, host = parts.scheme, parts.netloc | ||||
|             for stream in streams: | ||||
|                 formats.append( | ||||
|                     NFLIE.format_from_stream(stream, protocol, host)) | ||||
|         else: | ||||
|             cdns = config.get('cdns') | ||||
|             if not cdns: | ||||
|                 raise ExtractorError('Failed to get CDN data', expected=True) | ||||
|  | ||||
|             for name, cdn in cdns.items(): | ||||
|                 # LimeLight streams don't seem to work | ||||
|                 if cdn.get('name') == 'LIMELIGHT': | ||||
|                     continue | ||||
|  | ||||
|                 protocol = cdn.get('protocol') | ||||
|                 host = remove_end(cdn.get('host', ''), '/') | ||||
|                 if not (protocol and host): | ||||
|                     continue | ||||
|  | ||||
|                 prefix = cdn.get('pathprefix', '') | ||||
|                 if prefix and not prefix.endswith('/'): | ||||
|                     prefix = '%s/' % prefix | ||||
|  | ||||
|                 preference = 0 | ||||
|                 if protocol == 'rtmp': | ||||
|                     preference = -2 | ||||
|                 elif 'prog' in name.lower(): | ||||
|                     preference = 1 | ||||
|  | ||||
|                 for stream in streams: | ||||
|                     formats.append( | ||||
|                         NFLIE.format_from_stream(stream, protocol, host, | ||||
|                                                  prefix, preference, name)) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = None | ||||
|         for q in ('xl', 'l', 'm', 's', 'xs'): | ||||
|             thumbnail = video_data.get('imagePaths', {}).get(q) | ||||
|             if thumbnail: | ||||
|                 break | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_data.get('headline'), | ||||
|             'formats': formats, | ||||
|             'description': video_data.get('caption'), | ||||
|             'duration': video_data.get('duration'), | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': int_or_none(video_data.get('posted'), 1000), | ||||
|         } | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         entries = [] | ||||
|         for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): | ||||
|             entries.append(self._parse_video_config(video_config, display_id)) | ||||
|         title = clean_html(get_element_by_class( | ||||
|             'nfl-c-article__title', webpage)) or self._html_search_meta( | ||||
|             ['og:title', 'twitter:title'], webpage) | ||||
|         return self.playlist_result(entries, display_id, title) | ||||
|   | ||||
| @@ -3,16 +3,94 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import urljoin | ||||
|  | ||||
|  | ||||
| class NhkVodIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)' | ||||
| class NhkBaseIE(InfoExtractor): | ||||
|     _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json' | ||||
|     _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand' | ||||
|     _TYPE_REGEX = r'/(?P<type>video|audio)/' | ||||
|  | ||||
|     def _call_api(self, m_id, lang, is_video, is_episode, is_clip): | ||||
|         return self._download_json( | ||||
|             self._API_URL_TEMPLATE % ( | ||||
|                 'v' if is_video else 'r', | ||||
|                 'clip' if is_clip else 'esd', | ||||
|                 'episode' if is_episode else 'program', | ||||
|                 m_id, lang, '/all' if is_video else ''), | ||||
|             m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] | ||||
|  | ||||
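_call_api above expands _API_URL_TEMPLATE into the NHK World Service endpoint. A small sketch of how the pieces slot together; build_url is a hypothetical helper, and the ids are taken from the tests.

    API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'

    def build_url(m_id, lang, is_video, is_episode, is_clip):
        # Mirrors the argument order used by NhkBaseIE._call_api above.
        return API_URL_TEMPLATE % (
            'v' if is_video else 'r',
            'clip' if is_clip else 'esd',
            'episode' if is_episode else 'program',
            m_id, lang, '/all' if is_video else '')

    # video clip episode (id already reformatted to '9999-011' by the caller):
    print(build_url('9999-011', 'en', True, True, True))
    # -> https://api.nhk.or.jp/nhkworld/vodcliplist/v7a/episode/9999-011/en/all/all.json
    # audio program listing:
    print(build_url('listener', 'en', False, False, False))
    # -> https://api.nhk.or.jp/nhkworld/rodesdlist/v7a/program/listener/en/all.json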
|     def _extract_episode_info(self, url, episode=None): | ||||
|         fetch_episode = episode is None | ||||
|         lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups() | ||||
|         if episode_id.isdigit(): | ||||
|             episode_id = episode_id[:4] + '-' + episode_id[4:] | ||||
|  | ||||
|         is_video = m_type == 'video' | ||||
|         if fetch_episode: | ||||
|             episode = self._call_api( | ||||
|                 episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] | ||||
|         title = episode.get('sub_title_clean') or episode['sub_title'] | ||||
|  | ||||
|         def get_clean_field(key): | ||||
|             return episode.get(key + '_clean') or episode.get(key) | ||||
|  | ||||
|         series = get_clean_field('title') | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: | ||||
|             img_path = episode.get('image' + s) | ||||
|             if not img_path: | ||||
|                 continue | ||||
|             thumbnails.append({ | ||||
|                 'id': '%dp' % h, | ||||
|                 'height': h, | ||||
|                 'width': w, | ||||
|                 'url': 'https://www3.nhk.or.jp' + img_path, | ||||
|             }) | ||||
|  | ||||
|         info = { | ||||
|             'id': episode_id + '-' + lang, | ||||
|             'title': '%s - %s' % (series, title) if series and title else title, | ||||
|             'description': get_clean_field('description'), | ||||
|             'thumbnails': thumbnails, | ||||
|             'series': series, | ||||
|             'episode': title, | ||||
|         } | ||||
|         if is_video: | ||||
|             vod_id = episode['vod_id'] | ||||
|             info.update({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'ie_key': 'Piksel', | ||||
|                 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id, | ||||
|                 'id': vod_id, | ||||
|             }) | ||||
|         else: | ||||
|             if fetch_episode: | ||||
|                 audio_path = episode['audio']['audio'] | ||||
|                 info['formats'] = self._extract_m3u8_formats( | ||||
|                     'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, | ||||
|                     episode_id, 'm4a', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False) | ||||
|                 for f in info['formats']: | ||||
|                     f['language'] = lang | ||||
|             else: | ||||
|                 info.update({ | ||||
|                     '_type': 'url_transparent', | ||||
|                     'ie_key': NhkVodIE.ie_key(), | ||||
|                     'url': url, | ||||
|                 }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class NhkVodIE(NhkBaseIE): | ||||
|     _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) | ||||
|     # Content available only for a limited period of time. Visit | ||||
|     # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. | ||||
|     _TESTS = [{ | ||||
|         # video clip | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', | ||||
|         'md5': '256a1be14f48d960a7e61e2532d95ec3', | ||||
|         'md5': '7a90abcfe610ec22a6bfe15bd46b30ca', | ||||
|         'info_dict': { | ||||
|             'id': 'a95j5iza', | ||||
|             'ext': 'mp4', | ||||
| @@ -47,60 +125,54 @@ class NhkVodIE(InfoExtractor): | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() | ||||
|         if episode_id.isdigit(): | ||||
|             episode_id = episode_id[:4] + '-' + episode_id[4:] | ||||
|         return self._extract_episode_info(url) | ||||
|  | ||||
|         is_video = m_type == 'video' | ||||
|         episode = self._download_json( | ||||
|             self._API_URL_TEMPLATE % ( | ||||
|                 'v' if is_video else 'r', | ||||
|                 'clip' if episode_id[:4] == '9999' else 'esd', | ||||
|                 episode_id, lang, '/all' if is_video else ''), | ||||
|             episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0] | ||||
|         title = episode.get('sub_title_clean') or episode['sub_title'] | ||||
|  | ||||
|         def get_clean_field(key): | ||||
|             return episode.get(key + '_clean') or episode.get(key) | ||||
| class NhkVodProgramIE(NhkBaseIE): | ||||
|     _VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) | ||||
|     _TESTS = [{ | ||||
|         # video program episodes | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway', | ||||
|         'info_dict': { | ||||
|             'id': 'japanrailway', | ||||
|             'title': 'Japan Railway Journal', | ||||
|         }, | ||||
|         'playlist_mincount': 1, | ||||
|     }, { | ||||
|         # video program clips | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip', | ||||
|         'info_dict': { | ||||
|             'id': 'japanrailway', | ||||
|             'title': 'Japan Railway Journal', | ||||
|         }, | ||||
|         'playlist_mincount': 5, | ||||
|     }, { | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # audio program | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|         series = get_clean_field('title') | ||||
|     def _real_extract(self, url): | ||||
|         lang, m_type, program_id, episode_type = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for s, w, h in [('', 640, 360), ('_l', 1280, 720)]: | ||||
|             img_path = episode.get('image' + s) | ||||
|             if not img_path: | ||||
|         episodes = self._call_api( | ||||
|             program_id, lang, m_type == 'video', False, episode_type == 'clip') | ||||
|  | ||||
|         entries = [] | ||||
|         for episode in episodes: | ||||
|             episode_path = episode.get('url') | ||||
|             if not episode_path: | ||||
|                 continue | ||||
|             thumbnails.append({ | ||||
|                 'id': '%dp' % h, | ||||
|                 'height': h, | ||||
|                 'width': w, | ||||
|                 'url': 'https://www3.nhk.or.jp' + img_path, | ||||
|             }) | ||||
|             entries.append(self._extract_episode_info( | ||||
|                 urljoin(url, episode_path), episode)) | ||||
|  | ||||
|         info = { | ||||
|             'id': episode_id + '-' + lang, | ||||
|             'title': '%s - %s' % (series, title) if series and title else title, | ||||
|             'description': get_clean_field('description'), | ||||
|             'thumbnails': thumbnails, | ||||
|             'series': series, | ||||
|             'episode': title, | ||||
|         } | ||||
|         if is_video: | ||||
|             info.update({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'ie_key': 'Piksel', | ||||
|                 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'], | ||||
|             }) | ||||
|         else: | ||||
|             audio = episode['audio'] | ||||
|             audio_path = audio['audio'] | ||||
|             info['formats'] = self._extract_m3u8_formats( | ||||
|                 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path, | ||||
|                 episode_id, 'm4a', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls', fatal=False) | ||||
|             for f in info['formats']: | ||||
|                 f['language'] = lang | ||||
|         return info | ||||
|         program_title = None | ||||
|         if entries: | ||||
|             program_title = entries[0].get('series') | ||||
|  | ||||
|         return self.playlist_result(entries, program_id, program_title) | ||||
|   | ||||
| @@ -1,20 +1,23 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import datetime | ||||
| import functools | ||||
| import json | ||||
| import math | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     dict_get, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     remove_start, | ||||
| @@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor): | ||||
|         if urlh is False: | ||||
|             login_ok = False | ||||
|         else: | ||||
|             parts = compat_urlparse.urlparse(urlh.geturl()) | ||||
|             parts = compat_urllib_parse_urlparse(urlh.geturl()) | ||||
|             if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': | ||||
|                 login_ok = False | ||||
|         if not login_ok: | ||||
| @@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor): | ||||
|                 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', | ||||
|                 video_id, 'Downloading flv info') | ||||
|  | ||||
|             flv_info = compat_urlparse.parse_qs(flv_info_webpage) | ||||
|             flv_info = compat_parse_qs(flv_info_webpage) | ||||
|             if 'url' not in flv_info: | ||||
|                 if 'deleted' in flv_info: | ||||
|                     raise ExtractorError('The video has been deleted.', | ||||
| @@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NiconicoPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nicovideo.jp/mylist/27411728', | ||||
|         'info_dict': { | ||||
|             'id': '27411728', | ||||
|             'title': 'AKB48のオールナイトニッポン', | ||||
|             'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08', | ||||
|             'uploader': 'のっく', | ||||
|             'uploader_id': '805442', | ||||
|         }, | ||||
|         'playlist_mincount': 225, | ||||
|     }, { | ||||
|         'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _PAGE_SIZE = 100 | ||||
|  | ||||
|     def _call_api(self, list_id, resource, query): | ||||
|         return self._download_json( | ||||
|             'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id, | ||||
|             'Downloading %s JSON metadata' % resource, query=query, | ||||
|             headers={'X-Frontend-Id': 6})['data']['mylist'] | ||||
|  | ||||
|     def _parse_owner(self, item): | ||||
|         owner = item.get('owner') or {} | ||||
|         if owner: | ||||
|             return { | ||||
|                 'uploader': owner.get('name'), | ||||
|                 'uploader_id': owner.get('id'), | ||||
|             } | ||||
|         return {} | ||||
|  | ||||
|     def _fetch_page(self, list_id, page): | ||||
|         page += 1 | ||||
|         items = self._call_api(list_id, 'page %d' % page, { | ||||
|             'page': page, | ||||
|             'pageSize': self._PAGE_SIZE, | ||||
|         })['items'] | ||||
|         for item in items: | ||||
|             video = item.get('video') or {} | ||||
|             video_id = video.get('id') | ||||
|             if not video_id: | ||||
|                 continue | ||||
|             count = video.get('count') or {} | ||||
|             get_count = lambda x: int_or_none(count.get(x)) | ||||
|             info = { | ||||
|                 '_type': 'url', | ||||
|                 'id': video_id, | ||||
|                 'title': video.get('title'), | ||||
|                 'url': 'https://www.nicovideo.jp/watch/' + video_id, | ||||
|                 'description': video.get('shortDescription'), | ||||
|                 'duration': int_or_none(video.get('duration')), | ||||
|                 'view_count': get_count('view'), | ||||
|                 'comment_count': get_count('comment'), | ||||
|                 'ie_key': NiconicoIE.ie_key(), | ||||
|             } | ||||
|             info.update(self._parse_owner(video)) | ||||
|             yield info | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         list_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, list_id) | ||||
|  | ||||
|         entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);', | ||||
|                                           webpage, 'entries') | ||||
|         entries = json.loads(entries_json) | ||||
|         entries = [{ | ||||
|             '_type': 'url', | ||||
|             'ie_key': NiconicoIE.ie_key(), | ||||
|             'url': ('http://www.nicovideo.jp/watch/%s' % | ||||
|                     entry['item_data']['video_id']), | ||||
|         } for entry in entries] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'), | ||||
|             'id': list_id, | ||||
|             'entries': entries, | ||||
|         } | ||||
|         mylist = self._call_api(list_id, 'list', { | ||||
|             'pageSize': 1, | ||||
|         }) | ||||
|         entries = InAdvancePagedList( | ||||
|             functools.partial(self._fetch_page, list_id), | ||||
|             math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE), | ||||
|             self._PAGE_SIZE) | ||||
|         result = self.playlist_result( | ||||
|             entries, list_id, mylist.get('name'), mylist.get('description')) | ||||
|         result.update(self._parse_owner(mylist)) | ||||
|         return result | ||||
|   | ||||
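The rewritten NiconicoPlaylistIE first asks nvapi for the list metadata with pageSize=1 to learn totalItemCount, then hands InAdvancePagedList a fixed page count. A minimal sketch of that arithmetic with a stand-in page function; fetch_page and the item count are placeholders.

    import functools
    import math

    from youtube_dl.utils import InAdvancePagedList

    PAGE_SIZE = 100
    total_item_count = 225  # e.g. the mylist size from the test above

    def fetch_page(list_id, page):
        # Placeholder for NiconicoPlaylistIE._fetch_page, which requests
        # page + 1 with pageSize=PAGE_SIZE from nvapi.nicovideo.jp.
        remaining = total_item_count - page * PAGE_SIZE
        for i in range(min(PAGE_SIZE, remaining)):
            yield {'id': 'sm%d' % (page * PAGE_SIZE + i)}

    entries = InAdvancePagedList(
        functools.partial(fetch_page, '27411728'),
        math.ceil(total_item_count / PAGE_SIZE),  # 3 pages for 225 items
        PAGE_SIZE)
    print(len(entries.getslice(0, None)))  # 225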
| @@ -5,10 +5,11 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     float_or_none, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor): | ||||
|                 '$include': '[HasClosedCaptions]', | ||||
|             }) | ||||
|  | ||||
|         if content_package.get('Constraints', {}).get('Security', {}).get('Type'): | ||||
|         if try_get(content_package, lambda x: x['Constraints']['Security']['Type']): | ||||
|             raise ExtractorError('This video is DRM protected.', expected=True) | ||||
|  | ||||
|         manifest_base_url = content_package_url + 'manifest.' | ||||
| @@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for image in content.get('Images', []): | ||||
|         for image in (content.get('Images') or []): | ||||
|             image_url = image.get('Url') | ||||
|             if not image_url: | ||||
|                 continue | ||||
| @@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor): | ||||
|                     continue | ||||
|                 container.append(e_name) | ||||
|  | ||||
|         season = content.get('Season', {}) | ||||
|         season = content.get('Season') or {} | ||||
|  | ||||
|         info = { | ||||
|             'id': content_id, | ||||
| @@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor): | ||||
|             'timestamp': parse_iso8601(content.get('BroadcastDateTime')), | ||||
|             'episode_number': int_or_none(content.get('Episode')), | ||||
|             'season': season.get('Name'), | ||||
|             'season_number': season.get('Number'), | ||||
|             'season_number': int_or_none(season.get('Number')), | ||||
|             'season_id': season.get('Id'), | ||||
|             'series': content.get('Media', {}).get('Name'), | ||||
|             'series': try_get(content, lambda x: x['Media']['Name']), | ||||
|             'tags': tags, | ||||
|             'categories': categories, | ||||
|             'duration': float_or_none(content_package.get('Duration')), | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
|  | ||||
|         if content_package.get('HasClosedCaptions'): | ||||
|   | ||||
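The NineCNineMedia switch from chained .get(..., {}) to try_get matters when the API returns an explicit null: .get('Media', {}) then yields None rather than the {} default, and the follow-up .get('Name') raises. A tiny sketch, with an invented payload:

    from youtube_dl.utils import try_get

    # Invented payload: 'Media' is present but null, as the API may return.
    content = {'Media': None}

    # Chained .get() with a {} default does not help here:
    #   content.get('Media', {}).get('Name')  ->  AttributeError
    # try_get swallows the error and returns None instead:
    print(try_get(content, lambda x: x['Media']['Name']))  # None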
| @@ -1,235 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     parse_iso8601, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NocoIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' | ||||
|     _LOGIN_URL = 'https://noco.tv/do.php' | ||||
|     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' | ||||
|     _SUB_LANG_TEMPLATE = '&sub_lang=%s' | ||||
|     _NETRC_MACHINE = 'noco' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', | ||||
|             'md5': '0a993f0058ddbcd902630b2047ef710e', | ||||
|             'info_dict': { | ||||
|                 'id': '11538', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Ami Ami Idol - Hello! France', | ||||
|                 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', | ||||
|                 'upload_date': '20140412', | ||||
|                 'uploader': 'Nolife', | ||||
|                 'uploader_id': 'NOL', | ||||
|                 'duration': 2851.2, | ||||
|             }, | ||||
|             'skip': 'Requires noco account', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call', | ||||
|             'md5': 'c190f1f48e313c55838f1f412225934d', | ||||
|             'info_dict': { | ||||
|                 'id': '12610', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The Guild #1 - Wake-Up Call', | ||||
|                 'timestamp': 1403863200, | ||||
|                 'upload_date': '20140627', | ||||
|                 'uploader': 'LBL42', | ||||
|                 'uploader_id': 'LBL', | ||||
|                 'duration': 233.023, | ||||
|             }, | ||||
|             'skip': 'Requires noco account', | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login = self._download_json( | ||||
|             self._LOGIN_URL, None, 'Logging in', | ||||
|             data=urlencode_postdata({ | ||||
|                 'a': 'login', | ||||
|                 'cookie': '1', | ||||
|                 'username': username, | ||||
|                 'password': password, | ||||
|             }), | ||||
|             headers={ | ||||
|                 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', | ||||
|             }) | ||||
|  | ||||
|         if 'erreur' in login: | ||||
|             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _ts(): | ||||
|         return int(time.time() * 1000) | ||||
|  | ||||
|     def _call_api(self, path, video_id, note, sub_lang=None): | ||||
|         ts = compat_str(self._ts() + self._ts_offset) | ||||
|         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() | ||||
|         url = self._API_URL_TEMPLATE % (path, ts, tk) | ||||
|         if sub_lang: | ||||
|             url += self._SUB_LANG_TEMPLATE % sub_lang | ||||
|  | ||||
|         request = sanitized_Request(url) | ||||
|         request.add_header('Referer', self._referer) | ||||
|  | ||||
|         resp = self._download_json(request, video_id, note) | ||||
|  | ||||
|         if isinstance(resp, dict) and resp.get('error'): | ||||
|             self._raise_error(resp['error'], resp['description']) | ||||
|  | ||||
|         return resp | ||||
|  | ||||
|     def _raise_error(self, error, description): | ||||
|         raise ExtractorError( | ||||
|             '%s returned error: %s - %s' % (self.IE_NAME, error, description), | ||||
|             expected=True) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         # Timestamp adjustment offset between server time and local time | ||||
|         # must be calculated in order to use timestamps closest to server's | ||||
|         # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player_url = self._search_regex( | ||||
|             r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1', | ||||
|             webpage, 'noco player', group='player', | ||||
|             default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf') | ||||
|  | ||||
|         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query) | ||||
|         ts = int_or_none(qs.get('ts', [None])[0]) | ||||
|         self._ts_offset = ts - self._ts() if ts else 0 | ||||
|         self._referer = player_url | ||||
|  | ||||
|         medias = self._call_api( | ||||
|             'shows/%s/medias' % video_id, | ||||
|             video_id, 'Downloading video JSON') | ||||
|  | ||||
|         show = self._call_api( | ||||
|             'shows/by_id/%s' % video_id, | ||||
|             video_id, 'Downloading show JSON')[0] | ||||
|  | ||||
|         options = self._call_api( | ||||
|             'users/init', video_id, | ||||
|             'Downloading user options JSON')['options'] | ||||
|         audio_lang_pref = options.get('audio_language') or options.get('language', 'fr') | ||||
|  | ||||
|         if audio_lang_pref == 'original': | ||||
|             audio_lang_pref = show['original_lang'] | ||||
|         if len(medias) == 1: | ||||
|             audio_lang_pref = list(medias.keys())[0] | ||||
|         elif audio_lang_pref not in medias: | ||||
|             audio_lang_pref = 'fr' | ||||
|  | ||||
|         qualities = self._call_api( | ||||
|             'qualities', | ||||
|             video_id, 'Downloading qualities JSON') | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         for audio_lang, audio_lang_dict in medias.items(): | ||||
|             preference = 1 if audio_lang == audio_lang_pref else 0 | ||||
|             for sub_lang, lang_dict in audio_lang_dict['video_list'].items(): | ||||
|                 for format_id, fmt in lang_dict['quality_list'].items(): | ||||
|                     format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id) | ||||
|  | ||||
|                     video = self._call_api( | ||||
|                         'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), | ||||
|                         video_id, 'Downloading %s video JSON' % format_id_extended, | ||||
|                         sub_lang if sub_lang != 'none' else None) | ||||
|  | ||||
|                     file_url = video['file'] | ||||
|                     if not file_url: | ||||
|                         continue | ||||
|  | ||||
|                     if file_url in ['forbidden', 'not found']: | ||||
|                         popmessage = video['popmessage'] | ||||
|                         self._raise_error(popmessage['title'], popmessage['message']) | ||||
|  | ||||
|                     formats.append({ | ||||
|                         'url': file_url, | ||||
|                         'format_id': format_id_extended, | ||||
|                         'width': int_or_none(fmt.get('res_width')), | ||||
|                         'height': int_or_none(fmt.get('res_lines')), | ||||
|                         'abr': int_or_none(fmt.get('audiobitrate'), 1000), | ||||
|                         'vbr': int_or_none(fmt.get('videobitrate'), 1000), | ||||
|                         'filesize': int_or_none(fmt.get('filesize')), | ||||
|                         'format_note': qualities[format_id].get('quality_name'), | ||||
|                         'quality': qualities[format_id].get('priority'), | ||||
|                         'preference': preference, | ||||
|                     }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ') | ||||
|  | ||||
|         if timestamp is not None and timestamp < 0: | ||||
|             timestamp = None | ||||
|  | ||||
|         uploader = show.get('partner_name') | ||||
|         uploader_id = show.get('partner_key') | ||||
|         duration = float_or_none(show.get('duration_ms'), 1000) | ||||
|  | ||||
|         thumbnails = [] | ||||
|         for thumbnail_key, thumbnail_url in show.items(): | ||||
|             m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key) | ||||
|             if not m: | ||||
|                 continue | ||||
|             thumbnails.append({ | ||||
|                 'url': thumbnail_url, | ||||
|                 'width': int(m.group('width')), | ||||
|                 'height': int(m.group('height')), | ||||
|             }) | ||||
|  | ||||
|         episode = show.get('show_TT') or show.get('show_OT') | ||||
|         family = show.get('family_TT') or show.get('family_OT') | ||||
|         episode_number = show.get('episode_number') | ||||
|  | ||||
|         title = '' | ||||
|         if family: | ||||
|             title += family | ||||
|         if episode_number: | ||||
|             title += ' #' + compat_str(episode_number) | ||||
|         if episode: | ||||
|             title += ' - ' + compat_str(episode) | ||||
|  | ||||
|         description = show.get('show_resume') or show.get('family_resume') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnails': thumbnails, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -2,21 +2,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_unquote, | ||||
| ) | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     NO_DEFAULT, | ||||
|     parse_age_limit, | ||||
|     parse_duration, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     urljoin, | ||||
|     url_or_none, | ||||
| @@ -25,11 +21,24 @@ from ..utils import ( | ||||
|  | ||||
| class NRKBaseIE(InfoExtractor): | ||||
|     _GEO_COUNTRIES = ['NO'] | ||||
|     _CDN_REPL_REGEX = r'''(?x):// | ||||
|         (?: | ||||
|             nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0| | ||||
|             nrk-od-no\.telenorcdn\.net| | ||||
|             minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no | ||||
|         )/''' | ||||
|  | ||||
|     def _extract_nrk_formats(self, asset_url, video_id): | ||||
|         return self._extract_m3u8_formats( | ||||
|             re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url), | ||||
|         if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url): | ||||
|             return self._extract_akamai_formats(asset_url, video_id) | ||||
|         asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url) | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             asset_url, video_id, 'mp4', 'm3u8_native', fatal=False) | ||||
|         if not formats and re.search(self._CDN_REPL_REGEX, asset_url): | ||||
|             formats = self._extract_m3u8_formats( | ||||
|                 re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url), | ||||
|                 video_id, 'mp4', 'm3u8_native', fatal=False) | ||||
|         return formats | ||||
|  | ||||
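_extract_nrk_formats above falls back to a randomized Akamai edge by rewriting the CDN portion of the manifest URL with _CDN_REPL_REGEX when the first m3u8 request yields no formats. A rough illustration of that substitution; the stream path is made up.

    import random
    import re

    # Same replacement regex as NRKBaseIE._CDN_REPL_REGEX above.
    CDN_REPL_REGEX = r'''(?x)://
        (?:
            nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
            nrk-od-no\.telenorcdn\.net|
            minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
        )/'''

    # Hypothetical manifest URL on the Telenor CDN:
    asset_url = 'https://nrk-od-no.telenorcdn.net/some/program/master.m3u8'
    print(re.sub(CDN_REPL_REGEX,
                 '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99),
                 asset_url))
    # e.g. https://nrk-od-42.akamaized.net/no/some/program/master.m3u8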
|     def _raise_error(self, data): | ||||
|         MESSAGES = { | ||||
| @@ -47,6 +56,13 @@ class NRKBaseIE(InfoExtractor): | ||||
|         message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type) | ||||
|         raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|  | ||||
|     def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None): | ||||
|         return self._download_json( | ||||
|             urljoin('http://psapi.nrk.no/', path), | ||||
|             video_id, note or 'Downloading %s JSON' % item, | ||||
|             fatal=fatal, query=query, | ||||
|             headers={'Accept-Encoding': 'gzip, deflate, br'}) | ||||
|  | ||||
|  | ||||
| class NRKIE(NRKBaseIE): | ||||
|     _VALID_URL = r'''(?x) | ||||
| @@ -64,7 +80,7 @@ class NRKIE(NRKBaseIE): | ||||
|     _TESTS = [{ | ||||
|         # video | ||||
|         'url': 'http://www.nrk.no/video/PS*150533', | ||||
|         'md5': '706f34cdf1322577589e369e522b50ef', | ||||
|         'md5': 'f46be075326e23ad0e524edfcb06aeb6', | ||||
|         'info_dict': { | ||||
|             'id': '150533', | ||||
|             'ext': 'mp4', | ||||
| @@ -78,7 +94,7 @@ class NRKIE(NRKBaseIE): | ||||
|         # MD5 is unstable | ||||
|         'info_dict': { | ||||
|             'id': '154915', | ||||
|             'ext': 'flv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Slik høres internett ut når du er blind', | ||||
|             'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', | ||||
|             'duration': 20, | ||||
| @@ -98,12 +114,47 @@ class NRKIE(NRKBaseIE): | ||||
|     }, { | ||||
|         'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # podcast | ||||
|         'url': 'nrk:l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'nrk:podcast/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'nrk:150533', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'nrk:clip/150533', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # program | ||||
|         'url': 'nrk:MDDP12000117', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'nrk:program/ENRK10100318', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # direkte | ||||
|         'url': 'nrk:nrk1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'nrk:channel/nrk1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _extract_from_playback(self, video_id): | ||||
|         manifest = self._download_json( | ||||
|             'http://psapi.nrk.no/playback/manifest/%s' % video_id, | ||||
|             video_id, 'Downloading manifest JSON') | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url).split('/')[-1] | ||||
|  | ||||
|         path_templ = 'playback/%s/' + video_id | ||||
|  | ||||
|         def call_playback_api(item, query=None): | ||||
|             return self._call_api(path_templ % item, video_id, item, query=query) | ||||
|         # known values for preferredCdn: akamai, iponly, minicdn and telenor | ||||
|         manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'}) | ||||
|  | ||||
|         video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id | ||||
|  | ||||
|         if manifest.get('playability') == 'nonPlayable': | ||||
|             self._raise_error(manifest['nonPlayable']) | ||||
| @@ -119,13 +170,18 @@ class NRKIE(NRKBaseIE): | ||||
|             format_url = url_or_none(asset.get('url')) | ||||
|             if not format_url: | ||||
|                 continue | ||||
|             if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8': | ||||
|             asset_format = (asset.get('format') or '').lower() | ||||
|             if asset_format == 'hls' or determine_ext(format_url) == 'm3u8': | ||||
|                 formats.extend(self._extract_nrk_formats(format_url, video_id)) | ||||
|             elif asset_format == 'mp3': | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'format_id': asset_format, | ||||
|                     'vcodec': 'none', | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://psapi.nrk.no/playback/metadata/%s' % video_id, | ||||
|             video_id, 'Downloading metadata JSON') | ||||
|         data = call_playback_api('metadata') | ||||
|  | ||||
|         preplay = data['preplay'] | ||||
|         titles = preplay['titles'] | ||||
| @@ -149,60 +205,133 @@ class NRKIE(NRKBaseIE): | ||||
|                 'height': int_or_none(image.get('pixelHeight')), | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|         subtitles = {} | ||||
|         for sub in try_get(playable, lambda x: x['subtitles'], list) or []: | ||||
|             if not isinstance(sub, dict): | ||||
|                 continue | ||||
|             sub_url = url_or_none(sub.get('webVtt')) | ||||
|             if not sub_url: | ||||
|                 continue | ||||
|             sub_key = str_or_none(sub.get('language')) or 'nb' | ||||
|             sub_type = str_or_none(sub.get('type')) | ||||
|             if sub_type: | ||||
|                 sub_key += '-%s' % sub_type | ||||
|             subtitles.setdefault(sub_key, []).append({ | ||||
|                 'url': sub_url, | ||||
|             }) | ||||
|  | ||||
|         legal_age = try_get( | ||||
|             data, lambda x: x['legalAge']['body']['rating']['code'], compat_str) | ||||
|         # https://en.wikipedia.org/wiki/Norwegian_Media_Authority | ||||
|         age_limit = None | ||||
|         if legal_age: | ||||
|             if legal_age == 'A': | ||||
|                 age_limit = 0 | ||||
|             elif legal_age.isdigit(): | ||||
|                 age_limit = int_or_none(legal_age) | ||||
|  | ||||
|         is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series' | ||||
|  | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'alt_title': alt_title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'thumbnails': thumbnails, | ||||
|             'age_limit': age_limit, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_from_playback(video_id) | ||||
|         if is_series: | ||||
|             series = season_id = season_number = episode = episode_number = None | ||||
|             programs = self._call_api( | ||||
|                 'programs/%s' % video_id, video_id, 'programs', fatal=False) | ||||
|             if programs and isinstance(programs, dict): | ||||
|                 series = str_or_none(programs.get('seriesTitle')) | ||||
|                 season_id = str_or_none(programs.get('seasonId')) | ||||
|                 season_number = int_or_none(programs.get('seasonNumber')) | ||||
|                 episode = str_or_none(programs.get('episodeTitle')) | ||||
|                 episode_number = int_or_none(programs.get('episodeNumber')) | ||||
|             if not series: | ||||
|                 series = title | ||||
|             if alt_title: | ||||
|                 title += ' - %s' % alt_title | ||||
|             if not season_number: | ||||
|                 season_number = int_or_none(self._search_regex( | ||||
|                     r'Sesong\s+(\d+)', description or '', 'season number', | ||||
|                     default=None)) | ||||
|             if not episode: | ||||
|                 episode = alt_title if is_series else None | ||||
|             if not episode_number: | ||||
|                 episode_number = int_or_none(self._search_regex( | ||||
|                     r'^(\d+)\.', episode or '', 'episode number', | ||||
|                     default=None)) | ||||
|             if not episode_number: | ||||
|                 episode_number = int_or_none(self._search_regex( | ||||
|                     r'\((\d+)\s*:\s*\d+\)', description or '', | ||||
|                     'episode number', default=None)) | ||||
|             info.update({ | ||||
|                 'title': title, | ||||
|                 'series': series, | ||||
|                 'season_id': season_id, | ||||
|                 'season_number': season_number, | ||||
|                 'episode': episode, | ||||
|                 'episode_number': episode_number, | ||||
|             }) | ||||
|  | ||||
|         return info | ||||
|  | ||||
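
The reworked _real_extract() above reduces to two psapi calls - playback/manifest/<id> (with a preferredCdn hint) and playback/metadata/<id> - followed by the series/episode heuristics. A condensed standalone sketch of that flow, assuming only the endpoints visible in the diff; get_json() and nrk_playback_info() are illustrative names, and format, subtitle, age-limit and geo handling are omitted:

import json
from urllib.parse import urlencode, urljoin
from urllib.request import urlopen

API_ROOT = 'http://psapi.nrk.no/'


def get_json(path, query=None):
    url = urljoin(API_ROOT, path)
    if query:
        url += '?' + urlencode(query)
    with urlopen(url) as resp:
        return json.loads(resp.read().decode('utf-8'))


def nrk_playback_info(video_id):
    # manifest describes the playable assets, metadata the titles/duration/etc.
    manifest = get_json('playback/manifest/%s' % video_id,
                        {'preferredCdn': 'akamai'})
    if manifest.get('playability') == 'nonPlayable':
        raise RuntimeError('programme is not playable')
    metadata = get_json('playback/metadata/%s' % video_id)
    return manifest, metadata['preplay']['titles']
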
|  | ||||
| class NRKTVIE(NRKBaseIE): | ||||
| class NRKTVIE(InfoExtractor): | ||||
|     IE_DESC = 'NRK TV and NRK Radio' | ||||
|     _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})' | ||||
|     _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE | ||||
|     _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://tv.nrk.no/program/MDDP12000117', | ||||
|         'md5': '8270824df46ec629b66aeaa5796b36fb', | ||||
|         'md5': 'c4a5960f1b00b40d47db65c1064e0ab1', | ||||
|         'info_dict': { | ||||
|             'id': 'MDDP12000117AA', | ||||
|             'id': 'MDDP12000117', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Alarm Trolltunga', | ||||
|             'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce', | ||||
|             'duration': 2223, | ||||
|             'duration': 2223.44, | ||||
|             'age_limit': 6, | ||||
|             'subtitles': { | ||||
|                 'nb-nor': [{ | ||||
|                     'ext': 'vtt', | ||||
|                 }], | ||||
|                 'nb-ttv': [{ | ||||
|                     'ext': 'vtt', | ||||
|                 }] | ||||
|             }, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', | ||||
|         'md5': '9a167e54d04671eb6317a37b7bc8a280', | ||||
|         'md5': '8d40dab61cea8ab0114e090b029a0565', | ||||
|         'info_dict': { | ||||
|             'id': 'MUHH48000314AA', | ||||
|             'id': 'MUHH48000314', | ||||
|             'ext': 'mp4', | ||||
|             'title': '20 spørsmål 23.05.2014', | ||||
|             'title': '20 spørsmål - 23. mai 2014', | ||||
|             'alt_title': '23. mai 2014', | ||||
|             'description': 'md5:bdea103bc35494c143c6a9acdd84887a', | ||||
|             'duration': 1741, | ||||
|             'series': '20 spørsmål', | ||||
|             'episode': '23.05.2014', | ||||
|             'episode': '23. mai 2014', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'skip': 'NoProgramRights', | ||||
|     }, { | ||||
|         'url': 'https://tv.nrk.no/program/mdfp15000514', | ||||
|         'info_dict': { | ||||
|             'id': 'MDFP15000514CA', | ||||
|             'id': 'MDFP15000514', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', | ||||
|             'title': 'Kunnskapskanalen - Grunnlovsjubiléet - Stor ståhei for ingenting', | ||||
|             'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', | ||||
|             'duration': 4605, | ||||
|             'duration': 4605.08, | ||||
|             'series': 'Kunnskapskanalen', | ||||
|             'episode': '24.05.2014', | ||||
|             'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -211,63 +340,41 @@ class NRKTVIE(NRKBaseIE): | ||||
|         # single playlist video | ||||
|         'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', | ||||
|         'info_dict': { | ||||
|             'id': 'MSPO40010515-part2', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', | ||||
|             'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', | ||||
|             'id': 'MSPO40010515', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', | ||||
|             'description': 'md5:c03aba1e917561eface5214020551b7a', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Video is geo restricted'], | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         'skip': 'particular part is not supported currently', | ||||
|     }, { | ||||
|         'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': 'MSPO40010515AH', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)', | ||||
|                 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', | ||||
|                 'duration': 772, | ||||
|                 'series': 'Tour de Ski', | ||||
|                 'episode': '06.01.2015', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'MSPO40010515BH', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)', | ||||
|                 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', | ||||
|                 'duration': 6175, | ||||
|                 'series': 'Tour de Ski', | ||||
|                 'episode': '06.01.2015', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }], | ||||
|         'info_dict': { | ||||
|             'id': 'MSPO40010515', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', | ||||
|             'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', | ||||
|             'description': 'md5:c03aba1e917561eface5214020551b7a', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'expected_warnings': ['Video is geo restricted'], | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         'skip': 'Ikke tilgjengelig utenfor Norge', | ||||
|     }, { | ||||
|         'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13', | ||||
|         'info_dict': { | ||||
|             'id': 'KMTE50001317AA', | ||||
|             'id': 'KMTE50001317', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Anno 13:30', | ||||
|             'title': 'Anno - 13. episode', | ||||
|             'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa', | ||||
|             'duration': 2340, | ||||
|             'series': 'Anno', | ||||
|             'episode': '13:30', | ||||
|             'episode': '13. episode', | ||||
|             'season_number': 3, | ||||
|             'episode_number': 13, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -275,17 +382,19 @@ class NRKTVIE(NRKBaseIE): | ||||
|     }, { | ||||
|         'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017', | ||||
|         'info_dict': { | ||||
|             'id': 'MUHH46000317AA', | ||||
|             'id': 'MUHH46000317', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Nytt på Nytt 27.01.2017', | ||||
|             'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b', | ||||
|             'duration': 1796, | ||||
|             'series': 'Nytt på nytt', | ||||
|             'episode': '27.01.2017', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'ProgramRightsHasExpired', | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', | ||||
|         'only_matching': True, | ||||
| @@ -297,177 +406,26 @@ class NRKTVIE(NRKBaseIE): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _api_host = None | ||||
|  | ||||
|     def _extract_from_mediaelement(self, video_id): | ||||
|         api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS | ||||
|  | ||||
|         for api_host in api_hosts: | ||||
|             data = self._download_json( | ||||
|                 'http://%s/mediaelement/%s' % (api_host, video_id), | ||||
|                 video_id, 'Downloading mediaelement JSON', | ||||
|                 fatal=api_host == api_hosts[-1]) | ||||
|             if not data: | ||||
|                 continue | ||||
|             self._api_host = api_host | ||||
|             break | ||||
|  | ||||
|         title = data.get('fullTitle') or data.get('mainTitle') or data['title'] | ||||
|         video_id = data.get('id') or video_id | ||||
|  | ||||
|         urls = [] | ||||
|         entries = [] | ||||
|  | ||||
|         conviva = data.get('convivaStatistics') or {} | ||||
|         live = (data.get('mediaElementType') == 'Live' | ||||
|                 or data.get('isLive') is True or conviva.get('isLive')) | ||||
|  | ||||
|         def make_title(t): | ||||
|             return self._live_title(t) if live else t | ||||
|  | ||||
|         media_assets = data.get('mediaAssets') | ||||
|         if media_assets and isinstance(media_assets, list): | ||||
|             def video_id_and_title(idx): | ||||
|                 return ((video_id, title) if len(media_assets) == 1 | ||||
|                         else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) | ||||
|             for num, asset in enumerate(media_assets, 1): | ||||
|                 asset_url = asset.get('url') | ||||
|                 if not asset_url or asset_url in urls: | ||||
|                     continue | ||||
|                 formats = self._extract_nrk_formats(asset_url, video_id) | ||||
|                 if not formats: | ||||
|                     continue | ||||
|                 self._sort_formats(formats) | ||||
|  | ||||
|                 entry_id, entry_title = video_id_and_title(num) | ||||
|                 duration = parse_duration(asset.get('duration')) | ||||
|                 subtitles = {} | ||||
|                 for subtitle in ('webVtt', 'timedText'): | ||||
|                     subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) | ||||
|                     if subtitle_url: | ||||
|                         subtitles.setdefault('no', []).append({ | ||||
|                             'url': compat_urllib_parse_unquote(subtitle_url) | ||||
|                         }) | ||||
|                 entries.append({ | ||||
|                     'id': asset.get('carrierId') or entry_id, | ||||
|                     'title': make_title(entry_title), | ||||
|                     'duration': duration, | ||||
|                     'subtitles': subtitles, | ||||
|                     'formats': formats, | ||||
|                 }) | ||||
|  | ||||
|         if not entries: | ||||
|             media_url = data.get('mediaUrl') | ||||
|             if media_url and media_url not in urls: | ||||
|                 formats = self._extract_nrk_formats(media_url, video_id) | ||||
|                 if formats: | ||||
|                     self._sort_formats(formats) | ||||
|                     duration = parse_duration(data.get('duration')) | ||||
|                     entries = [{ | ||||
|                         'id': video_id, | ||||
|                         'title': make_title(title), | ||||
|                         'duration': duration, | ||||
|                         'formats': formats, | ||||
|                     }] | ||||
|  | ||||
|         if not entries: | ||||
|             self._raise_error(data) | ||||
|  | ||||
|         series = conviva.get('seriesName') or data.get('seriesTitle') | ||||
|         episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') | ||||
|  | ||||
|         season_number = None | ||||
|         episode_number = None | ||||
|         if data.get('mediaElementType') == 'Episode': | ||||
|             _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ | ||||
|                 data.get('relativeOriginUrl', '') | ||||
|             EPISODENUM_RE = [ | ||||
|                 r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.', | ||||
|                 r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})', | ||||
|             ] | ||||
|             season_number = int_or_none(self._search_regex( | ||||
|                 EPISODENUM_RE, _season_episode, 'season number', | ||||
|                 default=None, group='season')) | ||||
|             episode_number = int_or_none(self._search_regex( | ||||
|                 EPISODENUM_RE, _season_episode, 'episode number', | ||||
|                 default=None, group='episode')) | ||||
|  | ||||
|         thumbnails = None | ||||
|         images = data.get('images') | ||||
|         if images and isinstance(images, dict): | ||||
|             web_images = images.get('webImages') | ||||
|             if isinstance(web_images, list): | ||||
|                 thumbnails = [{ | ||||
|                     'url': image['imageUrl'], | ||||
|                     'width': int_or_none(image.get('width')), | ||||
|                     'height': int_or_none(image.get('height')), | ||||
|                 } for image in web_images if image.get('imageUrl')] | ||||
|  | ||||
|         description = data.get('description') | ||||
|         category = data.get('mediaAnalytics', {}).get('category') | ||||
|  | ||||
|         common_info = { | ||||
|             'description': description, | ||||
|             'series': series, | ||||
|             'episode': episode, | ||||
|             'season_number': season_number, | ||||
|             'episode_number': episode_number, | ||||
|             'categories': [category] if category else None, | ||||
|             'age_limit': parse_age_limit(data.get('legalAge')), | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
|  | ||||
|         vcodec = 'none' if data.get('mediaType') == 'Audio' else None | ||||
|  | ||||
|         for entry in entries: | ||||
|             entry.update(common_info) | ||||
|             for f in entry['formats']: | ||||
|                 f['vcodec'] = vcodec | ||||
|  | ||||
|         points = data.get('shortIndexPoints') | ||||
|         if isinstance(points, list): | ||||
|             chapters = [] | ||||
|             for next_num, point in enumerate(points, start=1): | ||||
|                 if not isinstance(point, dict): | ||||
|                     continue | ||||
|                 start_time = parse_duration(point.get('startPoint')) | ||||
|                 if start_time is None: | ||||
|                     continue | ||||
|                 end_time = parse_duration( | ||||
|                     data.get('duration') | ||||
|                     if next_num == len(points) | ||||
|                     else points[next_num].get('startPoint')) | ||||
|                 if end_time is None: | ||||
|                     continue | ||||
|                 chapters.append({ | ||||
|                     'start_time': start_time, | ||||
|                     'end_time': end_time, | ||||
|                     'title': point.get('title'), | ||||
|                 }) | ||||
|             if chapters and len(entries) == 1: | ||||
|                 entries[0]['chapters'] = chapters | ||||
|  | ||||
|         return self.playlist_result(entries, video_id, title, description) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_from_mediaelement(video_id) | ||||
|         return self.url_result( | ||||
|             'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id) | ||||
|  | ||||
|  | ||||
| class NRKTVEpisodeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)' | ||||
|     _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2', | ||||
|         'info_dict': { | ||||
|             'id': 'MUHH36005220BA', | ||||
|             'id': 'MUHH36005220', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Kro, krig og kjærlighet 2:6', | ||||
|             'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350', | ||||
|             'duration': 1563, | ||||
|             'title': 'Hellums kro - 2. Kro, krig og kjærlighet', | ||||
|             'description': 'md5:ad92ddffc04cea8ce14b415deef81787', | ||||
|             'duration': 1563.92, | ||||
|             'series': 'Hellums kro', | ||||
|             'season_number': 1, | ||||
|             'episode_number': 2, | ||||
|             'episode': '2:6', | ||||
|             'episode': '2. Kro, krig og kjærlighet', | ||||
|             'age_limit': 6, | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -476,15 +434,16 @@ class NRKTVEpisodeIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8', | ||||
|         'info_dict': { | ||||
|             'id': 'MSUI14000816AA', | ||||
|             'id': 'MSUI14000816', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Backstage 8:30', | ||||
|             'title': 'Backstage - 8. episode', | ||||
|             'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4', | ||||
|             'duration': 1320, | ||||
|             'series': 'Backstage', | ||||
|             'season_number': 1, | ||||
|             'episode_number': 8, | ||||
|             'episode': '8:30', | ||||
|             'episode': '8. episode', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -493,7 +452,7 @@ class NRKTVEpisodeIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         display_id, season_number, episode_number = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
| @@ -505,57 +464,17 @@ class NRKTVEpisodeIE(InfoExtractor): | ||||
|         assert re.match(NRKTVIE._EPISODE_RE, nrk_id) | ||||
|  | ||||
|         info.update({ | ||||
|             '_type': 'url_transparent', | ||||
|             '_type': 'url', | ||||
|             'id': nrk_id, | ||||
|             'url': 'nrk:%s' % nrk_id, | ||||
|             'ie_key': NRKIE.ie_key(), | ||||
|             'season_number': int(season_number), | ||||
|             'episode_number': int(episode_number), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class NRKTVSerieBaseIE(InfoExtractor): | ||||
|     def _extract_series(self, webpage, display_id, fatal=True): | ||||
|         config = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', | ||||
|                  r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', | ||||
|                  r'PRELOADED_STATE_*\s*=\s*({.+?})\s*\n'), | ||||
|                 webpage, 'config', default='{}' if not fatal else NO_DEFAULT), | ||||
|             display_id, fatal=False, transform_source=js_to_json) | ||||
|         if not config: | ||||
|             return | ||||
|         return try_get( | ||||
|             config, | ||||
|             (lambda x: x['initialState']['series'], lambda x: x['series']), | ||||
|             dict) | ||||
|  | ||||
|     def _extract_seasons(self, domain, series_id, seasons): | ||||
|         if isinstance(seasons, dict): | ||||
|             seasons = seasons.get('seasons') | ||||
|         if not isinstance(seasons, list): | ||||
|             return [] | ||||
|         entries = [] | ||||
|         for season in seasons: | ||||
|             if not isinstance(season, dict): | ||||
|                 continue | ||||
|             episodes = self._extract_episodes(season) | ||||
|             if episodes: | ||||
|                 entries.extend(episodes) | ||||
|                 continue | ||||
|             season_name = season.get('name') | ||||
|             if season_name and isinstance(season_name, compat_str): | ||||
|                 entries.append(self.url_result( | ||||
|                     'https://%s.nrk.no/serie/%s/sesong/%s' | ||||
|                     % (domain, series_id, season_name), | ||||
|                     ie=NRKTVSeasonIE.ie_key(), | ||||
|                     video_title=season.get('title'))) | ||||
|         return entries | ||||
|  | ||||
|     def _extract_episodes(self, season): | ||||
|         if not isinstance(season, dict): | ||||
|             return [] | ||||
|         return self._extract_entries(season.get('episodes')) | ||||
|  | ||||
| class NRKTVSerieBaseIE(NRKBaseIE): | ||||
|     def _extract_entries(self, entry_list): | ||||
|         if not isinstance(entry_list, list): | ||||
|             return [] | ||||
| @@ -564,8 +483,6 @@ class NRKTVSerieBaseIE(InfoExtractor): | ||||
|             nrk_id = episode.get('prfId') or episode.get('episodeId') | ||||
|             if not nrk_id or not isinstance(nrk_id, compat_str): | ||||
|                 continue | ||||
|             if not re.match(NRKTVIE._EPISODE_RE, nrk_id): | ||||
|                 continue | ||||
|             entries.append(self.url_result( | ||||
|                 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)) | ||||
|         return entries | ||||
| @@ -577,9 +494,13 @@ class NRKTVSerieBaseIE(InfoExtractor): | ||||
|             if embedded.get(asset_key): | ||||
|                 return asset_key | ||||
|  | ||||
|     @staticmethod | ||||
|     def _catalog_name(serie_kind): | ||||
|         return 'podcast' if serie_kind in ('podcast', 'podkast') else 'series' | ||||
|  | ||||
|     def _entries(self, data, display_id): | ||||
|         for page_num in itertools.count(1): | ||||
|             embedded = data.get('_embedded') | ||||
|             embedded = data.get('_embedded') or data | ||||
|             if not isinstance(embedded, dict): | ||||
|                 break | ||||
|             assets_key = self._extract_assets_key(embedded) | ||||
| @@ -594,25 +515,32 @@ class NRKTVSerieBaseIE(InfoExtractor): | ||||
|             for e in self._extract_entries(entries): | ||||
|                 yield e | ||||
|             # Find next URL | ||||
|             next_url = urljoin( | ||||
|                 'https://psapi.nrk.no/', | ||||
|                 try_get( | ||||
|             next_url_path = try_get( | ||||
|                 data, | ||||
|                 (lambda x: x['_links']['next']['href'], | ||||
|                  lambda x: x['_embedded'][assets_key]['_links']['next']['href']), | ||||
|                     compat_str)) | ||||
|             if not next_url: | ||||
|                 compat_str) | ||||
|             if not next_url_path: | ||||
|                 break | ||||
|             data = self._download_json( | ||||
|                 next_url, display_id, | ||||
|                 'Downloading %s JSON page %d' % (assets_key, page_num), | ||||
|             data = self._call_api( | ||||
|                 next_url_path, display_id, | ||||
|                 note='Downloading %s JSON page %d' % (assets_key, page_num), | ||||
|                 fatal=False) | ||||
|             if not data: | ||||
|                 break | ||||
|  | ||||
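
_entries() above walks psapi's HAL-style pagination: read the embedded items, then follow _links.next.href until it disappears. A standalone sketch under those assumptions; fetch_all_episode_ids() is a hypothetical helper and 'episodes' stands in for whichever assets key _extract_assets_key() actually picks:

import json
from urllib.parse import urljoin
from urllib.request import urlopen

API_ROOT = 'https://psapi.nrk.no/'


def get_json(url):
    with urlopen(url) as resp:
        return json.loads(resp.read().decode('utf-8'))


def fetch_all_episode_ids(path):
    data = get_json(urljoin(API_ROOT, path + '?pageSize=50'))
    ids = []
    while True:
        embedded = data.get('_embedded') or data
        for episode in embedded.get('episodes') or []:
            nrk_id = episode.get('prfId') or episode.get('episodeId')
            if nrk_id:
                ids.append(nrk_id)
        # follow the next link relative to the API root, as _call_api does
        next_path = ((data.get('_links') or {}).get('next') or {}).get('href')
        if not next_path:
            break
        data = get_json(urljoin(API_ROOT, next_path))
    return ids
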
|  | ||||
| class NRKTVSeasonIE(NRKTVSerieBaseIE): | ||||
|     _VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?P<domain>tv|radio)\.nrk\.no/ | ||||
|                         (?P<serie_kind>serie|pod[ck]ast)/ | ||||
|                         (?P<serie>[^/]+)/ | ||||
|                         (?: | ||||
|                             (?:sesong/)?(?P<id>\d+)| | ||||
|                             sesong/(?P<id_2>[^/?#&]+) | ||||
|                         ) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://tv.nrk.no/serie/backstage/sesong/1', | ||||
|         'info_dict': { | ||||
| @@ -648,23 +576,35 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE): | ||||
|         # 180 entries, single page | ||||
|         'url': 'https://tv.nrk.no/serie/spangas/sesong/1', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/diagnose-kverulant', | ||||
|         'info_dict': { | ||||
|             'id': 'hele_historien/diagnose-kverulant', | ||||
|             'title': 'Diagnose kverulant', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podkast/loerdagsraadet/sesong/202101', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) | ||||
|         return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url) | ||||
|                 else super(NRKTVSeasonIE, cls).suitable(url)) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         domain = mobj.group('domain') | ||||
|         serie_kind = mobj.group('serie_kind') | ||||
|         serie = mobj.group('serie') | ||||
|         season_id = mobj.group('id') | ||||
|         season_id = mobj.group('id') or mobj.group('id_2') | ||||
|         display_id = '%s/%s' % (serie, season_id) | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'https://psapi.nrk.no/%s/catalog/series/%s/seasons/%s' | ||||
|             % (domain, serie, season_id), display_id, query={'pageSize': 50}) | ||||
|         data = self._call_api( | ||||
|             '%s/catalog/%s/%s/seasons/%s' | ||||
|             % (domain, self._catalog_name(serie_kind), serie, season_id), | ||||
|             display_id, 'season', query={'pageSize': 50}) | ||||
|  | ||||
|         title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id | ||||
|         return self.playlist_result( | ||||
| @@ -673,8 +613,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE): | ||||
|  | ||||
|  | ||||
| class NRKTVSeriesIE(NRKTVSerieBaseIE): | ||||
|     _VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' | ||||
|     _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/(?P<serie_kind>serie|pod[ck]ast)/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         # new layout, instalments | ||||
|         'url': 'https://tv.nrk.no/serie/groenn-glede', | ||||
| @@ -696,7 +635,6 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): | ||||
|             'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e', | ||||
|         }, | ||||
|         'playlist_mincount': 30, | ||||
|         'expected_warnings': ['HTTP Error 404: Not Found'], | ||||
|     }, { | ||||
|         # new layout, seasons | ||||
|         'url': 'https://tv.nrk.no/serie/backstage', | ||||
| @@ -706,14 +644,13 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): | ||||
|             'description': 'md5:63692ceb96813d9a207e9910483d948b', | ||||
|         }, | ||||
|         'playlist_mincount': 60, | ||||
|         'expected_warnings': ['HTTP Error 404: Not Found'], | ||||
|     }, { | ||||
|         # old layout | ||||
|         'url': 'https://tv.nrksuper.no/serie/labyrint', | ||||
|         'info_dict': { | ||||
|             'id': 'labyrint', | ||||
|             'title': 'Labyrint', | ||||
|             'description': 'md5:318b597330fdac5959247c9b69fdb1ec', | ||||
|             'description': 'I Daidalos sin undersjøiske Labyrint venter spennende oppgaver, skumle robotskapninger og slim.', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }, { | ||||
| @@ -729,70 +666,71 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): | ||||
|         'url': 'https://radio.nrk.no/serie/dickie-dick-dickens', | ||||
|         'info_dict': { | ||||
|             'id': 'dickie-dick-dickens', | ||||
|             'title': 'Dickie Dick Dickens', | ||||
|             'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f', | ||||
|         }, | ||||
|         'playlist_mincount': 8, | ||||
|         'expected_warnings': ['HTTP Error 404: Not Found'], | ||||
|     }, { | ||||
|         'url': 'https://nrksuper.no/serie/labyrint', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podkast/ulrikkes_univers', | ||||
|         'info_dict': { | ||||
|             'id': 'ulrikkes_univers', | ||||
|         }, | ||||
|         'playlist_mincount': 10, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/nrkno-poddkast-26588-134079-05042018030000', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return ( | ||||
|             False if any(ie.suitable(url) | ||||
|                          for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE)) | ||||
|                          for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE)) | ||||
|             else super(NRKTVSeriesIE, cls).suitable(url)) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         domain = mobj.group('domain') | ||||
|         series_id = mobj.group('id') | ||||
|         site, serie_kind, series_id = re.match(self._VALID_URL, url).groups() | ||||
|         is_radio = site == 'radio.nrk' | ||||
|         domain = 'radio' if is_radio else 'tv' | ||||
|  | ||||
|         title = description = None | ||||
|         size_prefix = 'p' if is_radio else 'embeddedInstalmentsP' | ||||
|         series = self._call_api( | ||||
|             '%s/catalog/%s/%s' | ||||
|             % (domain, self._catalog_name(serie_kind), series_id), | ||||
|             series_id, 'serie', query={size_prefix + 'ageSize': 50}) | ||||
|         titles = try_get(series, [ | ||||
|             lambda x: x['titles'], | ||||
|             lambda x: x[x['type']]['titles'], | ||||
|             lambda x: x[x['seriesType']]['titles'], | ||||
|         ]) or {} | ||||
|  | ||||
|         webpage = self._download_webpage(url, series_id) | ||||
|  | ||||
|         series = self._extract_series(webpage, series_id, fatal=False) | ||||
|         if series: | ||||
|             title = try_get(series, lambda x: x['titles']['title'], compat_str) | ||||
|             description = try_get( | ||||
|                 series, lambda x: x['titles']['subtitle'], compat_str) | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'https://psapi.nrk.no/%s/catalog/series/%s/instalments' | ||||
|             % (domain, series_id), series_id, query={'pageSize': 50}, | ||||
|             fatal=False) | ||||
|         if data: | ||||
|             return self.playlist_result( | ||||
|                 self._entries(data, series_id), series_id, title, description) | ||||
|  | ||||
|         # New layout (e.g. https://tv.nrk.no/serie/backstage) | ||||
|         if series: | ||||
|         entries = [] | ||||
|             entries.extend(self._extract_seasons(domain, series_id, series.get('seasons'))) | ||||
|             entries.extend(self._extract_entries(series.get('instalments'))) | ||||
|             entries.extend(self._extract_episodes(series.get('extraMaterial'))) | ||||
|             return self.playlist_result(entries, series_id, title, description) | ||||
|         entries.extend(self._entries(series, series_id)) | ||||
|         embedded = series.get('_embedded') or {} | ||||
|         linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or [] | ||||
|         embedded_seasons = embedded.get('seasons') or [] | ||||
|         if len(linked_seasons) > len(embedded_seasons): | ||||
|             for season in linked_seasons: | ||||
|                 season_url = urljoin(url, season.get('href')) | ||||
|                 if not season_url: | ||||
|                     season_name = season.get('name') | ||||
|                     if season_name and isinstance(season_name, compat_str): | ||||
|                         season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name) | ||||
|                 if season_url: | ||||
|                     entries.append(self.url_result( | ||||
|                         season_url, ie=NRKTVSeasonIE.ie_key(), | ||||
|                         video_title=season.get('title'))) | ||||
|         else: | ||||
|             for season in embedded_seasons: | ||||
|                 entries.extend(self._entries(season, series_id)) | ||||
|         entries.extend(self._entries( | ||||
|             embedded.get('extraMaterial') or {}, series_id)) | ||||
|  | ||||
|         # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint) | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format( | ||||
|                     series=series_id, season=season_id)) | ||||
|             for season_id in re.findall(self._ITEM_RE, webpage) | ||||
|         ] | ||||
|  | ||||
|         title = self._html_search_meta( | ||||
|             'seriestitle', webpage, | ||||
|             'title', default=None) or self._og_search_title( | ||||
|             webpage, fatal=False) | ||||
|         if title: | ||||
|             title = self._search_regex( | ||||
|                 r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title) | ||||
|  | ||||
|         description = self._html_search_meta( | ||||
|             'series_description', webpage, | ||||
|             'description', default=None) or self._og_search_description(webpage) | ||||
|  | ||||
|         return self.playlist_result(entries, series_id, title, description) | ||||
|         return self.playlist_result( | ||||
|             entries, series_id, titles.get('title'), titles.get('subtitle')) | ||||
|  | ||||
|  | ||||
| class NRKTVDirekteIE(NRKTVIE): | ||||
| @@ -808,6 +746,38 @@ class NRKTVDirekteIE(NRKTVIE): | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class NRKRadioPodkastIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8', | ||||
|         'md5': '8d40dab61cea8ab0114e090b029a0565', | ||||
|         'info_dict': { | ||||
|             'id': 'MUHH48000314AA', | ||||
|             'ext': 'mp4', | ||||
|             'title': '20 spørsmål 23.05.2014', | ||||
|             'description': 'md5:bdea103bc35494c143c6a9acdd84887a', | ||||
|             'duration': 1741, | ||||
|             'series': '20 spørsmål', | ||||
|             'episode': '23.05.2014', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/sesong/1/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/bortfoert-i-bergen/l_774d1a2c-7aa7-4965-8d1a-2c7aa7d9652c', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id) | ||||
|  | ||||
|  | ||||
| class NRKPlaylistBaseIE(InfoExtractor): | ||||
|     def _extract_description(self, webpage): | ||||
|         pass | ||||
| @@ -896,14 +866,8 @@ class NRKSkoleIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id, | ||||
|             video_id) | ||||
|  | ||||
|         nrk_id = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>', | ||||
|                 webpage, 'application json'), | ||||
|             video_id)['activeMedia']['psId'] | ||||
|         nrk_id = self._download_json( | ||||
|             'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id, | ||||
|             video_id)['psId'] | ||||
|  | ||||
|         return self.url_result('nrk:%s' % nrk_id) | ||||
|   | ||||
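
The NRKSkoleIE change just above swaps the mimir embed-page scrape for a single JSON request whose psId feeds the generic nrk: URL. A short sketch of that lookup; skole_ps_id() is an illustrative name:

import json
from urllib.request import urlopen


def skole_ps_id(media_id):
    # one JSON request replaces the old mimir webpage scrape
    url = 'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % media_id
    with urlopen(url) as resp:
        return json.loads(resp.read().decode('utf-8'))['psId']


# the returned psId is then handed off as 'nrk:<psId>' to NRKIE
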
| @@ -450,6 +450,18 @@ class PeerTubeIE(InfoExtractor): | ||||
|             'tags': ['framasoft', 'peertube'], | ||||
|             'categories': ['Science & Technology'], | ||||
|         } | ||||
|     }, { | ||||
|         # Issue #26002 | ||||
|         'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc', | ||||
|         'info_dict': { | ||||
|             'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Dot matrix printer shell demo', | ||||
|             'uploader_id': '3', | ||||
|             'timestamp': 1587401293, | ||||
|             'upload_date': '20200420', | ||||
|             'uploader': 'Drew DeVault', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', | ||||
|         'only_matching': True, | ||||
| @@ -526,7 +538,15 @@ class PeerTubeIE(InfoExtractor): | ||||
|         title = video['name'] | ||||
|  | ||||
|         formats = [] | ||||
|         for file_ in video['files']: | ||||
|         files = video.get('files') or [] | ||||
|         for playlist in (video.get('streamingPlaylists') or []): | ||||
|             if not isinstance(playlist, dict): | ||||
|                 continue | ||||
|             playlist_files = playlist.get('files') | ||||
|             if not (playlist_files and isinstance(playlist_files, list)): | ||||
|                 continue | ||||
|             files.extend(playlist_files) | ||||
|         for file_ in files: | ||||
|             if not isinstance(file_, dict): | ||||
|                 continue | ||||
|             file_url = url_or_none(file_.get('fileUrl')) | ||||
| @@ -541,6 +561,10 @@ class PeerTubeIE(InfoExtractor): | ||||
|                 'format_id': format_id, | ||||
|                 'filesize': file_size, | ||||
|             }) | ||||
|             if format_id == '0p': | ||||
|                 f['vcodec'] = 'none' | ||||
|             else: | ||||
|                 f['fps'] = int_or_none(file_.get('fps')) | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
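
The PeerTube change collects download candidates from both the top-level files array and each streamingPlaylists[].files list (some instances appear to expose files only under the playlists, see the issue referenced in the test), and treats the '0p' resolution as audio-only. A small sketch of the merge step, with an invented video dict:

def collect_peertube_files(video):
    # merge the classic top-level files with any files nested in streamingPlaylists
    files = list(video.get('files') or [])
    for playlist in (video.get('streamingPlaylists') or []):
        if isinstance(playlist, dict) and isinstance(playlist.get('files'), list):
            files.extend(playlist['files'])
    return files


video = {
    'files': [{'fileUrl': 'https://example.org/720.mp4'}],
    'streamingPlaylists': [
        {'files': [{'fileUrl': 'https://example.org/0.mp4'}]},
    ],
}
print(len(collect_peertube_files(video)))  # -> 2
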
| @@ -6,16 +6,33 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     dict_get, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PikselIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)' | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (?: | ||||
|             (?: | ||||
|                 player\. | ||||
|                     (?: | ||||
|                         olympusattelecom| | ||||
|                         vibebyvista | ||||
|                     )| | ||||
|                 (?:api|player)\.multicastmedia| | ||||
|                 (?:api-ovp|player)\.piksel | ||||
|             )\.com| | ||||
|             (?: | ||||
|                 mz-edge\.stream\.co| | ||||
|                 movie-s\.nhk\.or | ||||
|             )\.jp| | ||||
|             vidego\.baltimorecity\.gov | ||||
|         )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://player.piksel.com/v/ums2867l', | ||||
| @@ -56,46 +73,41 @@ class PikselIE(InfoExtractor): | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     def _call_api(self, app_token, resource, display_id, query, fatal=True): | ||||
|         response = (self._download_json( | ||||
|             'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token), | ||||
|             display_id, query=query, fatal=fatal) or {}).get('response') | ||||
|         failure = try_get(response, lambda x: x['failure']['reason']) | ||||
|         if failure: | ||||
|             if fatal: | ||||
|                 raise ExtractorError(failure, expected=True) | ||||
|             self.report_warning(failure) | ||||
|         return response | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         ref_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_id = self._search_regex( | ||||
|             r'data-de-program-uuid=[\'"]([a-z0-9]+)', | ||||
|             webpage, 'program uuid', default=display_id) | ||||
|         app_token = self._search_regex([ | ||||
|             r'clientAPI\s*:\s*"([^"]+)"', | ||||
|             r'data-de-api-key\s*=\s*"([^"]+)"' | ||||
|         ], webpage, 'app token') | ||||
|         response = self._download_json( | ||||
|             'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, | ||||
|             video_id, query={ | ||||
|                 'v': video_id | ||||
|             })['response'] | ||||
|         failure = response.get('failure') | ||||
|         if failure: | ||||
|             raise ExtractorError(response['failure']['reason'], expected=True) | ||||
|         video_data = response['WsProgramResponse']['program']['asset'] | ||||
|         query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} | ||||
|         program = self._call_api( | ||||
|             app_token, 'program', display_id, query)['WsProgramResponse']['program'] | ||||
|         video_id = program['uuid'] | ||||
|         video_data = program['asset'] | ||||
|         title = video_data['title'] | ||||
|         asset_type = dict_get(video_data, ['assetType', 'asset_type']) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         m3u8_url = dict_get(video_data, [ | ||||
|             'm3u8iPadURL', | ||||
|             'ipadM3u8Url', | ||||
|             'm3u8AndroidURL', | ||||
|             'm3u8iPhoneURL', | ||||
|             'iphoneM3u8Url']) | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|         asset_type = dict_get(video_data, ['assetType', 'asset_type']) | ||||
|         for asset_file in video_data.get('assetFiles', []): | ||||
|         def process_asset_file(asset_file): | ||||
|             if not asset_file: | ||||
|                 return | ||||
|             # TODO: extract rtmp formats | ||||
|             http_url = asset_file.get('http_url') | ||||
|             if not http_url: | ||||
|                 continue | ||||
|                 return | ||||
|             tbr = None | ||||
|             vbr = int_or_none(asset_file.get('videoBitrate'), 1024) | ||||
|             abr = int_or_none(asset_file.get('audioBitrate'), 1024) | ||||
| @@ -118,6 +130,43 @@ class PikselIE(InfoExtractor): | ||||
|                 'filesize': int_or_none(asset_file.get('filesize')), | ||||
|                 'tbr': tbr, | ||||
|             }) | ||||
|  | ||||
|         def process_asset_files(asset_files): | ||||
|             for asset_file in (asset_files or []): | ||||
|                 process_asset_file(asset_file) | ||||
|  | ||||
|         process_asset_files(video_data.get('assetFiles')) | ||||
|         process_asset_file(video_data.get('referenceFile')) | ||||
|         if not formats: | ||||
|             asset_id = video_data.get('assetid') or program.get('assetid') | ||||
|             if asset_id: | ||||
|                 process_asset_files(try_get(self._call_api( | ||||
|                     app_token, 'asset_file', display_id, { | ||||
|                         'assetid': asset_id, | ||||
|                     }, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) | ||||
|  | ||||
|         m3u8_url = dict_get(video_data, [ | ||||
|             'm3u8iPadURL', | ||||
|             'ipadM3u8Url', | ||||
|             'm3u8AndroidURL', | ||||
|             'm3u8iPhoneURL', | ||||
|             'iphoneM3u8Url']) | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|  | ||||
|         smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) | ||||
|         if smil_url: | ||||
|             transform_source = None | ||||
|             if ref_id == 'nhkworld': | ||||
|                 # TODO: figure out if this is something to be fixed in urljoin, | ||||
|                 # _parse_smil_formats or keep it here | ||||
|                 transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"') | ||||
|             formats.extend(self._extract_smil_formats( | ||||
|                 re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id, | ||||
|                 transform_source=transform_source, fatal=False)) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|   | ||||
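
The refactored Piksel extractor funnels every lookup (program, asset_file) through the ws_<resource> endpoint and raises or warns on response.failure.reason. A standalone sketch of that call pattern, assuming the endpoint shape shown above; piksel_api() is an illustrative name and the token is a placeholder:

import json
from urllib.parse import urlencode
from urllib.request import urlopen


def piksel_api(app_token, resource, query):
    url = ('http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5?%s'
           % (resource, app_token, urlencode(query)))
    with urlopen(url) as resp:
        response = json.loads(resp.read().decode('utf-8')).get('response') or {}
    failure = (response.get('failure') or {}).get('reason')
    if failure:
        raise RuntimeError(failure)
    return response


# e.g. piksel_api('<app token>', 'program', {'v': 'ums2867l'})
#      piksel_api('<app token>', 'asset_file', {'assetid': '<asset id>'})
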
| @@ -288,14 +288,24 @@ class PornHubIE(PornHubBaseIE): | ||||
|             video_urls.append((v_url, None)) | ||||
|             video_urls_set.add(v_url) | ||||
|  | ||||
|         def parse_quality_items(quality_items): | ||||
|             q_items = self._parse_json(quality_items, video_id, fatal=False) | ||||
|             if not isinstance(q_items, list): | ||||
|                 return | ||||
|             for item in q_items: | ||||
|                 if isinstance(item, dict): | ||||
|                     add_video_url(item.get('url')) | ||||
|  | ||||
|         if not video_urls: | ||||
|             FORMAT_PREFIXES = ('media', 'quality') | ||||
|             FORMAT_PREFIXES = ('media', 'quality', 'qualityItems') | ||||
|             js_vars = extract_js_vars( | ||||
|                 webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), | ||||
|                 default=None) | ||||
|             if js_vars: | ||||
|                 for key, format_url in js_vars.items(): | ||||
|                     if any(key.startswith(p) for p in FORMAT_PREFIXES): | ||||
|                     if key.startswith(FORMAT_PREFIXES[-1]): | ||||
|                         parse_quality_items(format_url) | ||||
|                     elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]): | ||||
|                         add_video_url(format_url) | ||||
|             if not video_urls and re.search( | ||||
|                     r'<[^>]+\bid=["\']lockedPlayer', webpage): | ||||
| @@ -351,12 +361,16 @@ class PornHubIE(PornHubBaseIE): | ||||
|             r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', | ||||
|             webpage, 'uploader', default=None) | ||||
|  | ||||
|         def extract_vote_count(kind, name): | ||||
|             return self._extract_count( | ||||
|                 (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind, | ||||
|                  r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind), | ||||
|                 webpage, name) | ||||
|  | ||||
|         view_count = self._extract_count( | ||||
|             r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') | ||||
|         like_count = self._extract_count( | ||||
|             r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like') | ||||
|         dislike_count = self._extract_count( | ||||
|             r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike') | ||||
|         like_count = extract_vote_count('Up', 'like') | ||||
|         dislike_count = extract_vote_count('Down', 'dislike') | ||||
|         comment_count = self._extract_count( | ||||
|             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') | ||||
|  | ||||
|   | ||||
| @@ -103,21 +103,27 @@ class RaiBaseIE(InfoExtractor): | ||||
|         }.items() if v is not None) | ||||
|  | ||||
|     @staticmethod | ||||
| -    def _extract_subtitles(url, subtitle_url): | ||||
| +    def _extract_subtitles(url, video_data): | ||||
|         STL_EXT = 'stl' | ||||
|         SRT_EXT = 'srt' | ||||
|         subtitles = {} | ||||
|         if subtitle_url and isinstance(subtitle_url, compat_str): | ||||
|             subtitle_url = urljoin(url, subtitle_url) | ||||
|             STL_EXT = '.stl' | ||||
|             SRT_EXT = '.srt' | ||||
|             subtitles['it'] = [{ | ||||
|                 'ext': 'stl', | ||||
|                 'url': subtitle_url, | ||||
|             }] | ||||
|             if subtitle_url.endswith(STL_EXT): | ||||
|                 srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT | ||||
|                 subtitles['it'].append({ | ||||
|                     'ext': 'srt', | ||||
|                     'url': srt_url, | ||||
|         subtitles_array = video_data.get('subtitlesArray') or [] | ||||
|         for k in ('subtitles', 'subtitlesUrl'): | ||||
|             subtitles_array.append({'url': video_data.get(k)}) | ||||
|         for subtitle in subtitles_array: | ||||
|             sub_url = subtitle.get('url') | ||||
|             if sub_url and isinstance(sub_url, compat_str): | ||||
|                 sub_lang = subtitle.get('language') or 'it' | ||||
|                 sub_url = urljoin(url, sub_url) | ||||
|                 sub_ext = determine_ext(sub_url, SRT_EXT) | ||||
|                 subtitles.setdefault(sub_lang, []).append({ | ||||
|                     'ext': sub_ext, | ||||
|                     'url': sub_url, | ||||
|                 }) | ||||
|                 if STL_EXT == sub_ext: | ||||
|                     subtitles[sub_lang].append({ | ||||
|                         'ext': SRT_EXT, | ||||
|                         'url': sub_url[:-len(STL_EXT)] + SRT_EXT, | ||||
|                     }) | ||||
|         return subtitles | ||||
|  | ||||
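A rough standalone sketch of the new grouping logic, assuming subtitles_array is the list built above (dicts with a 'url' and an optional 'language'); a plain suffix check stands in for determine_ext, and .stl tracks get a guessed .srt sibling appended, which is what the 'count:2' test expectations below rely on:

STL_EXT = 'stl'
SRT_EXT = 'srt'

def collect_subtitles(subtitles_array):
    subtitles = {}
    for subtitle in subtitles_array:
        sub_url = subtitle.get('url')
        if not sub_url:
            continue
        lang = subtitle.get('language') or 'it'
        # crude extension guess; the extractor uses determine_ext() instead
        ext = sub_url.rpartition('.')[2].lower() or SRT_EXT
        subtitles.setdefault(lang, []).append({'ext': ext, 'url': sub_url})
        if ext == STL_EXT:
            # derive the SRT variant by swapping the file extension
            subtitles[lang].append({
                'ext': SRT_EXT,
                'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
            })
    return subtitles

print(collect_subtitles([{'url': 'https://example.com/sub.stl'}]))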
| @@ -138,6 +144,9 @@ class RaiPlayIE(RaiBaseIE): | ||||
|             'duration': 6160, | ||||
|             'series': 'Report', | ||||
|             'season': '2013/14', | ||||
|             'subtitles': { | ||||
|                 'it': 'count:2', | ||||
|             }, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -145,6 +154,10 @@ class RaiPlayIE(RaiBaseIE): | ||||
|     }, { | ||||
|         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # subtitles at 'subtitlesArray' key (see #27698) | ||||
|         'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -172,7 +185,7 @@ class RaiPlayIE(RaiBaseIE): | ||||
|         if date_published and time_published: | ||||
|             date_published += ' ' + time_published | ||||
|  | ||||
| -        subtitles = self._extract_subtitles(url, video.get('subtitles')) | ||||
| +        subtitles = self._extract_subtitles(url, video) | ||||
|  | ||||
|         program_info = media.get('program_info') or {} | ||||
|         season = media.get('season') | ||||
| @@ -326,6 +339,22 @@ class RaiIE(RaiBaseIE): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key | ||||
|         'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html', | ||||
|         'info_dict': { | ||||
|             'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015', | ||||
|             'description': 'md5:d291b03407ec505f95f27970c0b025f4', | ||||
|             'upload_date': '20150913', | ||||
|             'subtitles': { | ||||
|                 'it': 'count:2', | ||||
|             }, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # Direct MMS URL | ||||
|         'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', | ||||
| @@ -366,7 +395,7 @@ class RaiIE(RaiBaseIE): | ||||
|                     'url': compat_urlparse.urljoin(url, thumbnail_url), | ||||
|                 }) | ||||
|  | ||||
| -        subtitles = self._extract_subtitles(url, media.get('subtitlesUrl')) | ||||
| +        subtitles = self._extract_subtitles(url, media) | ||||
|  | ||||
|         info = { | ||||
|             'id': content_id, | ||||
| @@ -403,7 +432,8 @@ class RaiIE(RaiBaseIE): | ||||
|                 r'''(?x) | ||||
|                     (?: | ||||
|                         (?:initEdizione|drawMediaRaiTV)\(| | ||||
| -                        <(?:[^>]+\bdata-id|var\s+uniquename)= | ||||
| +                        <(?:[^>]+\bdata-id|var\s+uniquename)=| | ||||
| +                        <iframe[^>]+\bsrc= | ||||
|                     ) | ||||
|                     (["\']) | ||||
|                     (?:(?!\1).)*\bContentItem-(?P<id>%s) | ||||
|   | ||||
| @@ -7,6 +7,8 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
| @@ -55,10 +57,12 @@ class RedditRIE(InfoExtractor): | ||||
|             'id': 'zv89llsvexdz', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'That small heart attack.', | ||||
| -            'thumbnail': r're:^https?://.*\.jpg$', | ||||
| +            'thumbnail': r're:^https?://.*\.(?:jpg|png)', | ||||
| +            'thumbnails': 'count:4', | ||||
|             'timestamp': 1501941939, | ||||
|             'upload_date': '20170805', | ||||
|             'uploader': 'Antw87', | ||||
|             'duration': 12, | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
| @@ -116,13 +120,40 @@ class RedditRIE(InfoExtractor): | ||||
|         else: | ||||
|             age_limit = None | ||||
|  | ||||
|         thumbnails = [] | ||||
|  | ||||
|         def add_thumbnail(src): | ||||
|             if not isinstance(src, dict): | ||||
|                 return | ||||
|             thumbnail_url = url_or_none(src.get('url')) | ||||
|             if not thumbnail_url: | ||||
|                 return | ||||
|             thumbnails.append({ | ||||
|                 'url': unescapeHTML(thumbnail_url), | ||||
|                 'width': int_or_none(src.get('width')), | ||||
|                 'height': int_or_none(src.get('height')), | ||||
|             }) | ||||
|  | ||||
|         for image in try_get(data, lambda x: x['preview']['images']) or []: | ||||
|             if not isinstance(image, dict): | ||||
|                 continue | ||||
|             add_thumbnail(image.get('source')) | ||||
|             resolutions = image.get('resolutions') | ||||
|             if isinstance(resolutions, list): | ||||
|                 for resolution in resolutions: | ||||
|                     add_thumbnail(resolution) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': video_url, | ||||
|             'title': data.get('title'), | ||||
|             'thumbnail': url_or_none(data.get('thumbnail')), | ||||
|             'thumbnails': thumbnails, | ||||
|             'timestamp': float_or_none(data.get('created_utc')), | ||||
|             'uploader': data.get('author'), | ||||
|             'duration': int_or_none(try_get( | ||||
|                 data, | ||||
|                 (lambda x: x['media']['reddit_video']['duration'], | ||||
|                  lambda x: x['secure_media']['reddit_video']['duration']))), | ||||
|             'like_count': int_or_none(data.get('ups')), | ||||
|             'dislike_count': int_or_none(data.get('downs')), | ||||
|             'comment_count': int_or_none(data.get('num_comments')), | ||||
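As a side note, a minimal sketch of the thumbnail collection added above, assuming data is the post's JSON object: the full-size 'source' image and every entry under 'resolutions' become thumbnail dicts, with the standard library's html.unescape standing in for unescapeHTML:

import html

def extract_thumbnails(data):
    thumbnails = []

    def add_thumbnail(src):
        # skip anything that is not a dict with a usable 'url'
        if not isinstance(src, dict) or not src.get('url'):
            return
        thumbnails.append({
            'url': html.unescape(src['url']),  # preview URLs come HTML-escaped
            'width': src.get('width'),
            'height': src.get('height'),
        })

    for image in (data.get('preview') or {}).get('images') or []:
        if not isinstance(image, dict):
            continue
        add_thumbnail(image.get('source'))
        for resolution in image.get('resolutions') or []:
            add_thumbnail(resolution)
    return thumbnails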
|   | ||||
| @@ -6,14 +6,24 @@ from ..compat import compat_urllib_parse_urlparse | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
|     url_or_none, | ||||
|     xpath_attr, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RuutuIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/| | ||||
|                             static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid= | ||||
|                         ) | ||||
|                         (?P<id>\d+) | ||||
|                     ''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ruutu.fi/video/2058907', | ||||
| @@ -71,15 +81,53 @@ class RuutuIE(InfoExtractor): | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                 'age_limit': 0, | ||||
|             }, | ||||
|             'expected_warnings': ['HTTP Error 502: Bad Gateway'], | ||||
|         } | ||||
|             'expected_warnings': [ | ||||
|                 'HTTP Error 502: Bad Gateway', | ||||
|                 'Failed to download m3u8 information', | ||||
|             ], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.supla.fi/audio/2231370', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # episode | ||||
|             'url': 'https://www.ruutu.fi/video/3401964', | ||||
|             'info_dict': { | ||||
|                 'id': '3401964', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17', | ||||
|                 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba', | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                 'duration': 2582, | ||||
|                 'age_limit': 12, | ||||
|                 'upload_date': '20190508', | ||||
|                 'series': 'Temptation Island Suomi', | ||||
|                 'season_number': 5, | ||||
|                 'episode_number': 17, | ||||
|                 'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'], | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # premium | ||||
|             'url': 'https://www.ruutu.fi/video/3618715', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|     _API_BASE = 'https://gatling.nelonenmedia.fi' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         video_xml = self._download_xml( | ||||
| -            'https://gatling.nelonenmedia.fi/media-xml-cache', video_id, | ||||
| +            '%s/media-xml-cache' % self._API_BASE, video_id, | ||||
|             query={'id': video_id}) | ||||
|  | ||||
|         formats = [] | ||||
| @@ -96,9 +144,18 @@ class RuutuIE(InfoExtractor): | ||||
|                         continue | ||||
|                     processed_urls.append(video_url) | ||||
|                     ext = determine_ext(video_url) | ||||
|                     auth_video_url = url_or_none(self._download_webpage( | ||||
|                         '%s/auth/access/v2' % self._API_BASE, video_id, | ||||
|                         note='Downloading authenticated %s stream URL' % ext, | ||||
|                         fatal=False, query={'stream': video_url})) | ||||
|                     if auth_video_url: | ||||
|                         processed_urls.append(auth_video_url) | ||||
|                         video_url = auth_video_url | ||||
|                     if ext == 'm3u8': | ||||
|                         formats.extend(self._extract_m3u8_formats( | ||||
| -                            video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) | ||||
| +                            video_url, video_id, 'mp4', | ||||
| +                            entry_protocol='m3u8_native', m3u8_id='hls', | ||||
| +                            fatal=False)) | ||||
|                     elif ext == 'f4m': | ||||
|                         formats.extend(self._extract_f4m_formats( | ||||
|                             video_url, video_id, f4m_id='hds', fatal=False)) | ||||
| @@ -136,18 +193,35 @@ class RuutuIE(InfoExtractor): | ||||
|  | ||||
|         extract_formats(video_xml.find('./Clip')) | ||||
|  | ||||
|         def pv(name): | ||||
|             node = find_xpath_attr( | ||||
|                 video_xml, './Clip/PassthroughVariables/variable', 'name', name) | ||||
|             if node is not None: | ||||
|                 return node.get('value') | ||||
|  | ||||
|         if not formats: | ||||
|             drm = xpath_text(video_xml, './Clip/DRM', default=None) | ||||
|         if not formats and drm: | ||||
|             if drm: | ||||
|                 raise ExtractorError('This video is DRM protected.', expected=True) | ||||
|             ns_st_cds = pv('ns_st_cds') | ||||
|             if ns_st_cds != 'free': | ||||
|                 raise ExtractorError('This video is %s.' % ns_st_cds, expected=True) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         themes = pv('themes') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), | ||||
|             'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), | ||||
|             'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), | ||||
| -            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')), | ||||
| +            'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')), | ||||
|             'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), | ||||
|             'upload_date': unified_strdate(pv('date_start')), | ||||
|             'series': pv('series_name'), | ||||
|             'season_number': int_or_none(pv('season_number')), | ||||
|             'episode_number': int_or_none(pv('episode_number')), | ||||
|             'categories': themes.split(',') if themes else [], | ||||
|             'formats': formats, | ||||
|         } | ||||
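A small sketch of the pv() lookup above, using xml.etree directly instead of find_xpath_attr; the <Playerdata> root element name in the sample document is made up, but the ./Clip/PassthroughVariables/variable path and the name/value attributes are the ones the hunk queries:

import xml.etree.ElementTree as ET

def passthrough_variable(video_xml, name):
    # return the value attribute of the <variable> node whose name matches
    for node in video_xml.findall('./Clip/PassthroughVariables/variable'):
        if node.get('name') == name:
            return node.get('value')

video_xml = ET.fromstring(
    '<Playerdata><Clip><PassthroughVariables>'
    '<variable name="season_number" value="5"/>'
    '<variable name="episode_number" value="17"/>'
    '</PassthroughVariables></Clip></Playerdata>')
print(passthrough_variable(video_xml, 'episode_number'))  # -> 17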
|   | ||||
| @@ -10,7 +10,7 @@ from ..utils import ( | ||||
|  | ||||
| class SBSIE(InfoExtractor): | ||||
|     IE_DESC = 'sbs.com.au' | ||||
| -    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)' | ||||
| +    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         # Original URL is handled by the generic IE which finds the iframe: | ||||
| @@ -18,7 +18,7 @@ class SBSIE(InfoExtractor): | ||||
|         'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed', | ||||
|         'md5': '3150cf278965eeabb5b4cea1c963fe0a', | ||||
|         'info_dict': { | ||||
| -            'id': '320403011771', | ||||
| +            'id': '_rFBPRPO4pMR', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Dingo Conservation (The Feed)', | ||||
|             'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', | ||||
| @@ -34,6 +34,15 @@ class SBSIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
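The widened _VALID_URL can be exercised on its own; the three URLs below are taken from the tests above, and each one yields its numeric id group:

import re

_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'

for url in (
        'http://www.sbs.com.au/ondemand/video/single/320403011771/',
        'https://www.sbs.com.au/ondemand/?play=1836638787723',
        'https://www.sbs.com.au/news/embeds/video/1840778819866'):
    # each alternative funnels into the same numeric (?P<id>...) group
    print(re.match(_VALID_URL, url).group('id'))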
|   | ||||
| @@ -4,8 +4,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .brightcove import BrightcoveNewIE | ||||
| from ..compat import compat_str | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     try_get, | ||||
|     update_url_query, | ||||
| ) | ||||
| @@ -41,6 +45,7 @@ class SevenPlusIE(BrightcoveNewIE): | ||||
|     def _real_extract(self, url): | ||||
|         path, episode_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         try: | ||||
|             media = self._download_json( | ||||
|                 'https://videoservice.swm.digital/playback', episode_id, query={ | ||||
|                     'appId': '7plus', | ||||
| @@ -51,6 +56,11 @@ class SevenPlusIE(BrightcoveNewIE): | ||||
|                     'deliveryId': 'csai', | ||||
|                     'videoType': 'vod', | ||||
|                 })['media'] | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 raise ExtractorError(self._parse_json( | ||||
|                     e.cause.read().decode(), episode_id)[0]['error_code'], expected=True) | ||||
|             raise | ||||
|  | ||||
|         for source in media.get('sources', {}): | ||||
|             src = source.get('src') | ||||
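For illustration, a hedged sketch of the new 403 handling using plain urllib against a placeholder endpoint: as in the hunk above, a rejected playback request is assumed to return a JSON array whose first element carries an 'error_code', which is surfaced instead of a bare HTTP error:

import json
import urllib.error
import urllib.request

def fetch_playback(api_url):
    try:
        with urllib.request.urlopen(api_url) as resp:
            # the payload of interest sits under the 'media' key
            return json.loads(resp.read().decode())['media']
    except urllib.error.HTTPError as e:
        if e.code == 403:
            # surface the service's own error code as the failure reason
            raise RuntimeError(json.loads(e.read().decode())[0]['error_code'])
        raise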
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
| @@ -11,38 +13,61 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SkyBaseIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_data = extract_attributes(self._search_regex( | ||||
|             r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)', | ||||
|             webpage, 'video data')) | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' | ||||
|     _SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)' | ||||
|  | ||||
|         video_url = 'ooyala:%s' % video_data['data-video-id'] | ||||
|         if video_data.get('data-token-required') == 'true': | ||||
|             token_fetch_options = self._parse_json(video_data.get( | ||||
|                 'data-token-fetch-options', '{}'), video_id, fatal=False) or {} | ||||
|             token_fetch_url = token_fetch_options.get('url') | ||||
|     def _process_ooyala_element(self, webpage, sdc_el, url): | ||||
|         sdc = extract_attributes(sdc_el) | ||||
|         provider = sdc.get('data-provider') | ||||
|         if provider == 'ooyala': | ||||
|             video_id = sdc['data-sdc-video-id'] | ||||
|             video_url = 'ooyala:%s' % video_id | ||||
|             ie_key = 'Ooyala' | ||||
|             ooyala_el = self._search_regex( | ||||
|                 r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id, | ||||
|                 webpage, 'video data', fatal=False) | ||||
|             if ooyala_el: | ||||
|                 ooyala_attrs = extract_attributes(ooyala_el) or {} | ||||
|                 if ooyala_attrs.get('data-token-required') == 'true': | ||||
|                     token_fetch_url = (self._parse_json(ooyala_attrs.get( | ||||
|                         'data-token-fetch-options', '{}'), | ||||
|                         video_id, fatal=False) or {}).get('url') | ||||
|                     if token_fetch_url: | ||||
|                 embed_token = self._download_webpage(urljoin( | ||||
|                         embed_token = self._download_json(urljoin( | ||||
|                             url, token_fetch_url), video_id, fatal=False) | ||||
|                         if embed_token: | ||||
|                             video_url = smuggle_url( | ||||
|                         video_url, {'embed_token': embed_token.strip('"')}) | ||||
|                                 video_url, {'embed_token': embed_token}) | ||||
|         elif provider == 'brightcove': | ||||
|             video_id = sdc['data-video-id'] | ||||
|             account_id = sdc.get('data-account-id') or '6058004172001' | ||||
|             player_id = sdc.get('data-player-id') or 'RC9PQUaJ6' | ||||
|             video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id) | ||||
|             ie_key = 'BrightcoveNew' | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ie_key': ie_key, | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         info = self._process_ooyala_element(webpage, self._search_regex( | ||||
|             self._SDC_EL_REGEX, webpage, 'sdc element'), url) | ||||
|         info.update({ | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': strip_or_none(self._og_search_description(webpage)), | ||||
|             'ie_key': 'Ooyala', | ||||
|         } | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class SkySportsIE(SkyBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|     IE_NAME = 'sky:sports' | ||||
|     _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', | ||||
|         'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', | ||||
|         'info_dict': { | ||||
| @@ -52,19 +77,55 @@ class SkySportsIE(SkyBaseIE): | ||||
|             'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'https://www.skysports.com/watch/video/sports/f1/12160544/abu-dhabi-gp-the-notebook', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.skysports.com/watch/video/tv-shows/12118508/rainford-brent-how-ace-programme-helps', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|  | ||||
| class SkyNewsIE(SkyBaseIE): | ||||
|     IE_NAME = 'sky:news' | ||||
|     _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962', | ||||
| -        'md5': 'd6327e581473cea9976a3236ded370cd', | ||||
| +        'md5': '411e8893fd216c75eaf7e4c65d364115', | ||||
|         'info_dict': { | ||||
| -            'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', | ||||
| +            'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Russian plane inspected after deadly fire', | ||||
|             'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.', | ||||
|             'uploader_id': '6058004172001', | ||||
|             'timestamp': 1567112345, | ||||
|             'upload_date': '20190829', | ||||
|         }, | ||||
| -        'add_ie': ['Ooyala'], | ||||
| +        'add_ie': ['BrightcoveNew'], | ||||
|     } | ||||
|  | ||||
|  | ||||
| class SkySportsNewsIE(SkyBaseIE): | ||||
|     IE_NAME = 'sky:sports:news' | ||||
|     _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass', | ||||
|         'info_dict': { | ||||
|             'id': '10871916', | ||||
|             'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass', | ||||
|             'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         article_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, article_id) | ||||
|  | ||||
|         entries = [] | ||||
|         for sdc_el in re.findall(self._SDC_EL_REGEX, webpage): | ||||
|             entries.append(self._process_ooyala_element(webpage, sdc_el, url)) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, article_id, self._og_search_title(webpage), | ||||
|             self._html_search_meta(['og:description', 'description'], webpage)) | ||||
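The Brightcove branch above boils down to filling a fixed player URL template; a small sketch, with the fallback ids copied from the hunk (whether those defaults apply to any given page is not guaranteed):

BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

def brightcove_player_url(video_id, account_id=None, player_id=None):
    # fall back to the defaults used when the sdc element carries no
    # data-account-id / data-player-id attributes
    return BRIGHTCOVE_URL_TEMPLATE % (
        account_id or '6058004172001', player_id or 'RC9PQUaJ6', video_id)

print(brightcove_player_url('ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM'))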
|   | ||||
| @@ -2,7 +2,12 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from ..utils import ( | ||||
|     bool_or_none, | ||||
|     smuggle_url, | ||||
|     try_get, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SlidesLiveIE(InfoExtractor): | ||||
| @@ -18,8 +23,21 @@ class SlidesLiveIE(InfoExtractor): | ||||
|             'description': 'Watch full version of this video at https://slideslive.com/38902413.', | ||||
|             'uploader': 'SlidesLive Videos - A', | ||||
|             'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', | ||||
|             'timestamp': 1597615266, | ||||
|             'upload_date': '20170925', | ||||
|         } | ||||
|     }, { | ||||
|         # video_service_name = yoda | ||||
|         'url': 'https://slideslive.com/38935785', | ||||
|         'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a', | ||||
|         'info_dict': { | ||||
|             'id': 'RMraDYN5ozA_', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges', | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'bestvideo', | ||||
|         }, | ||||
|     }, { | ||||
|         # video_service_name = youtube | ||||
|         'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', | ||||
| @@ -39,18 +57,48 @@ class SlidesLiveIE(InfoExtractor): | ||||
|         video_data = self._download_json( | ||||
|             'https://ben.slideslive.com/player/' + video_id, video_id) | ||||
|         service_name = video_data['video_service_name'].lower() | ||||
| -        assert service_name in ('url', 'vimeo', 'youtube') | ||||
| +        assert service_name in ('url', 'yoda', 'vimeo', 'youtube') | ||||
|         service_id = video_data['video_service_id'] | ||||
|         subtitles = {} | ||||
|         for sub in try_get(video_data, lambda x: x['subtitles'], list) or []: | ||||
|             if not isinstance(sub, dict): | ||||
|                 continue | ||||
|             webvtt_url = url_or_none(sub.get('webvtt_url')) | ||||
|             if not webvtt_url: | ||||
|                 continue | ||||
|             lang = sub.get('language') or 'en' | ||||
|             subtitles.setdefault(lang, []).append({ | ||||
|                 'url': webvtt_url, | ||||
|             }) | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'thumbnail': video_data.get('thumbnail'), | ||||
|             'url': service_id, | ||||
|             'is_live': bool_or_none(video_data.get('is_live')), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         if service_name == 'url': | ||||
|         if service_name in ('url', 'yoda'): | ||||
|             info['title'] = video_data['title'] | ||||
|             if service_name == 'url': | ||||
|                 info['url'] = service_id | ||||
|             else: | ||||
|                 formats = [] | ||||
|                 _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s' | ||||
|                 # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     _MANIFEST_PATTERN % (service_id, 'm3u8'), | ||||
|                     service_id, 'mp4', m3u8_id='hls', fatal=False)) | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     _MANIFEST_PATTERN % (service_id, 'mpd'), service_id, | ||||
|                     mpd_id='dash', fatal=False)) | ||||
|                 self._sort_formats(formats) | ||||
|                 info.update({ | ||||
|                     'id': service_id, | ||||
|                     'formats': formats, | ||||
|                 }) | ||||
|         else: | ||||
|             info.update({ | ||||
|                 '_type': 'url_transparent', | ||||
|                 'url': service_id, | ||||
|                 'ie_key': service_name.capitalize(), | ||||
|                 'title': video_data.get('title'), | ||||
|             }) | ||||
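For the 'yoda' service the HLS and DASH manifests differ only in their extension; a minimal sketch of the URL construction (the CDN pattern is copied from the hunk above):

_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'

def yoda_manifest_urls(service_id):
    # HLS is parsed with the plain m3u8 entry protocol until m3u8_native
    # supports EXT-X-MAP, as noted in the comment above
    return {
        'hls': _MANIFEST_PATTERN % (service_id, 'm3u8'),
        'dash': _MANIFEST_PATTERN % (service_id, 'mpd'),
    }

print(yoda_manifest_urls('RMraDYN5ozA_'))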
|   | ||||
| @@ -1,416 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import hashlib | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     sanitized_Request, | ||||
|     unified_strdate, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SmotriIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com' | ||||
|     IE_NAME = 'smotri' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' | ||||
|     _NETRC_MACHINE = 'smotri' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # real video id 2610366 | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v261036632ab', | ||||
|             'md5': '02c0dfab2102984e9c5bb585cc7cc321', | ||||
|             'info_dict': { | ||||
|                 'id': 'v261036632ab', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'катастрофа с камер видеонаблюдения', | ||||
|                 'uploader': 'rbc2008', | ||||
|                 'uploader_id': 'rbc08', | ||||
|                 'upload_date': '20131118', | ||||
|                 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|         # real video id 57591 | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v57591cb20', | ||||
|             'md5': '830266dfc21f077eac5afd1883091bcd', | ||||
|             'info_dict': { | ||||
|                 'id': 'v57591cb20', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'test', | ||||
|                 'uploader': 'Support Photofile@photofile', | ||||
|                 'uploader_id': 'support-photofile', | ||||
|                 'upload_date': '20070704', | ||||
|                 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|         # video-password, not approved by moderator | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v1390466a13c', | ||||
|             'md5': 'f6331cef33cad65a0815ee482a54440b', | ||||
|             'info_dict': { | ||||
|                 'id': 'v1390466a13c', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', | ||||
|                 'uploader': 'timoxa40', | ||||
|                 'uploader_id': 'timoxa40', | ||||
|                 'upload_date': '20100404', | ||||
|                 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'videopassword': 'qwerty', | ||||
|             }, | ||||
|             'skip': 'Video is not approved by moderator', | ||||
|         }, | ||||
|         # video-password | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v6984858774#', | ||||
|             'md5': 'f11e01d13ac676370fc3b95b9bda11b0', | ||||
|             'info_dict': { | ||||
|                 'id': 'v6984858774', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Дача Солженицина ПАРОЛЬ 223322', | ||||
|                 'uploader': 'psavari1', | ||||
|                 'uploader_id': 'psavari1', | ||||
|                 'upload_date': '20081103', | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'videopassword': '223322', | ||||
|             }, | ||||
|         }, | ||||
|         # age limit + video-password, not approved by moderator | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v15408898bcf', | ||||
|             'md5': '91e909c9f0521adf5ee86fbe073aad70', | ||||
|             'info_dict': { | ||||
|                 'id': 'v15408898bcf', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'этот ролик не покажут по ТВ', | ||||
|                 'uploader': 'zzxxx', | ||||
|                 'uploader_id': 'ueggb', | ||||
|                 'upload_date': '20101001', | ||||
|                 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', | ||||
|                 'age_limit': 18, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'videopassword': '333' | ||||
|             }, | ||||
|             'skip': 'Video is not approved by moderator', | ||||
|         }, | ||||
|         # age limit + video-password | ||||
|         { | ||||
|             'url': 'http://smotri.com/video/view/?id=v7780025814', | ||||
|             'md5': 'b4599b068422559374a59300c5337d72', | ||||
|             'info_dict': { | ||||
|                 'id': 'v7780025814', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Sexy Beach (пароль 123)', | ||||
|                 'uploader': 'вАся', | ||||
|                 'uploader_id': 'asya_prosto', | ||||
|                 'upload_date': '20081218', | ||||
|                 'thumbnail': r're:^https?://.*\.jpg$', | ||||
|                 'age_limit': 18, | ||||
|             }, | ||||
|             'params': { | ||||
|                 'videopassword': '123' | ||||
|             }, | ||||
|         }, | ||||
|         # swf player | ||||
|         { | ||||
|             'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500', | ||||
|             'md5': '31099eeb4bc906712c5f40092045108d', | ||||
|             'info_dict': { | ||||
|                 'id': 'v9188090500', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Shakira - Don\'t Bother', | ||||
|                 'uploader': 'HannahL', | ||||
|                 'uploader_id': 'lisaha95', | ||||
|                 'upload_date': '20090331', | ||||
|                 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     @classmethod | ||||
|     def _extract_url(cls, webpage): | ||||
|         mobj = re.search( | ||||
|             r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|         mobj = re.search( | ||||
|             r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s* | ||||
|                     <div\s+class="video_image">[^<]+</div>\s* | ||||
|                     <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage) | ||||
|         if mobj is not None: | ||||
|             return 'http://smotri.com/video/view/?id=%s' % mobj.group('id') | ||||
|  | ||||
|     def _search_meta(self, name, html, display_name=None): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_meta(name, html, display_name) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         video_form = { | ||||
|             'ticket': video_id, | ||||
|             'video_url': '1', | ||||
|             'frame_url': '1', | ||||
|             'devid': 'LoadupFlashPlayer', | ||||
|             'getvideoinfo': '1', | ||||
|         } | ||||
|  | ||||
|         video_password = self._downloader.params.get('videopassword') | ||||
|         if video_password: | ||||
|             video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'http://smotri.com/video/view/url/bot/', | ||||
|             video_id, 'Downloading video JSON', | ||||
|             data=urlencode_postdata(video_form), | ||||
|             headers={'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|  | ||||
|         video_url = video.get('_vidURL') or video.get('_vidURL_mp4') | ||||
|  | ||||
|         if not video_url: | ||||
|             if video.get('_moderate_no'): | ||||
|                 raise ExtractorError( | ||||
|                     'Video %s has not been approved by moderator' % video_id, expected=True) | ||||
|  | ||||
|             if video.get('error'): | ||||
|                 raise ExtractorError('Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|             if video.get('_pass_protected') == 1: | ||||
|                 msg = ('Invalid video password' if video_password | ||||
|                        else 'This video is protected by a password, use the --video-password option') | ||||
|                 raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         title = video['title'] | ||||
|         thumbnail = video.get('_imgURL') | ||||
|         upload_date = unified_strdate(video.get('added')) | ||||
|         uploader = video.get('userNick') | ||||
|         uploader_id = video.get('userLogin') | ||||
|         duration = int_or_none(video.get('duration')) | ||||
|  | ||||
|         # Video JSON does not provide enough meta data | ||||
|         # We will extract some from the video web page instead | ||||
|         webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id | ||||
|         webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page') | ||||
|  | ||||
|         # Warning if video is unavailable | ||||
|         warning = self._html_search_regex( | ||||
|             r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage, | ||||
|             'warning message', default=None) | ||||
|         if warning is not None: | ||||
|             self._downloader.report_warning( | ||||
|                 'Video %s may not be available; smotri said: %s ' % | ||||
|                 (video_id, warning)) | ||||
|  | ||||
|         # Adult content | ||||
|         if 'EroConfirmText">' in webpage: | ||||
|             self.report_age_confirmation() | ||||
|             confirm_string = self._html_search_regex( | ||||
|                 r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id, | ||||
|                 webpage, 'confirm string') | ||||
|             confirm_url = webpage_url + '&confirm=%s' % confirm_string | ||||
|             webpage = self._download_webpage( | ||||
|                 confirm_url, video_id, | ||||
|                 'Downloading video page (age confirmed)') | ||||
|             adult_content = True | ||||
|         else: | ||||
|             adult_content = False | ||||
|  | ||||
|         view_count = self._html_search_regex( | ||||
|             r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>', | ||||
|             webpage, 'view count', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'uploader_id': uploader_id, | ||||
|             'duration': duration, | ||||
|             'view_count': int_or_none(view_count), | ||||
|             'age_limit': 18 if adult_content else 0, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SmotriCommunityIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com community videos' | ||||
|     IE_NAME = 'smotri:community' | ||||
|     _VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://smotri.com/community/video/kommuna', | ||||
|         'info_dict': { | ||||
|             'id': 'kommuna', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         community_id = self._match_id(url) | ||||
|  | ||||
|         rss = self._download_xml( | ||||
|             'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id, | ||||
|             community_id, 'Downloading community RSS') | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(video_url.text, SmotriIE.ie_key()) | ||||
|             for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         return self.playlist_result(entries, community_id) | ||||
|  | ||||
|  | ||||
| class SmotriUserIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com user videos' | ||||
|     IE_NAME = 'smotri:user' | ||||
|     _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://smotri.com/user/inspector', | ||||
|         'info_dict': { | ||||
|             'id': 'inspector', | ||||
|             'title': 'Inspector', | ||||
|         }, | ||||
|         'playlist_mincount': 9, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         user_id = self._match_id(url) | ||||
|  | ||||
|         rss = self._download_xml( | ||||
|             'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id, | ||||
|             user_id, 'Downloading user RSS') | ||||
|  | ||||
|         entries = [self.url_result(video_url.text, 'Smotri') | ||||
|                    for video_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         description_text = xpath_text(rss, './channel/description') or '' | ||||
|         user_nickname = self._search_regex( | ||||
|             '^Видео режиссера (.+)$', description_text, | ||||
|             'user nickname', fatal=False) | ||||
|  | ||||
|         return self.playlist_result(entries, user_id, user_nickname) | ||||
|  | ||||
|  | ||||
| class SmotriBroadcastIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com broadcasts' | ||||
|     IE_NAME = 'smotri:broadcast' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*' | ||||
|     _NETRC_MACHINE = 'smotri' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         broadcast_id = mobj.group('id') | ||||
|  | ||||
|         broadcast_url = 'http://' + mobj.group('url') | ||||
|         broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') | ||||
|  | ||||
|         if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: | ||||
|             raise ExtractorError( | ||||
|                 'Broadcast %s does not exist' % broadcast_id, expected=True) | ||||
|  | ||||
|         # Adult content | ||||
|         if re.search('EroConfirmText">', broadcast_page) is not None: | ||||
|  | ||||
|             (username, password) = self._get_login_info() | ||||
|             if username is None: | ||||
|                 self.raise_login_required( | ||||
|                     'Erotic broadcasts allowed only for registered users') | ||||
|  | ||||
|             login_form = { | ||||
|                 'login-hint53': '1', | ||||
|                 'confirm_erotic': '1', | ||||
|                 'login': username, | ||||
|                 'password': password, | ||||
|             } | ||||
|  | ||||
|             request = sanitized_Request( | ||||
|                 broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form)) | ||||
|             request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|             broadcast_page = self._download_webpage( | ||||
|                 request, broadcast_id, 'Logging in and confirming age') | ||||
|  | ||||
|             if '>Неверный логин или пароль<' in broadcast_page: | ||||
|                 raise ExtractorError( | ||||
|                     'Unable to log in: bad username or password', expected=True) | ||||
|  | ||||
|             adult_content = True | ||||
|         else: | ||||
|             adult_content = False | ||||
|  | ||||
|         ticket = self._html_search_regex( | ||||
|             (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1', | ||||
|              r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"), | ||||
|             broadcast_page, 'broadcast ticket', group='ticket') | ||||
|  | ||||
|         broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket | ||||
|  | ||||
|         broadcast_password = self._downloader.params.get('videopassword') | ||||
|         if broadcast_password: | ||||
|             broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         broadcast_json_page = self._download_webpage( | ||||
|             broadcast_url, broadcast_id, 'Downloading broadcast JSON') | ||||
|  | ||||
|         try: | ||||
|             broadcast_json = json.loads(broadcast_json_page) | ||||
|  | ||||
|             protected_broadcast = broadcast_json['_pass_protected'] == 1 | ||||
|             if protected_broadcast and not broadcast_password: | ||||
|                 raise ExtractorError( | ||||
|                     'This broadcast is protected by a password, use the --video-password option', | ||||
|                     expected=True) | ||||
|  | ||||
|             broadcast_offline = broadcast_json['is_play'] == 0 | ||||
|             if broadcast_offline: | ||||
|                 raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) | ||||
|  | ||||
|             rtmp_url = broadcast_json['_server'] | ||||
|             mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url) | ||||
|             if not mobj: | ||||
|                 raise ExtractorError('Unexpected broadcast rtmp URL') | ||||
|  | ||||
|             broadcast_playpath = broadcast_json['_streamName'] | ||||
|             broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']) | ||||
|             broadcast_thumbnail = broadcast_json.get('_imgURL') | ||||
|             broadcast_title = self._live_title(broadcast_json['title']) | ||||
|             broadcast_description = broadcast_json.get('description') | ||||
|             broadcaster_nick = broadcast_json.get('nick') | ||||
|             broadcaster_login = broadcast_json.get('login') | ||||
|             rtmp_conn = 'S:%s' % uuid.uuid4().hex | ||||
|         except KeyError: | ||||
|             if protected_broadcast: | ||||
|                 raise ExtractorError('Bad broadcast password', expected=True) | ||||
|             raise ExtractorError('Unexpected broadcast JSON') | ||||
|  | ||||
|         return { | ||||
|             'id': broadcast_id, | ||||
|             'url': rtmp_url, | ||||
|             'title': broadcast_title, | ||||
|             'thumbnail': broadcast_thumbnail, | ||||
|             'description': broadcast_description, | ||||
|             'uploader': broadcaster_nick, | ||||
|             'uploader_id': broadcaster_login, | ||||
|             'age_limit': 18 if adult_content else 0, | ||||
|             'ext': 'flv', | ||||
|             'play_path': broadcast_playpath, | ||||
|             'player_url': 'http://pics.smotri.com/broadcast_play.swf', | ||||
|             'app': broadcast_app, | ||||
|             'rtmp_live': True, | ||||
|             'rtmp_conn': rtmp_conn, | ||||
|             'is_live': True, | ||||
|         } | ||||
| @@ -1,40 +1,112 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import time | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SonyLIVIE(InfoExtractor): | ||||
| -    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)' | ||||
| +    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
| -        'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight", | ||||
| +        'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true', | ||||
|         'info_dict': { | ||||
|             'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight", | ||||
|             'id': 'ref:5024612095001', | ||||
|             'title': 'Bachelors Delight - Achaari Cheese Toast', | ||||
|             'id': '1000022678', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20170923', | ||||
|             'description': 'md5:7f28509a148d5be9d0782b4d5106410d', | ||||
|             'uploader_id': '5182475815001', | ||||
|             'timestamp': 1506200547, | ||||
|             'upload_date': '20200411', | ||||
|             'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb', | ||||
|             'timestamp': 1586632091, | ||||
|             'duration': 185, | ||||
|             'season_number': 1, | ||||
|             'episode': 'Achaari Cheese Toast', | ||||
|             'episode_number': 1, | ||||
|             'release_year': 2016, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['BrightcoveNew'], | ||||
|     }, { | ||||
| -        'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)', | ||||
| +        'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _GEO_COUNTRIES = ['IN'] | ||||
|     _TOKEN = None | ||||
|  | ||||
|     # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s' | ||||
|     def _call_api(self, version, path, video_id): | ||||
|         headers = {} | ||||
|         if self._TOKEN: | ||||
|             headers['security_token'] = self._TOKEN | ||||
|         try: | ||||
|             return self._download_json( | ||||
|                 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path), | ||||
|                 video_id, headers=headers)['resultObj'] | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: | ||||
|                 message = self._parse_json( | ||||
|                     e.cause.read().decode(), video_id)['message'] | ||||
|                 if message == 'Geoblocked Country': | ||||
|                     self.raise_geo_restricted(countries=self._GEO_COUNTRIES) | ||||
|                 raise ExtractorError(message) | ||||
|             raise | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         brightcove_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, { | ||||
|                 'geo_countries': ['IN'], | ||||
|                 'referrer': url, | ||||
|             }), | ||||
|             'BrightcoveNew', brightcove_id) | ||||
|         video_id = self._match_id(url) | ||||
|         content = self._call_api( | ||||
|             '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id) | ||||
|         if content.get('isEncrypted'): | ||||
|             raise ExtractorError('This video is DRM protected.', expected=True) | ||||
|         dash_url = content['videoURL'] | ||||
|         headers = { | ||||
|             'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000) | ||||
|         } | ||||
|         formats = self._extract_mpd_formats( | ||||
|             dash_url, video_id, mpd_id='dash', headers=headers, fatal=False) | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'), | ||||
|             video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False)) | ||||
|         for f in formats: | ||||
|             f.setdefault('http_headers', {}).update(headers) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         metadata = self._call_api( | ||||
|             '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata'] | ||||
|         title = metadata['title'] | ||||
|         episode = metadata.get('episodeTitle') | ||||
|         if episode and title != episode: | ||||
|             title += ' - ' + episode | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': content.get('posterURL'), | ||||
|             'description': metadata.get('longDescription') or metadata.get('shortDescription'), | ||||
|             'timestamp': int_or_none(metadata.get('creationDate'), 1000), | ||||
|             'duration': int_or_none(metadata.get('duration')), | ||||
|             'season_number': int_or_none(metadata.get('season')), | ||||
|             'episode': episode, | ||||
|             'episode_number': int_or_none(metadata.get('episodeNumber')), | ||||
|             'release_year': int_or_none(metadata.get('year')), | ||||
|         } | ||||
|   | ||||
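A minimal standalone sketch of the request flow introduced above, outside youtube-dl's helper framework: fetch a security token, resolve the DASH URL for a video, and attach the playback-session header. The endpoints are copied from the diff; VIDEO_ID is a placeholder and the API may change server-side.

    # Hypothetical stand-alone version of the SonyLIV API flow shown above.
    import json
    import time
    import uuid
    from urllib.request import Request, urlopen

    API = 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s'

    def call_api(version, path, token=None):
        # the extractor sends the token as a 'security_token' header once it has one
        headers = {'security_token': token} if token else {}
        req = Request(API % (version, path), headers=headers)
        return json.loads(urlopen(req).read().decode())['resultObj']

    token = call_api('1.4', 'ALL/GETTOKEN')
    content = call_api('1.5', 'IN/CONTENT/VIDEOURL/VOD/' + 'VIDEO_ID', token)  # VIDEO_ID is a placeholder
    dash_url = content['videoURL']
    # the same session id is sent with the manifest and segment requests
    session_headers = {
        'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000),
    }
    print(dash_url, session_headers)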
| @@ -7,17 +7,24 @@ from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     merge_dicts, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
|     parse_resolution, | ||||
|     str_to_int, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SpankBangIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?:[^/]+\.)?spankbang\.com/ | ||||
|                         (?: | ||||
|                             (?P<id>[\da-z]+)/(?:video|play|embed)\b| | ||||
|                             [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+ | ||||
|                         ) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://spankbang.com/3vvn/video/fantasy+solo', | ||||
|         'md5': '1cc433e1d6aa14bc376535b8679302f7', | ||||
| @@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'https://spankbang.com/2y3td/embed/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') or mobj.group('id_2') | ||||
|         webpage = self._download_webpage( | ||||
|             url.replace('/%s/embed' % video_id, '/%s/video' % video_id), | ||||
|             video_id, headers={'Cookie': 'country=US'}) | ||||
| @@ -155,30 +166,33 @@ class SpankBangIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class SpankBangPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+' | ||||
|     _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', | ||||
|         'info_dict': { | ||||
|             'id': 'ug0k', | ||||
|             'title': 'Big Ass Titties', | ||||
|         }, | ||||
|         'playlist_mincount': 50, | ||||
|         'playlist_mincount': 40, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) | ||||
|  | ||||
|         entries = [self.url_result( | ||||
|             'https://spankbang.com/%s/video' % video_id, | ||||
|             ie=SpankBangIE.ie_key(), video_id=video_id) | ||||
|             for video_id in orderedSet(re.findall( | ||||
|                 r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))] | ||||
|             urljoin(url, mobj.group('path')), | ||||
|             ie=SpankBangIE.ie_key(), video_id=mobj.group('id')) | ||||
|             for mobj in re.finditer( | ||||
|                 r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1' | ||||
|                 % re.escape(display_id), webpage)] | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title', | ||||
|             r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title', | ||||
|             fatal=False) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title) | ||||
|   | ||||
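A quick, self-contained check of the playlist-entry regex added above, run against a made-up HTML snippet; the href value is invented for illustration.

    import re

    display_id = 'big+ass+titties'
    webpage = '<a class="thumb" href="/abc12-xyz89/playlist/big+ass+titties">Some video</a>'

    for mobj in re.finditer(
            r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
            % re.escape(display_id), webpage):
        # prints: /abc12-xyz89/playlist/big+ass+titties xyz89
        print(mobj.group('path'), mobj.group('id'))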
| @@ -40,9 +40,15 @@ class ParamountNetworkIE(MTVServicesInfoExtractor): | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/' | ||||
|     _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|  | ||||
|     def _get_feed_query(self, uri): | ||||
|         return { | ||||
|             'arcEp': 'paramountnetwork.com', | ||||
|             'mgid': uri, | ||||
|         } | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         root_data = self._parse_json(self._search_regex( | ||||
|             r'window\.__DATA__\s*=\s*({.+})', | ||||
|   | ||||
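For reference, a sketch of the feed request that the new _FEED_URL and _get_feed_query produce; the mgid value below is a made-up example.

    from urllib.parse import urlencode

    FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
    query = {
        'arcEp': 'paramountnetwork.com',
        'mgid': 'mgid:arc:video:paramountnetwork.com:example-uuid',  # placeholder mgid
    }
    print(FEED_URL + '?' + urlencode(query))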
| @@ -3,50 +3,62 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .adobepass import AdobePassIE | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     update_url_query, | ||||
|     int_or_none, | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SproutIE(AdobePassIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.sproutonline.com/watch/cowboy-adventure', | ||||
|         'md5': '74bf14128578d1e040c3ebc82088f45f', | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race', | ||||
|         'info_dict': { | ||||
|             'id': '9dexnwtmh8_X', | ||||
|             'id': 'bm0foJFaTKqb', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'A Cowboy Adventure', | ||||
|             'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.', | ||||
|             'timestamp': 1437758640, | ||||
|             'upload_date': '20150724', | ||||
|             'uploader': 'NBCU-SPROUT-NEW', | ||||
|         } | ||||
|     } | ||||
|             'title': 'Robot Bike Race', | ||||
|             'description': 'md5:436b1d97117cc437f54c383f4debc66d', | ||||
|             'timestamp': 1606148940, | ||||
|             'upload_date': '20201123', | ||||
|             'uploader': 'NBCU-MPAT', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.sproutonline.com/watch/cowboy-adventure', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.universalkids.com/watch/robot-bike-race', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     _GEO_COUNTRIES = ['US'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_component = self._search_regex( | ||||
|             r'(?s)(<div[^>]+data-component="video"[^>]*?>)', | ||||
|             webpage, 'video component', default=None) | ||||
|         if video_component: | ||||
|             options = self._parse_json(extract_attributes( | ||||
|                 video_component)['data-options'], video_id) | ||||
|             theplatform_url = options['video'] | ||||
|         display_id = self._match_id(url) | ||||
|         mpx_metadata = self._download_json( | ||||
|             # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/ | ||||
|             'https://www.universalkids.com/_api/videos/' + display_id, | ||||
|             display_id)['mpxMetadata'] | ||||
|         media_pid = mpx_metadata['mediaPid'] | ||||
|         theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid | ||||
|         query = { | ||||
|             'mbr': 'true', | ||||
|             'manifest': 'm3u', | ||||
|         } | ||||
|             if options.get('protected'): | ||||
|                 query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout') | ||||
|             theplatform_url = smuggle_url(update_url_query( | ||||
|                 theplatform_url, query), {'force_smil_url': True}) | ||||
|         else: | ||||
|             iframe = self._search_regex( | ||||
|                 r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)', | ||||
|                 webpage, 'iframe') | ||||
|             theplatform_url = extract_attributes(iframe)['src'] | ||||
|  | ||||
|         return self.url_result(theplatform_url, 'ThePlatform') | ||||
|         if mpx_metadata.get('entitlement') == 'auth': | ||||
|             query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout') | ||||
|         theplatform_url = smuggle_url( | ||||
|             update_url_query(theplatform_url, query), { | ||||
|                 'force_smil_url': True, | ||||
|                 'geo_countries': self._GEO_COUNTRIES, | ||||
|             }) | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': media_pid, | ||||
|             'url': theplatform_url, | ||||
|             'series': mpx_metadata.get('seriesName'), | ||||
|             'season_number': int_or_none(mpx_metadata.get('seasonNumber')), | ||||
|             'episode_number': int_or_none(mpx_metadata.get('episodeNumber')), | ||||
|             'ie_key': 'ThePlatform', | ||||
|         } | ||||
|   | ||||
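A standalone sketch of the new lookup: the Universal Kids site API returns mpxMetadata, whose mediaPid is appended to a fixed ThePlatform link. The display_id is taken from the test above; the endpoint is copied from the diff and may change.

    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    display_id = 'robot-bike-race'
    mpx = json.loads(urlopen(
        'https://www.universalkids.com/_api/videos/' + display_id).read().decode())['mpxMetadata']
    theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + mpx['mediaPid']
    # the extractor adds mbr/manifest (and, for entitled videos, an auth token) as query args
    print(theplatform_url + '?' + urlencode({'mbr': 'true', 'manifest': 'm3u'}))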
| @@ -1,28 +1,74 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     clean_html, | ||||
|     clean_podcast_url, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class StitcherIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)' | ||||
| class StitcherBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/' | ||||
|  | ||||
|     def _call_api(self, path, video_id, query): | ||||
|         resp = self._download_json( | ||||
|             'https://api.prod.stitcher.com/' + path, | ||||
|             video_id, query=query) | ||||
|         error_message = try_get(resp, lambda x: x['errors'][0]['message']) | ||||
|         if error_message: | ||||
|             raise ExtractorError(error_message, expected=True) | ||||
|         return resp['data'] | ||||
|  | ||||
|     def _extract_description(self, data): | ||||
|         return clean_html(data.get('html_description') or data.get('description')) | ||||
|  | ||||
|     def _extract_audio_url(self, episode): | ||||
|         return url_or_none(episode.get('audio_url') or episode.get('guid')) | ||||
|  | ||||
|     def _extract_show_info(self, show): | ||||
|         return { | ||||
|             'thumbnail': show.get('image_base_url'), | ||||
|             'series': show.get('title'), | ||||
|         } | ||||
|  | ||||
|     def _extract_episode(self, episode, audio_url, show_info): | ||||
|         info = { | ||||
|             'id': compat_str(episode['id']), | ||||
|             'display_id': episode.get('slug'), | ||||
|             'title': episode['title'].strip(), | ||||
|             'description': self._extract_description(episode), | ||||
|             'duration': int_or_none(episode.get('duration')), | ||||
|             'url': clean_podcast_url(audio_url), | ||||
|             'vcodec': 'none', | ||||
|             'timestamp': int_or_none(episode.get('date_published')), | ||||
|             'season_number': int_or_none(episode.get('season')), | ||||
|             'season_id': str_or_none(episode.get('season_id')), | ||||
|         } | ||||
|         info.update(show_info) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class StitcherIE(StitcherBaseIE): | ||||
|     _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true', | ||||
|         'md5': '391dd4e021e6edeb7b8e68fbf2e9e940', | ||||
|         'md5': 'e9635098e0da10b21a0e2b85585530f6', | ||||
|         'info_dict': { | ||||
|             'id': '40789481', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Machine Learning Mastery and Cancer Clusters', | ||||
|             'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3', | ||||
|             'description': 'md5:547adb4081864be114ae3831b4c2b42f', | ||||
|             'duration': 1604, | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'upload_date': '20151008', | ||||
|             'timestamp': 1444285800, | ||||
|             'series': 'Talking Machines', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true', | ||||
| @@ -38,6 +84,7 @@ class StitcherIE(InfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Page Not Found', | ||||
|     }, { | ||||
|         # escaped title | ||||
|         'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true', | ||||
| @@ -45,37 +92,53 @@ class StitcherIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         audio_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or audio_id | ||||
|         audio_id = self._match_id(url) | ||||
|         data = self._call_api( | ||||
|             'shows/episodes', audio_id, {'episode_ids': audio_id}) | ||||
|         episode = data['episodes'][0] | ||||
|         audio_url = self._extract_audio_url(episode) | ||||
|         if not audio_url: | ||||
|             self.raise_login_required() | ||||
|         show = try_get(data, lambda x: x['shows'][0], dict) or {} | ||||
|         return self._extract_episode( | ||||
|             episode, audio_url, self._extract_show_info(show)) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         episode = self._parse_json( | ||||
|             js_to_json(self._search_regex( | ||||
|                 r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')), | ||||
|             display_id)['config']['episode'] | ||||
| class StitcherShowIE(StitcherBaseIE): | ||||
|     _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.stitcher.com/podcast/the-talking-machines', | ||||
|         'info_dict': { | ||||
|             'id': 'the-talking-machines', | ||||
|             'title': 'Talking Machines', | ||||
|             'description': 'md5:831f0995e40f26c10231af39cf1ebf0b', | ||||
|         }, | ||||
|         'playlist_mincount': 106, | ||||
|     }, { | ||||
|         'url': 'https://www.stitcher.com/show/the-talking-machines', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|         title = unescapeHTML(episode['title']) | ||||
|         formats = [{ | ||||
|             'url': episode[episode_key], | ||||
|             'ext': determine_ext(episode[episode_key]) or 'mp3', | ||||
|             'vcodec': 'none', | ||||
|         } for episode_key in ('episodeURL',) if episode.get(episode_key)] | ||||
|         description = self._search_regex( | ||||
|             r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False) | ||||
|         duration = int_or_none(episode.get('duration')) | ||||
|         thumbnail = episode.get('episodeImage') | ||||
|     def _real_extract(self, url): | ||||
|         show_slug = self._match_id(url) | ||||
|         data = self._call_api( | ||||
|             'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000}) | ||||
|         show = try_get(data, lambda x: x['shows'][0], dict) or {} | ||||
|         show_info = self._extract_show_info(show) | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         entries = [] | ||||
|         for episode in (data.get('episodes') or []): | ||||
|             audio_url = self._extract_audio_url(episode) | ||||
|             if not audio_url: | ||||
|                 continue | ||||
|             entries.append(self._extract_episode(episode, audio_url, show_info)) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, show_slug, show.get('title'), | ||||
|             self._extract_description(show)) | ||||
|   | ||||
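A minimal sketch of the Stitcher API call both new extractors rely on; the episode id comes from the first test above and the endpoint is copied from the diff.

    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    episode_id = '40789481'
    url = 'https://api.prod.stitcher.com/shows/episodes?' + urlencode({'episode_ids': episode_id})
    data = json.loads(urlopen(url).read().decode())['data']
    episode = data['episodes'][0]
    # audio_url is empty for premium-only episodes, in which case the extractor asks for login
    print(episode['title'], episode.get('audio_url') or episode.get('guid'))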
| @@ -2,25 +2,40 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import unified_strdate | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     str_or_none, | ||||
|     strip_or_none, | ||||
|     try_get, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class StreetVoiceIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://streetvoice.com/skippylu/songs/94440/', | ||||
|         'md5': '15974627fc01a29e492c98593c2fd472', | ||||
|         'url': 'https://streetvoice.com/skippylu/songs/123688/', | ||||
|         'md5': '0eb535970629a5195685355f3ed60bfd', | ||||
|         'info_dict': { | ||||
|             'id': '94440', | ||||
|             'id': '123688', | ||||
|             'ext': 'mp3', | ||||
|             'title': '輸', | ||||
|             'description': 'Crispy脆樂團 - 輸', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'duration': 260, | ||||
|             'upload_date': '20091018', | ||||
|             'title': '流浪', | ||||
|             'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6', | ||||
|             'thumbnail': r're:^https?://.*\.jpg', | ||||
|             'duration': 270, | ||||
|             'upload_date': '20100923', | ||||
|             'uploader': 'Crispy脆樂團', | ||||
|             'uploader_id': '627810', | ||||
|             'uploader_url': 're:^https?://streetvoice.com/skippylu/', | ||||
|             'timestamp': 1285261661, | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
|             'track': '流浪', | ||||
|             'track_id': '123688', | ||||
|             'album': '2010', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://tw.streetvoice.com/skippylu/songs/94440/', | ||||
| @@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         song_id = self._match_id(url) | ||||
|  | ||||
|         song = self._download_json( | ||||
|             'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'') | ||||
|  | ||||
|         base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id | ||||
|         song = self._download_json(base_url, song_id, query={ | ||||
|             'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username', | ||||
|         }) | ||||
|         title = song['name'] | ||||
|         author = song['user']['nickname'] | ||||
|  | ||||
|         formats = [] | ||||
|         for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]: | ||||
|             f_url = (self._download_json( | ||||
|                 base_url + suffix + '/', song_id, | ||||
|                 'Downloading %s format URL' % format_id, | ||||
|                 data=b'', fatal=False) or {}).get('file') | ||||
|             if not f_url: | ||||
|                 continue | ||||
|             f = { | ||||
|                 'ext': 'mp3', | ||||
|                 'format_id': format_id, | ||||
|                 'url': f_url, | ||||
|                 'vcodec': 'none', | ||||
|             } | ||||
|             if format_id == 'hls': | ||||
|                 f['protocol'] = 'm3u8_native' | ||||
|             abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None) | ||||
|             if abr: | ||||
|                 abr = int(abr) | ||||
|                 f.update({ | ||||
|                     'abr': abr, | ||||
|                     'tbr': abr, | ||||
|                 }) | ||||
|             formats.append(f) | ||||
|  | ||||
|         user = song.get('user') or {} | ||||
|         username = user.get('username') | ||||
|         get_count = lambda x: int_or_none(song.get(x + '_count')) | ||||
|  | ||||
|         return { | ||||
|             'id': song_id, | ||||
|             'url': song['file'], | ||||
|             'formats': formats, | ||||
|             'title': title, | ||||
|             'description': '%s - %s' % (author, title), | ||||
|             'thumbnail': self._proto_relative_url(song.get('image'), 'http:'), | ||||
|             'duration': song.get('length'), | ||||
|             'upload_date': unified_strdate(song.get('created_at')), | ||||
|             'uploader': author, | ||||
|             'uploader_id': compat_str(song['user']['id']), | ||||
|             'description': strip_or_none(song.get('synopsis')), | ||||
|             'thumbnail': song.get('image'), | ||||
|             'duration': int_or_none(song.get('length')), | ||||
|             'timestamp': parse_iso8601(song.get('created_at')), | ||||
|             'uploader': try_get(user, lambda x: x['profile']['nickname']), | ||||
|             'uploader_id': str_or_none(user.get('id')), | ||||
|             'uploader_url': urljoin(url, '/%s/' % username) if username else None, | ||||
|             'view_count': get_count('plays'), | ||||
|             'like_count': get_count('likes'), | ||||
|             'comment_count': get_count('comments'), | ||||
|             'repost_count': get_count('share'), | ||||
|             'track': title, | ||||
|             'track_id': song_id, | ||||
|             'album': try_get(song, lambda x: x['album']['name']), | ||||
|         } | ||||
|   | ||||
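A rough sketch of the v4 flow above: one metadata request, then one empty POST per format endpoint ('hls/file', 'file', 'file/original'), each answering with a {'file': ...} URL whose bitrate can sometimes be read from the path. The song id comes from the test; the fields list is trimmed for brevity.

    import json
    import re
    from urllib.request import Request, urlopen

    song_id = '123688'
    base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
    song = json.loads(urlopen(base_url + '?fields=id,name,length,user').read().decode())

    for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
        # data=b'' turns this into an empty POST, matching the extractor
        resp = json.loads(urlopen(Request(base_url + suffix + '/', data=b'')).read().decode())
        f_url = resp.get('file')
        if not f_url:
            continue
        abr = re.search(r'\.mp3\.(\d+)k', f_url)
        print(format_id, f_url, abr.group(1) + 'k' if abr else 'unknown bitrate')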
| @@ -8,13 +8,17 @@ from ..utils import ( | ||||
|     compat_str, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     smuggle_url, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class STVPlayerIE(InfoExtractor): | ||||
|     IE_NAME = 'stv:player' | ||||
|     _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         # shortform | ||||
|         'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/', | ||||
|         'md5': '5adf9439c31d554f8be0707c7abe7e0a', | ||||
|         'info_dict': { | ||||
| @@ -27,7 +31,11 @@ class STVPlayerIE(InfoExtractor): | ||||
|             'uploader_id': '1486976045', | ||||
|         }, | ||||
|         'skip': 'this resource is unavailable outside of the UK', | ||||
|     } | ||||
|     }, { | ||||
|         # episodes | ||||
|         'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s' | ||||
|     _PTYPE_MAP = { | ||||
|         'episode': 'episodes', | ||||
| @@ -36,11 +44,31 @@ class STVPlayerIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         ptype, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         resp = self._download_json( | ||||
|             'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id), | ||||
|             video_id) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, fatal=False) or '' | ||||
|         props = (self._parse_json(self._search_regex( | ||||
|             r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>', | ||||
|             webpage, 'next data', default='{}'), video_id, | ||||
|             fatal=False) or {}).get('props') or {} | ||||
|         player_api_cache = try_get( | ||||
|             props, lambda x: x['initialReduxState']['playerApiCache']) or {} | ||||
|  | ||||
|         api_path, resp = None, {} | ||||
|         for k, v in player_api_cache.items(): | ||||
|             if k.startswith('/episodes/') or k.startswith('/shortform/'): | ||||
|                 api_path, resp = k, v | ||||
|                 break | ||||
|         else: | ||||
|             episode_id = str_or_none(try_get( | ||||
|                 props, lambda x: x['pageProps']['episodeId'])) | ||||
|             api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id) | ||||
|  | ||||
|         result = resp.get('results') | ||||
|         if not result: | ||||
|             resp = self._download_json( | ||||
|                 'https://player.api.stv.tv/v1' + api_path, video_id) | ||||
|             result = resp['results'] | ||||
|  | ||||
|         video = result['video'] | ||||
|         video_id = compat_str(video['id']) | ||||
|  | ||||
| @@ -57,7 +85,7 @@ class STVPlayerIE(InfoExtractor): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id, | ||||
|             'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}), | ||||
|             'description': result.get('summary'), | ||||
|             'duration': float_or_none(video.get('length'), 1000), | ||||
|             'subtitles': subtitles, | ||||
|   | ||||
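How the new STV code reads the Next.js payload, shown on a trimmed, made-up stand-in for a real player.stv.tv page: the __NEXT_DATA__ JSON may already carry the player API response, keyed by API path, which saves one request.

    import json
    import re

    webpage = ('<script id="__NEXT_DATA__" type="application/json">'
               '{"props": {"initialReduxState": {"playerApiCache": '
               '{"/episodes/abcd": {"results": {"video": {"id": "6000000000001"}}}}}}}'
               '</script>')

    next_data = json.loads(re.search(
        r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>', webpage).group(1))
    cache = next_data['props']['initialReduxState']['playerApiCache']
    for api_path, resp in cache.items():
        if api_path.startswith(('/episodes/', '/shortform/')):
            # falls back to https://player.api.stv.tv/v1<api_path> when the cache is empty
            print(api_path, resp['results']['video']['id'])
            break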
| @@ -1,43 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
|  | ||||
|  | ||||
| class TastyTradeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017', | ||||
|         'info_dict': { | ||||
|             'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'A History of Teaming', | ||||
|             'description': 'md5:2a9033db8da81f2edffa4c99888140b3', | ||||
|             'duration': 422.255, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         ooyala_code = self._search_regex( | ||||
|             r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1', | ||||
|             webpage, 'ooyala code', group='code') | ||||
|  | ||||
|         info = self._search_json_ld(webpage, display_id, fatal=False) | ||||
|         info.update({ | ||||
|             '_type': 'url_transparent', | ||||
|             'ie_key': OoyalaIE.ie_key(), | ||||
|             'url': 'ooyala:%s' % ooyala_code, | ||||
|             'display_id': display_id, | ||||
|         }) | ||||
|         return info | ||||
| @@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE): | ||||
|     @staticmethod | ||||
|     def _is_teachable(webpage): | ||||
|         return 'teachableTracker.linker:autoLink' in webpage and re.search( | ||||
|             r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com', | ||||
|             r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com', | ||||
|             webpage) | ||||
|  | ||||
|     @staticmethod | ||||
|   | ||||
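A quick check of the widened CDN-detection regex against two made-up <link> tags; only the regex half of _is_teachable is exercised here, and both hosts should now be recognized.

    import re

    pattern = r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com'
    for snippet in ('<link rel="preconnect" href="https://process.fs.teachablecdn.com">',
                    '<link rel="preconnect" href="https://assets.teachablecdn.com">'):
        print(bool(re.search(pattern, snippet)))  # True, True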
| @@ -5,14 +5,11 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor): | ||||
|             'description': 'md5:716caf5601e25c3c5ab6605b1ae71529', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'md5': 'adb28c37238b675dad0f042292f209a7', | ||||
|             'md5': '7ee56d665cfd241c0e6d80fd175068b0', | ||||
|             'info_dict': { | ||||
|                 'id': 'JEA5ijCnF6p5W08A1rNKn7', | ||||
|                 'ext': 'mp4', | ||||
| @@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor): | ||||
|         }] | ||||
|     }, { | ||||
|         'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', | ||||
|         'md5': '9468140ebc300fbb8b9d65dc6e5c4b43', | ||||
|         'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', | ||||
|         'info_dict': { | ||||
|             'id': 'jn24Od1zGLG4XUZcnUnZB6', | ||||
|             'ext': 'mp4', | ||||
| @@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', | ||||
|         'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6', | ||||
|         'md5': 'eddb50291df704ce23c74821b995bcac', | ||||
|         'info_dict': { | ||||
|             'id': 'aywerkD2Sv1vGNqq9b85Q2', | ||||
|             'ext': 'mp4', | ||||
| @@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor): | ||||
|  | ||||
|     def _parse_content(self, content, url): | ||||
|         video_id = content['dataMediaId'] | ||||
|         if content.get('dataCmsId') == 'ooyala': | ||||
|             return self.url_result( | ||||
|                 'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id) | ||||
|         config_url = urljoin(url, content['dataConfig']) | ||||
|         config = self._download_json( | ||||
|             config_url, video_id, 'Downloading config JSON') | ||||
|             content['dataConfig'], video_id, 'Downloading config JSON') | ||||
|         title = config['info']['title'] | ||||
|  | ||||
|         def mmc_url(mmc_type): | ||||
|             return re.sub( | ||||
|                 r'/(?:flash|html5)\.json', '/%s.json' % mmc_type, | ||||
|                 config['services']['mmc']) | ||||
|  | ||||
|         duration = None | ||||
|         formats = [] | ||||
|         for mmc_type in ('flash', 'html5'): | ||||
|             mmc = self._download_json( | ||||
|                 mmc_url(mmc_type), video_id, | ||||
|                 'Downloading %s mmc JSON' % mmc_type, fatal=False) | ||||
|             if not mmc: | ||||
|                 continue | ||||
|             if not duration: | ||||
|                 duration = int_or_none(mmc.get('duration')) | ||||
|             for location in mmc['locations']: | ||||
|                 gat = self._proto_relative_url(location.get('gat'), 'http:') | ||||
|                 gcp = location.get('gcp') | ||||
|                 ogn = location.get('ogn') | ||||
|                 if None in (gat, gcp, ogn): | ||||
|                     continue | ||||
|                 token_data = { | ||||
|                     'gcp': gcp, | ||||
|                     'ogn': ogn, | ||||
|                     'sta': 0, | ||||
|                 } | ||||
|                 media = self._download_json( | ||||
|                     gat, video_id, data=json.dumps(token_data).encode('utf-8'), | ||||
|                     headers={ | ||||
|                         'Content-Type': 'application/json;charset=utf-8', | ||||
|                         'Referer': url, | ||||
|                     }, fatal=False) or {} | ||||
|                 stream = media.get('stream') or media.get('file') | ||||
|                 if not stream: | ||||
|                     continue | ||||
|                 ext = determine_ext(stream) | ||||
|                 if ext == 'f4m': | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', | ||||
|                         video_id, f4m_id='hds', fatal=False)) | ||||
|                 elif ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         stream, video_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls', fatal=False)) | ||||
|         services = config['services'] | ||||
|         caronte = self._download_json(services['caronte'], video_id) | ||||
|         stream = caronte['dls'][0]['stream'] | ||||
|         headers = self.geo_verification_headers() | ||||
|         headers.update({ | ||||
|             'Content-Type': 'application/json;charset=UTF-8', | ||||
|             'Origin': re.match(r'https?://[^/]+', url).group(0), | ||||
|         }) | ||||
|         cdn = self._download_json( | ||||
|             caronte['cerbero'], video_id, data=json.dumps({ | ||||
|                 'bbx': caronte['bbx'], | ||||
|                 'gbx': self._download_json(services['gbx'], video_id)['gbx'], | ||||
|             }).encode(), headers=headers)['tokens']['1']['cdn'] | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
| @@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'), | ||||
|             'duration': duration, | ||||
|             'duration': int_or_none(content.get('dataDuration')), | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
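A standalone sketch of the new Mediaset token handshake: the config JSON points at a "caronte" service whose bbx token, together with the gbx token, is POSTed to "cerbero"; the CDN token that comes back is appended to the HLS stream URL. The config URL below is a placeholder (the real one comes from the page's dataConfig attribute) and the Origin/geo headers are omitted for brevity.

    import json
    from urllib.request import Request, urlopen

    def fetch_json(url, data=None, headers=None):
        return json.loads(urlopen(Request(url, data=data, headers=headers or {})).read().decode())

    config = fetch_json('https://example.com/config.json')  # placeholder for content['dataConfig']
    services = config['services']
    caronte = fetch_json(services['caronte'])
    gbx = fetch_json(services['gbx'])['gbx']
    cdn = fetch_json(
        caronte['cerbero'],
        data=json.dumps({'bbx': caronte['bbx'], 'gbx': gbx}).encode(),
        headers={'Content-Type': 'application/json;charset=UTF-8'},
    )['tokens']['1']['cdn']
    hls_url = caronte['dls'][0]['stream'] + '?' + cdn
    print(hls_url)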
| @@ -12,25 +12,16 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class TeleQuebecBaseIE(InfoExtractor): | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' | ||||
|  | ||||
|     @staticmethod | ||||
|     def _result(url, ie_key): | ||||
|     def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': smuggle_url(url, {'geo_countries': ['CA']}), | ||||
|             'ie_key': ie_key, | ||||
|             'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}), | ||||
|             'ie_key': 'BrightcoveNew', | ||||
|         } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _limelight_result(media_id): | ||||
|         return TeleQuebecBaseIE._result( | ||||
|             'limelight:media:' + media_id, 'LimelightMedia') | ||||
|  | ||||
|     @staticmethod | ||||
|     def _brightcove_result(brightcove_id): | ||||
|         return TeleQuebecBaseIE._result( | ||||
|             'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s' | ||||
|             % brightcove_id, 'BrightcoveNew') | ||||
|  | ||||
|  | ||||
| class TeleQuebecIE(TeleQuebecBaseIE): | ||||
|     _VALID_URL = r'''(?x) | ||||
| @@ -44,14 +35,18 @@ class TeleQuebecIE(TeleQuebecBaseIE): | ||||
|         # available till 01.01.2023 | ||||
|         'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', | ||||
|         'info_dict': { | ||||
|             'id': '577116881b4b439084e6b1cf4ef8b1b3', | ||||
|             'id': '6155972771001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Un petit choc et puis repart!', | ||||
|             'description': 'md5:067bc84bd6afecad85e69d1000730907', | ||||
|             'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374', | ||||
|             'timestamp': 1589262469, | ||||
|             'uploader_id': '6150020952001', | ||||
|             'upload_date': '20200512', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|             'format': 'bestvideo', | ||||
|         }, | ||||
|         'add_ie': ['BrightcoveNew'], | ||||
|     }, { | ||||
|         'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout', | ||||
|         'info_dict': { | ||||
| @@ -65,7 +60,6 @@ class TeleQuebecIE(TeleQuebecBaseIE): | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'bestvideo', | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['BrightcoveNew'], | ||||
|     }, { | ||||
| @@ -79,25 +73,20 @@ class TeleQuebecIE(TeleQuebecBaseIE): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         media_id = self._match_id(url) | ||||
|  | ||||
|         media_data = self._download_json( | ||||
|             'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id, | ||||
|         media = self._download_json( | ||||
|             'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id, | ||||
|             media_id)['media'] | ||||
|  | ||||
|         source_id = media_data['streamInfo']['sourceId'] | ||||
|         source = (try_get( | ||||
|             media_data, lambda x: x['streamInfo']['source'], | ||||
|             compat_str) or 'limelight').lower() | ||||
|         if source == 'brightcove': | ||||
|             info = self._brightcove_result(source_id) | ||||
|         else: | ||||
|             info = self._limelight_result(source_id) | ||||
|         source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove') | ||||
|         info = self._brightcove_result(source_id, '22gPKdt7f') | ||||
|         product = media.get('product') or {} | ||||
|         season = product.get('season') or {} | ||||
|         info.update({ | ||||
|             'title': media_data.get('title'), | ||||
|             'description': try_get( | ||||
|                 media_data, lambda x: x['descriptions'][0]['text'], compat_str), | ||||
|             'duration': int_or_none( | ||||
|                 media_data.get('durationInMilliseconds'), 1000), | ||||
|             'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str), | ||||
|             'series': try_get(season, lambda x: x['serie']['titre']), | ||||
|             'season': season.get('name'), | ||||
|             'season_number': int_or_none(season.get('seasonNo')), | ||||
|             'episode': product.get('titre'), | ||||
|             'episode_number': int_or_none(product.get('episodeNo')), | ||||
|         }) | ||||
|         return info | ||||
|  | ||||
| @@ -148,7 +137,7 @@ class TeleQuebecSquatIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class TeleQuebecEmissionIE(TeleQuebecBaseIE): | ||||
| class TeleQuebecEmissionIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
| @@ -160,15 +149,16 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente', | ||||
|         'info_dict': { | ||||
|             'id': '66648a6aef914fe3badda25e81a4d50a', | ||||
|             'id': '6154476028001', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?", | ||||
|             'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014', | ||||
|             'upload_date': '20171024', | ||||
|             'timestamp': 1508862118, | ||||
|             'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?', | ||||
|             'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f', | ||||
|             'upload_date': '20200505', | ||||
|             'timestamp': 1588713424, | ||||
|             'uploader_id': '6150020952001', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|             'format': 'bestvideo', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression', | ||||
| @@ -187,26 +177,26 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE): | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         media_id = self._search_regex( | ||||
|             r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage, | ||||
|             'limelight id') | ||||
|             r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id') | ||||
|  | ||||
|         info = self._limelight_result(media_id) | ||||
|         info.update({ | ||||
|             'title': self._og_search_title(webpage, default=None), | ||||
|             'description': self._og_search_description(webpage, default=None), | ||||
|         }) | ||||
|         return info | ||||
|         return self.url_result( | ||||
|             'http://zonevideo.telequebec.tv/media/' + media_id, | ||||
|             TeleQuebecIE.ie_key()) | ||||
|  | ||||
|  | ||||
| class TeleQuebecLiveIE(InfoExtractor): | ||||
| class TeleQuebecLiveIE(TeleQuebecBaseIE): | ||||
|     _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)' | ||||
|     _TEST = { | ||||
|         'url': 'http://zonevideo.telequebec.tv/endirect/', | ||||
|         'info_dict': { | ||||
|             'id': 'endirect', | ||||
|             'id': '6159095684001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|             'description': 'Canal principal de Télé-Québec', | ||||
|             'uploader_id': '6150020952001', | ||||
|             'timestamp': 1590439901, | ||||
|             'upload_date': '20200525', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -214,25 +204,49 @@ class TeleQuebecLiveIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         return self._brightcove_result('6159095684001', 'skCsmi2Uw') | ||||
|  | ||||
|         m3u8_url = None | ||||
|         webpage = self._download_webpage( | ||||
|             'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id, | ||||
|             fatal=False) | ||||
|         if webpage: | ||||
|             m3u8_url = self._search_regex( | ||||
|                 r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, | ||||
|                 'm3u8 url', default=None, group='url') | ||||
|         if not m3u8_url: | ||||
|             m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8' | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             m3u8_url, video_id, 'mp4', m3u8_id='hls') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._live_title('Télé-Québec - En direct'), | ||||
|             'is_live': True, | ||||
|             'formats': formats, | ||||
|         } | ||||
| class TeleQuebecVideoIE(TeleQuebecBaseIE): | ||||
|     _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://video.telequebec.tv/player/31110/stream', | ||||
|         'info_dict': { | ||||
|             'id': '6202570652001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée', | ||||
|             'description': 'md5:685a7e4c450ba777c60adb6e71e41526', | ||||
|             'upload_date': '20201019', | ||||
|             'timestamp': 1603115930, | ||||
|             'uploader_id': '6101674910001', | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'bestvideo', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://video.telequebec.tv/player-live/28527', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _call_api(self, path, video_id): | ||||
|         return self._download_json( | ||||
|             'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path, | ||||
|             video_id, query={'device_layout': 'web', 'device_type': 'web'})['data'] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         asset_id = self._match_id(url) | ||||
|         asset = self._call_api(asset_id, asset_id)['asset'] | ||||
|         stream = self._call_api( | ||||
|             asset_id + '/streams/' + asset['streams'][0]['id'], asset_id)['stream'] | ||||
|         stream_url = stream['url'] | ||||
|         account_id = try_get( | ||||
|             stream, lambda x: x['video_provider_details']['account_id']) or '6101674910001' | ||||
|         info = self._brightcove_result(stream_url, 'default', account_id) | ||||
|         info.update({ | ||||
|             'description': asset.get('long_description') or asset.get('short_description'), | ||||
|             'series': asset.get('series_original_name'), | ||||
|             'season_number': int_or_none(asset.get('season_number')), | ||||
|             'episode': asset.get('original_name'), | ||||
|             'episode_number': int_or_none(asset.get('episode_number')), | ||||
|         }) | ||||
|         return info | ||||
|   | ||||
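A sketch of the Brightcove "beacon" playback API used by the new video.telequebec.tv player URLs; the asset id comes from the test above and the endpoints are copied from the diff.

    import json
    from urllib.parse import urlencode
    from urllib.request import urlopen

    def call_api(path):
        qs = urlencode({'device_layout': 'web', 'device_type': 'web'})
        return json.loads(urlopen(
            'http://beacon.playback.api.brightcove.com/telequebec/api/assets/%s?%s'
            % (path, qs)).read().decode())['data']

    asset_id = '31110'
    asset = call_api(asset_id)['asset']
    stream = call_api(asset_id + '/streams/' + asset['streams'][0]['id'])['stream']
    account_id = (stream.get('video_provider_details') or {}).get('account_id') or '6101674910001'
    # the stream URL doubles as the Brightcove video id in the player template
    print('http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
          % (account_id, stream['url']))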
| @@ -3,9 +3,10 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     HEADRequest, | ||||
|     parse_age_limit, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     # smuggle_url, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -24,14 +25,16 @@ class TenPlayIE(InfoExtractor): | ||||
|             'uploader_id': '2199827728001', | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': 'bestvideo', | ||||
|             # 'format': 'bestvideo', | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s' | ||||
|     # BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s' | ||||
|     _GEO_BYPASS = False | ||||
|     _FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         content_id = self._match_id(url) | ||||
| @@ -40,19 +43,28 @@ class TenPlayIE(InfoExtractor): | ||||
|         video = data.get('video') or {} | ||||
|         metadata = data.get('metaData') or {} | ||||
|         brightcove_id = video.get('videoId') or metadata['showContentVideoId'] | ||||
|         brightcove_url = smuggle_url( | ||||
|             self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, | ||||
|             {'geo_countries': ['AU']}) | ||||
|         # brightcove_url = smuggle_url( | ||||
|         #     self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, | ||||
|         #     {'geo_countries': ['AU']}) | ||||
|         m3u8_url = self._request_webpage(HEADRequest( | ||||
|             self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl() | ||||
|         if '10play-not-in-oz' in m3u8_url: | ||||
|             self.raise_geo_restricted(countries=['AU']) | ||||
|         formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': brightcove_url, | ||||
|             'id': content_id, | ||||
|             'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'), | ||||
|             # '_type': 'url_transparent', | ||||
|             # 'url': brightcove_url, | ||||
|             'formats': formats, | ||||
|             'id': brightcove_id, | ||||
|             'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'], | ||||
|             'description': video.get('description'), | ||||
|             'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')), | ||||
|             'series': metadata.get('showName'), | ||||
|             'season': metadata.get('showContentSeason'), | ||||
|             'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')), | ||||
|             'ie_key': 'BrightcoveNew', | ||||
|             'thumbnail': video.get('poster'), | ||||
|             'uploader_id': '2199827728001', | ||||
|             # 'ie_key': 'BrightcoveNew', | ||||
|         } | ||||
|   | ||||
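A sketch of the new 10play approach: a HEAD request to the fastly selector URL redirects to the real m3u8 (or, outside Australia, to a URL containing "10play-not-in-oz"). The brightcove_id below is a made-up placeholder.

    from urllib.request import Request, urlopen

    FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'

    brightcove_id = '6000000000001'  # placeholder id
    req = Request(FASTLY_URL_TEMPL % brightcove_id, method='HEAD')
    m3u8_url = urlopen(req).geturl()  # urllib follows the redirect chain
    if '10play-not-in-oz' in m3u8_url:
        raise Exception('geo-restricted to AU')
    print(m3u8_url)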
| @@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': smuggled_data.get('geo_countries'), | ||||
|         }) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         provider_id = mobj.group('provider_id') | ||||
|   | ||||
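For context, a rough illustration of how the smuggled geo hint travels: callers wrap the ThePlatform URL with smuggle_url() and the hunk above unpacks it before initializing geo bypass. The media path here is a made-up placeholder.

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    wrapped = smuggle_url('https://link.theplatform.com/s/HNK2IC/abcdef', {'geo_countries': ['US']})
    url, data = unsmuggle_url(wrapped, {})
    print(url, data.get('geo_countries'))  # https://link.theplatform.com/s/HNK2IC/abcdef ['US']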
| @@ -1,18 +1,22 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TheWeatherChannelIE(ThePlatformIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', | ||||
|         'md5': 'ab924ac9574e79689c24c6b95e957def', | ||||
|         'md5': 'c4cbe74c9c17c5676b704b950b73dd92', | ||||
|         'info_dict': { | ||||
|             'id': 'cc82397e-cc3f-4d11-9390-a785add090e8', | ||||
|             'ext': 'mp4', | ||||
| @@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE): | ||||
|             'description': 'md5:55606ce1378d4c72e6545e160c9d9695', | ||||
|             'uploader': 'TWC - Digital (No Distro)', | ||||
|             'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c', | ||||
|             'upload_date': '20160720', | ||||
|             'timestamp': 1469018835, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         drupal_settings = self._parse_json(self._search_regex( | ||||
|             r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', | ||||
|             webpage, 'drupal settings'), display_id) | ||||
|         video_id = drupal_settings['twc']['contexts']['node']['uuid'] | ||||
|         video_data = self._download_json( | ||||
|             'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) | ||||
|         asset_name, locale, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if not locale: | ||||
|             locale = 'en-US' | ||||
|         video_data = list(self._download_json( | ||||
|             'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{ | ||||
|                 'name': 'getCMSAssetsUrlConfig', | ||||
|                 'params': { | ||||
|                     'language': locale.replace('-', '_'), | ||||
|                     'query': { | ||||
|                         'assetName': { | ||||
|                             '$in': asset_name, | ||||
|                         }, | ||||
|                     }, | ||||
|                 } | ||||
|             }]).encode(), headers={ | ||||
|                 'Content-Type': 'application/json', | ||||
|             })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0] | ||||
|         video_id = video_data['id'] | ||||
|         seo_meta = video_data.get('seometa', {}) | ||||
|         title = video_data.get('title') or seo_meta['title'] | ||||
|  | ||||
| @@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE): | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         cc_url = video_data.get('cc_url') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
| @@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE): | ||||
|             'duration': parse_duration(video_data.get('duration')), | ||||
|             'uploader': video_data.get('providername'), | ||||
|             'uploader_id': video_data.get('providerid'), | ||||
|             'timestamp': parse_iso8601(video_data.get('publishdate')), | ||||
|             'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
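A sketch of the redux-dal lookup the new code performs instead of scraping Drupal settings; the asset_name is derived from the first test URL and the endpoint is copied from the diff.

    import json
    from urllib.request import Request, urlopen

    locale = 'en-US'
    asset_name = '/series/great-outdoors/video/ice-climber-is-in-for-a-shock'
    payload = json.dumps([{
        'name': 'getCMSAssetsUrlConfig',
        'params': {
            'language': locale.replace('-', '_'),
            'query': {'assetName': {'$in': asset_name}},
        },
    }]).encode()
    resp = json.loads(urlopen(Request(
        'https://weather.com/api/v1/p/redux-dal', data=payload,
        headers={'Content-Type': 'application/json'})).read().decode())
    video_data = list(resp['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
    print(video_data['id'], video_data.get('title'))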
| @@ -3,10 +3,13 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -15,29 +18,35 @@ class ThreeQSDNIE(InfoExtractor): | ||||
|     IE_DESC = '3Q SDN' | ||||
|     _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' | ||||
|     _TESTS = [{ | ||||
|         # ondemand from http://www.philharmonie.tv/veranstaltung/26/ | ||||
|         'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http', | ||||
|         'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd', | ||||
|         # https://player.3qsdn.com/demo.html | ||||
|         'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be', | ||||
|         'md5': '64a57396b16fa011b15e0ea60edce918', | ||||
|         'info_dict': { | ||||
|             'id': '0280d6b9-1215-11e6-b427-0cc47a188158', | ||||
|             'id': '7201c779-6b3c-11e7-a40e-002590c750be', | ||||
|             'ext': 'mp4', | ||||
|             'title': '0280d6b9-1215-11e6-b427-0cc47a188158', | ||||
|             'title': 'Video Ads', | ||||
|             'is_live': False, | ||||
|             'description': 'Video Ads Demo', | ||||
|             'timestamp': 1500334803, | ||||
|             'upload_date': '20170717', | ||||
|             'duration': 888.032, | ||||
|             'subtitles': { | ||||
|                 'eng': 'count:1', | ||||
|             }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'], | ||||
|         }, | ||||
|         'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], | ||||
|     }, { | ||||
|         # live video stream | ||||
|         'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', | ||||
|         'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be', | ||||
|         'info_dict': { | ||||
|             'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', | ||||
|             'id': '66e68995-11ca-11e8-9273-002590c750be', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True,  # m3u8 downloads | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download MPD manifest'], | ||||
|     }, { | ||||
|         # live audio stream | ||||
|         'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', | ||||
| @@ -58,6 +67,14 @@ class ThreeQSDNIE(InfoExtractor): | ||||
|         # live video with rtmp link | ||||
|         'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # ondemand from http://www.philharmonie.tv/veranstaltung/26/ | ||||
|         'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # live video stream | ||||
|         'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -70,73 +87,78 @@ class ThreeQSDNIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         js = self._download_webpage( | ||||
|             'http://playout.3qsdn.com/%s' % video_id, video_id, | ||||
|             query={'js': 'true'}) | ||||
|  | ||||
|         if any(p in js for p in ( | ||||
|                 '>This content is not available in your country', | ||||
|                 'playout.3qsdn.com/forbidden')): | ||||
|         try: | ||||
|             config = self._download_json( | ||||
|                 url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: | ||||
|                 self.raise_geo_restricted() | ||||
|             raise | ||||
|  | ||||
|         stream_content = self._search_regex( | ||||
|             r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js, | ||||
|             'stream content', default='demand', group='content') | ||||
|  | ||||
|         live = stream_content == 'live' | ||||
|  | ||||
|         stream_type = self._search_regex( | ||||
|             r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js, | ||||
|             'stream type', default='video', group='type') | ||||
|         live = config.get('streamContent') == 'live' | ||||
|         aspect = float_or_none(config.get('aspect')) | ||||
|  | ||||
|         formats = [] | ||||
|         urls = set() | ||||
|  | ||||
|         def extract_formats(item_url, item={}): | ||||
|             if not item_url or item_url in urls: | ||||
|                 return | ||||
|             urls.add(item_url) | ||||
|             ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None) | ||||
|             if ext == 'mpd': | ||||
|         for source_type, source in (config.get('sources') or {}).items(): | ||||
|             if not source: | ||||
|                 continue | ||||
|             if source_type == 'dash': | ||||
|                 formats.extend(self._extract_mpd_formats( | ||||
|                     item_url, video_id, mpd_id='mpd', fatal=False)) | ||||
|             elif ext == 'm3u8': | ||||
|                     source, video_id, mpd_id='mpd', fatal=False)) | ||||
|             elif source_type == 'hls': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     item_url, video_id, 'mp4', | ||||
|                     entry_protocol='m3u8' if live else 'm3u8_native', | ||||
|                     source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             elif ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     item_url, video_id, f4m_id='hds', fatal=False)) | ||||
|             else: | ||||
|                 if not self._is_valid_url(item_url, video_id): | ||||
|                     return | ||||
|             elif source_type == 'progressive': | ||||
|                 for s in source: | ||||
|                     src = s.get('src') | ||||
|                     if not (src and self._is_valid_url(src, video_id)): | ||||
|                         continue | ||||
|                     width = None | ||||
|                     format_id = ['http'] | ||||
|                     ext = determine_ext(src) | ||||
|                     if ext: | ||||
|                         format_id.append(ext) | ||||
|                     height = int_or_none(s.get('height')) | ||||
|                     if height: | ||||
|                         format_id.append('%dp' % height) | ||||
|                         if aspect: | ||||
|                             width = int(height * aspect) | ||||
|                     formats.append({ | ||||
|                     'url': item_url, | ||||
|                     'format_id': item.get('quality'), | ||||
|                     'ext': 'mp4' if item_url.startswith('rtsp') else ext, | ||||
|                     'vcodec': 'none' if stream_type == 'audio' else None, | ||||
|                         'ext': ext, | ||||
|                         'format_id': '-'.join(format_id), | ||||
|                         'height': height, | ||||
|                         'source_preference': 0, | ||||
|                         'url': src, | ||||
|                         'vcodec': 'none' if height == 0 else None, | ||||
|                         'width': width, | ||||
|                     }) | ||||
|         for f in formats: | ||||
|             if f.get('acodec') == 'none': | ||||
|                 f['preference'] = -40 | ||||
|             elif f.get('vcodec') == 'none': | ||||
|                 f['preference'] = -50 | ||||
|         self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id')) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for subtitle in (config.get('subtitles') or []): | ||||
|             src = subtitle.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             subtitles.setdefault(subtitle.get('label') or 'eng', []).append({ | ||||
|                 'url': src, | ||||
|             }) | ||||
|  | ||||
|         for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js): | ||||
|             f = self._parse_json( | ||||
|                 item_js, video_id, transform_source=js_to_json, fatal=False) | ||||
|             if not f: | ||||
|                 continue | ||||
|             extract_formats(f.get('src'), f) | ||||
|  | ||||
|         # A more relaxed pattern to collect additional URLs, acting | ||||
|         # as a future-proof fallback | ||||
|         for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js): | ||||
|             extract_formats(src) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._live_title(video_id) if live else video_id | ||||
|         title = config.get('title') or video_id | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'title': self._live_title(title) if live else title, | ||||
|             'thumbnail': config.get('poster') or None, | ||||
|             'description': config.get('description') or None, | ||||
|             'timestamp': parse_iso8601(config.get('upload_date')), | ||||
|             'duration': float_or_none(config.get('vlength')) or None, | ||||
|             'is_live': live, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
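Similarly, the reworked ThreeQSDNIE above replaces scraping of the JS playout page with a single JSON config request. Below is a rough standalone sketch under the assumption that the config endpoint and keys ('streamContent', 'aspect', 'sources', 'subtitles') behave as the diff uses them; it covers only the progressive sources and omits the DASH/HLS manifest parsing and the 401 geo-restriction handling that the real extractor gets from the InfoExtractor base class.

    import json
    from urllib.request import urlopen

    def sketch_3qsdn(video_id):
        # Config endpoint used by the new extractor code above.
        config = json.loads(urlopen(
            'https://playout.3qsdn.com/config/' + video_id).read().decode())
        live = config.get('streamContent') == 'live'
        aspect = config.get('aspect')

        formats = []
        for s in (config.get('sources') or {}).get('progressive') or []:
            src = s.get('src')
            height = s.get('height')
            if not src:
                continue
            formats.append({
                'url': src,
                'height': height,
                # Width is derived from the configured aspect ratio.
                'width': int(height * aspect) if height and aspect else None,
                # A height of 0 marks an audio-only rendition in this config.
                'vcodec': 'none' if height == 0 else None,
            })

        subtitles = {}
        for sub in config.get('subtitles') or []:
            if sub.get('src'):
                subtitles.setdefault(sub.get('label') or 'eng', []).append(
                    {'url': sub['src']})

        return {
            'id': video_id,
            'title': config.get('title') or video_id,
            'is_live': live,
            'formats': formats,
            'subtitles': subtitles,
        }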
Some files were not shown because too many files have changed in this diff.