Merge f9d0a8cdcdf5c2edee13468125486c30f41f6af8 into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6

[JSInterp] Temporary fix for #33102
[Misc] Correct [_]IE_DESC/NAME in a few IEs
2025-07-18 17:34:13 +09:00 · 2025-04-01 11:19:47 +02:00 · 2025-03-31 04:21:09 +01:00 · 2025-03-26 12:47:19 +00:00 · 2025-03-26 12:17:49 +00:00 · 2021-07-05 12:04:54 -04:00
7 changed files with 53 additions and 15 deletions
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
 import re
 import time
 import xml.etree.ElementTree as etree
@ -60,6 +61,11 @@ MSO_INFO = {
        'username_field': 'IDToken1',
        'password_field': 'IDToken2',
    },
+    'Spectrum': {
+        'name': 'Spectrum',
+        'username_field': 'IDToken1',
+        'password_field': 'IDToken2',
+    },
    'Verizon': {
        'name': 'Verizon FiOS',
        'username_field': 'IDToken1',
@ -1496,6 +1502,41 @@ class AdobePassIE(InfoExtractor):
                        }), headers={
                            'Content-Type': 'application/x-www-form-urlencoded'
                        })
+                elif mso_id == 'Spectrum':
+                    # Spectrum's login form is dynamically loaded via JS so we need to hardcode the flow
+                    # as a one-off implementation.
+                    provider_redirect_page, urlh = provider_redirect_page_res
+                    provider_login_page_res = post_form(
+                        provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
+                    saml_login_page, urlh = provider_login_page_res
+                    relay_state = self._search_regex(
+                        r'RelayState\s*=\s*"(?P<relay>.+?)";',
+                        saml_login_page, 'RelayState', group='relay')
+                    saml_request = self._search_regex(
+                        r'SAMLRequest\s*=\s*"(?P<saml_request>.+?)";',
+                        saml_login_page, 'SAMLRequest', group='saml_request')
+                    login_json = {
+                        mso_info['username_field']: username,
+                        mso_info['password_field']: password,
+                        'RelayState': relay_state,
+                        'SAMLRequest': saml_request,
+                    }
+                    saml_response_json = self._download_json(
+                        'https://tveauthn.spectrum.net/tveauthentication/api/v1/manualAuth', video_id,
+                        'Downloading SAML Response',
+                        data=json.dumps(login_json).encode(),
+                        headers={
+                            'Content-Type': 'application/json',
+                            'Accept': 'application/json',
+                        })
+                    self._download_webpage(
+                        saml_response_json['SAMLRedirectUri'], video_id,
+                        'Confirming Login', data=urlencode_postdata({
+                            'SAMLResponse': saml_response_json['SAMLResponse'],
+                            'RelayState': relay_state,
+                        }), headers={
+                            'Content-Type': 'application/x-www-form-urlencoded'
+                        })
                else:
                    # Some providers (e.g. DIRECTV NOW) have another meta refresh
                    # based redirect that should be followed.
--- a/youtube_dl/extractor/bokecc.py
+++ b/youtube_dl/extractor/bokecc.py
@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):


 class BokeCCIE(BokeCCBaseIE):
-    _IE_DESC = 'CC视频'
+    IE_DESC = 'CC视频'
    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'

    _TESTS = [{
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dl/extractor/cloudy.py
@ -9,7 +9,7 @@ from ..utils import (


 class CloudyIE(InfoExtractor):
-    _IE_DESC = 'cloudy.ec'
+    IE_DESC = 'cloudy.ec'
    _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.cloudy.ec/v/af511e2527aac',
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -422,6 +422,8 @@ class InfoExtractor(object):
    _GEO_COUNTRIES = None
    _GEO_IP_BLOCKS = None
    _WORKING = True
+    # supply this in public subclasses: used in supported sites list, etc
+    # IE_DESC = 'short description of IE'

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@ -35,15 +35,6 @@ from ..utils import (

 class ITVBaseIE(InfoExtractor):

-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        transform_source = kw.pop('transform_source', None)
-        fatal = kw.pop('fatal', True)
-        return self._parse_json(
-            self._search_regex(
-                r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
-                webpage, 'next.js data', group='js', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
-
    def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
        if errnote is False:
            return False
@ -109,7 +100,9 @@ class ITVBaseIE(InfoExtractor):

 class ITVIE(ITVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
-    _IE_DESC = 'ITVX'
+    IE_DESC = 'ITVX'
+    _WORKING = False
+
    _TESTS = [{
        'note': 'Hub URLs redirect to ITVX',
        'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@ -270,7 +263,7 @@ class ITVIE(ITVBaseIE):
                'ext': determine_ext(href, 'vtt'),
            })

-        next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
+        next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
        video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
        title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
        info = self._og_extract(webpage, require_title=not title)
@ -323,7 +316,7 @@ class ITVIE(ITVBaseIE):

 class ITVBTCCIE(ITVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
-    _IE_DESC = 'ITV articles: News, British Touring Car Championship'
+    IE_DESC = 'ITV articles: News, British Touring Car Championship'
    _TESTS = [{
        'note': 'British Touring Car Championship',
        'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dl/extractor/senateisvp.py
@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
        ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
        ['arch', '', 'http://ussenate-f.akamaihd.net/']
    ]
-    _IE_NAME = 'senate.gov'
+    IE_NAME = 'senate.gov'
    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
    _TESTS = [{
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -686,6 +686,8 @@ class JSInterpreter(object):
            raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)

    def _dump(self, obj, namespace):
+        if obj is JS_Undefined:
+            return 'undefined'
        try:
            return json.dumps(obj)
        except TypeError:
Author	SHA1	Message	Date
Kevin O'Connor	bd54176d5c	Merge f9d0a8cdcdf5c2edee13468125486c30f41f6af8 into 3eb8d22ddb8982ca4fb56bb7a8d6517538bf14c6	2025-04-01 11:19:47 +02:00
dirkf	3eb8d22ddb	[JSInterp] Temporary fix for #33102	2025-03-31 04:21:09 +01:00
dirkf	4e714f9df1	[Misc] Correct [_]IE_DESC/NAME in a few IEs * thx seproDev, yt-dlp/yt-dlp/pull/12694/commits/ae69e3c * also add documenting comment in `InfoExtractor`	2025-03-26 12:47:19 +00:00
dirkf	c1ea7f5a24	[ITV] Mark ITVX not working * update old shim * correct [_]IE_DESC	2025-03-26 12:17:49 +00:00
Kevin O'Connor	f9d0a8cdcd	[AdobePass] Add Spectrum as an Adobe Pass provider Some services no longer allow for the older services (ex. `Charter_Direct`) to be used and just return a generic 401 response. Going through their login flow manually, the new provider name appears to be `Spectrum`. This provider seems to work even in place of services that still allow for `Charter_Direct` to be used. The Spectrum login form also needs special handling since it's dynamically loaded via JS thus we cannot simply capture the POST URL and hidden form fields. Instead we need to hardcode the POST URL and recreate the request manually. Thankfully the SAMLRequest and RelayState are in the HTML response.	2021-07-05 12:04:54 -04:00