diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index 2e01aff48..643dffe0a 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -1,8 +1,13 @@
 from __future__ import unicode_literals
 
+import os
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -23,6 +28,7 @@ from .ustream import UstreamIE
 
 
 class CSpanIE(InfoExtractor):
+    _BASIC_URL = r'https?://(?:www\.)?c-span\.org/'
     _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
     IE_DESC = 'C-SPAN'
     _TESTS = [{
@@ -81,6 +87,33 @@ class CSpanIE(InfoExtractor):
     }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
 
+    @classmethod
+    def is_basic_url(cls, url):
+        """Return True if url starts with the c-span.org prefix (_BASIC_URL)."""
+        # Compile lazily; cache on this exact class (cls.__dict__, not inherited)
+        if '_BASIC_URL_RE' not in cls.__dict__:
+            cls._BASIC_URL_RE = re.compile(cls._BASIC_URL)
+
+        return cls._BASIC_URL_RE.match(url) is not None
+
+    @classmethod
+    def get_basic_url(cls, url):
+        """Return the first three '/'-separated parts of url (scheme://host)."""
+        return '/'.join(url.rstrip('/').split('/')[:3])
+
+    @classmethod
+    def _get_id(cls, url):
+        """Return the 'id' group of _VALID_URL, else the unquoted last path stem."""
+        # This class defines no _ID_URL; the pattern with the 'id' group is _VALID_URL
+        if '_ID_URL_RE' not in cls.__dict__:
+            cls._ID_URL_RE = re.compile(cls._VALID_URL)
+
+        m = cls._ID_URL_RE.match(url)
+        if m is not None:
+            return compat_str(m.group('id'))
+        return compat_urllib_parse_unquote(
+            os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_type = None
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b01900afa..c0c59276e 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -50,6 +50,7 @@ from .nexx import (
     NexxIE,
     NexxEmbedIE,
 )
+from .cspan import CSpanIE
 from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
@@ -3766,6 +3767,22 @@ class GenericIE(InfoExtractor):
                 if entry_info_dict.get('formats'):
                     self._sort_formats(entry_info_dict['formats'])
 
+            # Attach browser-style request headers to every format of a c-span.org page
+            if CSpanIE.is_basic_url(url):
+                basic_url = CSpanIE.get_basic_url(url)
+                cspan_headers = {
+                    'referer': basic_url + '/',
+                    'origin': basic_url,
+                    'accept': '*/*',
+                    'sec-fetch-dest': 'empty',
+                    'sec-fetch-mode': 'cors',
+                    'sec-fetch-site': 'cross-site',
+                }
+                # The guard above shows 'formats' may be absent or empty,
+                # so never index it directly here
+                for f in entry_info_dict.get('formats') or []:
+                    f.setdefault('http_headers', {}).update(cspan_headers)
+
             entries.append(entry_info_dict)
 
         if len(entries) == 1: