[noco] Remove Extractor(closes #10864 )

[nfl] fix extraction(closes #22245 )
[anvato] update ANVACK table and add experimental token generator for NFL
2025-04-06 15:17:26 +09:00 · 2020-12-15 19:07:14 +01:00 · 2020-12-15 16:06:10 +01:00 · 2020-12-15 16:01:33 +01:00
7 changed files with 226 additions and 413 deletions
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@ -9,6 +9,7 @@ import re
 import time
 from .common import InfoExtractor
 from .anvato_token_generator import NFLTokenGenerator
 from ..aes import aes_encrypt
 from ..compat import compat_str
 from ..utils import (
@ -116,7 +117,76 @@ class AnvatoIE(InfoExtractor):
        'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
        'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
        'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
-        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
+        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
        'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z',
        'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B',
        'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj',
        'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l',
        '04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P',
        'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A',
        'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V',
        'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z',
        'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9',
        'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e',
        'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D',
        'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d',
        'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ',
        'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V',
        'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe',
        'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP',
        '3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV',
        'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v',
        'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q',
        'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV',
        'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r',
        'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR',
        'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0',
        'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl',
        'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923',
        '7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P',
        '3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa',
        '3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V',
        'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5',
        'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ',
        'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye',
        'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o',
        'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e',
        'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z',
        'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R',
        '7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29',
        'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q',
        'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp',
        'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze',
        '5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ',
        '70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa',
        '26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ',
        'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL',
        'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo',
        'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV',
        '3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa',
        'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y',
        '7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P',
        'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO',
        'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr',
        '5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy',
        'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn',
        '3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj',
        'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29',
        'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V',
        'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5',
        'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy',
        'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e',
        '5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y',
        'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0',
        'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy',
        'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV',
        'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K',
        'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23',
        'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR',
        'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R',
        'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ',
        'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L',
        'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR',
    }
    _MCP_TO_ACCESS_KEY_TABLE = {
@ -134,6 +204,10 @@ class AnvatoIE(InfoExtractor):
        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
    }
    _TOKEN_GENERATORS = {
        'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
    }
    _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
@ -189,19 +263,20 @@ class AnvatoIE(InfoExtractor):
        video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
        anvrid = md5_text(time.time() * 1000 * random.random())[:30]
-        payload = {
+        api = {
            'api': {
            'anvrid': anvrid,
                'anvstk': md5_text('%s|%s|%d|%s' % (
                    access_key, anvrid, server_time,
                    self._ANVACK_TABLE.get(access_key, self._API_KEY))),
            'anvts': server_time,
            },
        }
        if access_key in self._TOKEN_GENERATORS:
            api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
        else:
            api['anvstk'] = md5_text('%s|%s|%d|%s' % (
                access_key, anvrid, server_time,
                self._ANVACK_TABLE.get(access_key, self._API_KEY)))
        return self._download_json(
            video_data_url, video_id, transform_source=strip_jsonp,
-            data=json.dumps(payload).encode('utf-8'))
+            data=json.dumps({'api': api}).encode('utf-8'))
    def _get_anvato_videos(self, access_key, video_id):
        video_data = self._get_video_json(access_key, video_id)
@ -259,7 +334,7 @@ class AnvatoIE(InfoExtractor):
            'description': video_data.get('def_description'),
            'tags': video_data.get('def_tags', '').split(','),
            'categories': video_data.get('categories'),
-            'thumbnail': video_data.get('thumbnail'),
+            'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'),
            'timestamp': int_or_none(video_data.get(
                'ts_published') or video_data.get('ts_added')),
            'uploader': video_data.get('mcp_id'),
--- a/youtube_dl/extractor/anvato_token_generator/init.py
+++ b/youtube_dl/extractor/anvato_token_generator/init.py
@ -0,0 +1,7 @@
 from __future__ import unicode_literals
 from .nfl import NFLTokenGenerator
 __all__ = [
    'NFLTokenGenerator',
 ]
--- a/youtube_dl/extractor/anvato_token_generator/common.py
+++ b/youtube_dl/extractor/anvato_token_generator/common.py
@ -0,0 +1,6 @@
 from __future__ import unicode_literals
 class TokenGenerator:
    """Base class for Anvato token generators.

    It only defines the ``generate`` entry point; calling it here always
    raises, so every concrete generator has to provide its own version.
    """

    def generate(self, anvack, mcp_id):
        """Produce a token for the given access key and media id.

        The base implementation never returns.

        Raises:
            NotImplementedError: always, with a fixed explanatory message.
        """
        message = 'This method must be implemented by subclasses'
        raise NotImplementedError(message)
--- a/youtube_dl/extractor/anvato_token_generator/nfl.py
+++ b/youtube_dl/extractor/anvato_token_generator/nfl.py
@ -0,0 +1,30 @@
 from __future__ import unicode_literals
 import json
 from .common import TokenGenerator
 class NFLTokenGenerator(TokenGenerator):
    """Token generator that fetches an Anvato media token from api.nfl.com."""

    # Cached ``'<token_type> <access_token>'`` Authorization header value.
    # It is stored on the class, so it is requested once and then reused by
    # every later call.
    _AUTHORIZATION = None

    @staticmethod
    def generate(ie, anvack, mcp_id):
        """Return the media token for ``mcp_id`` under access key ``anvack``.

        This is a static method on purpose: the Anvato extractor looks the
        generator class up in a table and calls ``generate`` directly on the
        class, handing over the extractor as the first argument.  Without the
        decorator Python 2 treats that as an unbound-method call with a
        foreign first argument and raises ``TypeError``.

        Args:
            ie: object providing ``_download_json`` (the calling extractor).
            anvack: access key interpolated into the GraphQL query.
            mcp_id: media id; also used as the id passed to the download
                helper.

        Returns:
            The ``data.viewer.mediaToken.token`` value of the JSON response.

        Raises:
            KeyError: if ``access_token`` or any of the expected response
                keys is missing.
        """
        if not NFLTokenGenerator._AUTHORIZATION:
            # First call: obtain client credentials and cache the header.
            reroute = ie._download_json(
                'https://api.nfl.com/v1/reroute', mcp_id,
                data=b'grant_type=client_credentials',
                headers={'X-Domain-Id': 100})
            NFLTokenGenerator._AUTHORIZATION = '%s %s' % (reroute.get('token_type') or 'Bearer', reroute['access_token'])
        return ie._download_json(
            'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
                'query': '''{
  viewer {
    mediaToken(anvack: "%s", id: %s) {
      token
    }
  }
 }''' % (anvack, mcp_id),
            }).encode(), headers={
                'Authorization': NFLTokenGenerator._AUTHORIZATION,
                'Content-Type': 'application/json',
            })['data']['viewer']['mediaToken']['token']
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -724,7 +724,10 @@ from .nexx import (
    NexxIE,
    NexxEmbedIE,
 )
-from .nfl import NFLIE
+from .nfl import (
    NFLIE,
    NFLArticleIE,
 )
 from .nhk import (
    NhkVodIE,
    NhkVodProgramIE,
@ -744,7 +747,6 @@ from .ninenow import NineNowIE
 from .nintendo import NintendoIE
 from .njpwworld import NJPWWorldIE
 from .nobelprize import NobelPrizeIE
 from .noco import NocoIE
 from .nonktube import NonkTubeIE
 from .noovo import NoovoIE
 from .normalboots import NormalbootsIE
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@ -4,19 +4,15 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
-    ExtractorError,
+    clean_html,
-    int_or_none,
+    determine_ext,
-    remove_end,
+    get_element_by_class,
 )
-class NFLIE(InfoExtractor):
+class NFLBaseIE(InfoExtractor):
-    IE_NAME = 'nfl.com'
+    _VALID_URL_BASE = r'''(?x)
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
@ -34,15 +30,15 @@ class NFLIE(InfoExtractor):
                                    houstontexans|
                                    colts|
                                    jaguars|
-                                    titansonline|
+                                    (?:titansonline|tennesseetitans)|
                                    denverbroncos|
-                                    kcchiefs|
+                                    (?:kc)?chiefs|
                                    raiders|
                                    chargers|
                                    dallascowboys|
                                    giants|
                                    philadelphiaeagles|
-                                    redskins|
+                                    (?:redskins|washingtonfootball)|
                                    chicagobears|
                                    detroitlions|
                                    packers|
@ -52,180 +48,112 @@ class NFLIE(InfoExtractor):
                                    neworleanssaints|
                                    buccaneers|
                                    azcardinals|
-                                    stlouisrams|
+                                    (?:stlouis|the)rams|
                                    49ers|
                                    seahawks
                                )\.com|
                                .+?\.clubs\.nfl\.com
                            )
                        )/
                        (?:.+?/)*
                        (?P<id>[^/#?&]+)
                    '''
    _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})'
    def _parse_video_config(self, video_config, display_id):
        """Turn an embedded video-config JSON string into an info dict.

        Only the first entry of the config's ``playlist`` is used.  When that
        entry has an ``mcpID`` the result is a URL result that delegates to
        the Anvato extractor; otherwise the info dict is assembled from the
        entry itself.
        """
        config = self._parse_json(video_config, display_id)
        entry = config['playlist'][0]

        mcp_id = entry.get('mcpID')
        if mcp_id:
            # Hand over to the Anvato extractor.
            return self.url_result(
                'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id,
                'Anvato', mcp_id)

        media_id = entry.get('id') or entry['entityId']
        title = entry['title']
        media_url = entry['url']

        result = {'id': media_id}
        if determine_ext(media_url) == 'm3u8':
            result['formats'] = self._extract_m3u8_formats(media_url, media_id, 'mp4')
            self._sort_formats(result['formats'])
        else:
            result['url'] = media_url
            # Entries flagged as audio carry no video stream.
            if entry.get('audio') is True:
                result['vcodec'] = 'none'

        live = config.get('live') is True

        # 'imageSrc' / 'posterImage' hold the *name* of the entry key whose
        # value is then looked up as the image URL.
        poster_url = entry.get(entry.get('imageSrc')) or entry.get(entry.get('posterImage'))
        thumbnails = [{
            'url': poster_url,
            'ext': determine_ext(poster_url, 'jpg'),
        }] if poster_url else None

        result.update({
            'title': self._live_title(title) if live else title,
            'is_live': live,
            'description': clean_html(entry.get('description')),
            'thumbnails': thumbnails,
        })
        return result
 class NFLIE(NFLBaseIE):
    IE_NAME = 'nfl.com'
    _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
    _TESTS = [{
-        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
        'md5': '394ef771ddcd1354f665b471d78ec4c6',
        'info_dict': {
-            'id': '0ap3000000398478',
+            'id': '899441',
            'ext': 'mp4',
-            'title': 'Week 3: Redskins vs. Eagles highlights',
+            'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
-            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'description': 'md5:85e05a3cc163f8c344340f220521136d',
-            'upload_date': '20140921',
+            'upload_date': '20201215',
-            'timestamp': 1411337580,
+            'timestamp': 1608009755,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'NFL',
        }
    }, {
-        'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
+        'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
-        'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
+        'md5': '6886b32c24b463038c760ceb55a34566',
        'info_dict': {
-            'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
+            'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
-            'ext': 'mp4',
+            'ext': 'mp3',
-            'title': 'LIVE: Post Game vs. Browns',
+            'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
-            'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
+            'description': 'md5:12ada8ee70e6762658c30e223e095075',
            'upload_date': '20131229',
            'timestamp': 1388354455,
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
-        'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
+        'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
        'info_dict': {
            'id': '0ap3000000467607',
            'ext': 'mp4',
            'title': 'Frustrations flare on the field',
            'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
            'timestamp': 1422850320,
            'upload_date': '20150202',
        },
    }, {
        'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
        'md5': '4c319e2f625ffd0b481b4382c6fc124c',
        'info_dict': {
            'id': 'n-238346',
            'ext': 'mp4',
            'title': '10 Days at Gillette',
            'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
            'timestamp': 1442618809,
            'upload_date': '20150918',
        },
    }, {
        # lowercase data-contentid
        'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7',
        'info_dict': {
            'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2',
            'ext': 'mp4',
            'title': 'Tomlin looks ahead to Ravens on a short week',
            'description': 'md5:32f3f7b139f43913181d5cbb24ecad75',
            'timestamp': 1443459651,
            'upload_date': '20150928',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
        'only_matching': True,
    }, {
-        'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
+        'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
        'only_matching': True,
    }]
-    @staticmethod
+    def _real_extract(self, url):
-    def prepend_host(host, url):
+        display_id = self._match_id(url)
-        if not url.startswith('http'):
+        webpage = self._download_webpage(url, display_id)
-            if not url.startswith('/'):
+        return self._parse_video_config(self._search_regex(
-                url = '/%s' % url
+            self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)
            url = 'http://{0:}{1:}'.format(host, url)
        return url
-    @staticmethod
+
-    def format_from_stream(stream, protocol, host, path_prefix='',
+class NFLArticleIE(NFLBaseIE):
-                           preference=0, note=None):
+    IE_NAME = 'nfl.com:article'
-        url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
+    _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
-            protocol=protocol,
+    _TEST = {
-            host=host,
+        'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
-            prefix=path_prefix,
+        'info_dict': {
-            path=stream.get('path'),
+            'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
-        )
+            'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
-        return {
+        },
-            'url': url,
+        'playlist_count': 4,
            'vbr': int_or_none(stream.get('rate', 0), 1000),
            'preference': preference,
            'format_note': note,
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        display_id = self._match_id(url)
-        video_id, host = mobj.group('id'), mobj.group('host')
+        webpage = self._download_webpage(url, display_id)
-
+        entries = []
-        webpage = self._download_webpage(url, video_id)
+        for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
-
+            entries.append(self._parse_video_config(video_config, display_id))
-        config_url = NFLIE.prepend_host(host, self._search_regex(
+        title = clean_html(get_element_by_class(
-            r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
+            'nfl-c-article__title', webpage)) or self._html_search_meta(
-            webpage, 'config URL', default='static/content/static/config/video/config.json',
+            ['og:title', 'twitter:title'], webpage)
-            group='config'))
+        return self.playlist_result(entries, display_id, title)
        # For articles, the id in the url is not the video id
        video_id = self._search_regex(
            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'video id', default=video_id, group='id')
        config = self._download_json(config_url, video_id, 'Downloading player config')
        url_template = NFLIE.prepend_host(
            host, '{contentURLTemplate:}'.format(**config))
        video_data = self._download_json(
            url_template.format(id=video_id), video_id)
        formats = []
        cdn_data = video_data.get('cdnData', {})
        streams = cdn_data.get('bitrateInfo', [])
        if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
            parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
            protocol, host = parts.scheme, parts.netloc
            for stream in streams:
                formats.append(
                    NFLIE.format_from_stream(stream, protocol, host))
        else:
            cdns = config.get('cdns')
            if not cdns:
                raise ExtractorError('Failed to get CDN data', expected=True)
            for name, cdn in cdns.items():
                # LimeLight streams don't seem to work
                if cdn.get('name') == 'LIMELIGHT':
                    continue
                protocol = cdn.get('protocol')
                host = remove_end(cdn.get('host', ''), '/')
                if not (protocol and host):
                    continue
                prefix = cdn.get('pathprefix', '')
                if prefix and not prefix.endswith('/'):
                    prefix = '%s/' % prefix
                preference = 0
                if protocol == 'rtmp':
                    preference = -2
                elif 'prog' in name.lower():
                    preference = 1
                for stream in streams:
                    formats.append(
                        NFLIE.format_from_stream(stream, protocol, host,
                                                 prefix, preference, name))
        self._sort_formats(formats)
        thumbnail = None
        for q in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = video_data.get('imagePaths', {}).get(q)
            if thumbnail:
                break
        return {
            'id': video_id,
            'title': video_data.get('headline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@ -1,235 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 import time
 import hashlib
 from .common import InfoExtractor
 from ..compat import (
    compat_str,
    compat_urlparse,
 )
 from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    float_or_none,
    parse_iso8601,
    sanitized_Request,
    urlencode_postdata,
 )
 class NocoIE(InfoExtractor):
    """Extractor for noco.tv show pages and player.noco.tv embeds.

    Optionally logs in, then queries the noco API (each request signed with
    a timestamp-derived token) to collect formats and metadata for one show.
    """
    _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
    _LOGIN_URL = 'https://noco.tv/do.php'
    # Filled with (path, timestamp, token) by _call_api
    _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
    # Appended to the API URL when a subtitle language is requested
    _SUB_LANG_TEMPLATE = '&sub_lang=%s'
    _NETRC_MACHINE = 'noco'
    _TESTS = [
        {
            'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
            'md5': '0a993f0058ddbcd902630b2047ef710e',
            'info_dict': {
                'id': '11538',
                'ext': 'mp4',
                'title': 'Ami Ami Idol - Hello! France',
                'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
                'upload_date': '20140412',
                'uploader': 'Nolife',
                'uploader_id': 'NOL',
                'duration': 2851.2,
            },
            'skip': 'Requires noco account',
        },
        {
            'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
            'md5': 'c190f1f48e313c55838f1f412225934d',
            'info_dict': {
                'id': '12610',
                'ext': 'mp4',
                'title': 'The Guild #1 - Wake-Up Call',
                'timestamp': 1403863200,
                'upload_date': '20140627',
                'uploader': 'LBL42',
                'uploader_id': 'LBL',
                'duration': 233.023,
            },
            'skip': 'Requires noco account',
        }
    ]
    def _real_initialize(self):
        """Attempt the login before any extraction takes place."""
        self._login()
    def _login(self):
        """Log in with the configured credentials, if any.

        Does nothing when no username is available.  Raises ExtractorError
        (marked as expected) when the response contains an 'erreur' entry.
        """
        username, password = self._get_login_info()
        if username is None:
            return
        login = self._download_json(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata({
                'a': 'login',
                'cookie': '1',
                'username': username,
                'password': password,
            }),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            })
        if 'erreur' in login:
            raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
    @staticmethod
    def _ts():
        """Return the current local time as integer milliseconds."""
        return int(time.time() * 1000)
    def _call_api(self, path, video_id, note, sub_lang=None):
        """Call the API endpoint `path` and return the decoded JSON.

        The request carries the offset-corrected timestamp `ts` and the token
        `tk` = md5(md5(ts) + fixed salt), plus a Referer header taken from
        self._referer.  Both self._ts_offset and self._referer must have been
        set beforehand (done in _real_extract).  A dict response with a truthy
        'error' entry is turned into an ExtractorError via _raise_error.
        """
        ts = compat_str(self._ts() + self._ts_offset)
        tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
        url = self._API_URL_TEMPLATE % (path, ts, tk)
        if sub_lang:
            url += self._SUB_LANG_TEMPLATE % sub_lang
        request = sanitized_Request(url)
        request.add_header('Referer', self._referer)
        resp = self._download_json(request, video_id, note)
        if isinstance(resp, dict) and resp.get('error'):
            self._raise_error(resp['error'], resp['description'])
        return resp
    def _raise_error(self, error, description):
        """Raise an expected ExtractorError built from an error/description pair."""
        raise ExtractorError(
            '%s returned error: %s - %s' % (self.IE_NAME, error, description),
            expected=True)
    def _real_extract(self, url):
        """Extract formats and metadata for the show identified by `url`.

        Returns an info dict with id, title, description, thumbnails,
        timestamp, uploader, uploader_id, duration and formats.
        """
        video_id = self._match_id(url)
        # Timestamp adjustment offset between server time and local time
        # must be calculated in order to use timestamps closest to server's
        # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
        webpage = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1',
            webpage, 'noco player', group='player',
            default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf')
        # The 'ts' query parameter of the player URL, when present, is used
        # as the reference timestamp; without it the offset is 0
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
        ts = int_or_none(qs.get('ts', [None])[0])
        self._ts_offset = ts - self._ts() if ts else 0
        self._referer = player_url
        medias = self._call_api(
            'shows/%s/medias' % video_id,
            video_id, 'Downloading video JSON')
        show = self._call_api(
            'shows/by_id/%s' % video_id,
            video_id, 'Downloading show JSON')[0]
        options = self._call_api(
            'users/init', video_id,
            'Downloading user options JSON')['options']
        # Pick the preferred audio language: user option first, then 'fr';
        # a single available language always wins
        audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
        if audio_lang_pref == 'original':
            audio_lang_pref = show['original_lang']
        if len(medias) == 1:
            audio_lang_pref = list(medias.keys())[0]
        elif audio_lang_pref not in medias:
            audio_lang_pref = 'fr'
        qualities = self._call_api(
            'qualities',
            video_id, 'Downloading qualities JSON')
        formats = []
        # One API request per (audio language, subtitle language, quality)
        for audio_lang, audio_lang_dict in medias.items():
            preference = 1 if audio_lang == audio_lang_pref else 0
            for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
                for format_id, fmt in lang_dict['quality_list'].items():
                    format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
                    video = self._call_api(
                        'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
                        video_id, 'Downloading %s video JSON' % format_id_extended,
                        sub_lang if sub_lang != 'none' else None)
                    file_url = video['file']
                    if not file_url:
                        continue
                    # These two sentinel values stand in for a URL when the
                    # file is unavailable; the attached message is raised
                    if file_url in ['forbidden', 'not found']:
                        popmessage = video['popmessage']
                        self._raise_error(popmessage['title'], popmessage['message'])
                    formats.append({
                        'url': file_url,
                        'format_id': format_id_extended,
                        'width': int_or_none(fmt.get('res_width')),
                        'height': int_or_none(fmt.get('res_lines')),
                        'abr': int_or_none(fmt.get('audiobitrate'), 1000),
                        'vbr': int_or_none(fmt.get('videobitrate'), 1000),
                        'filesize': int_or_none(fmt.get('filesize')),
                        'format_note': qualities[format_id].get('quality_name'),
                        'quality': qualities[format_id].get('priority'),
                        'preference': preference,
                    })
        self._sort_formats(formats)
        timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ')
        # Negative timestamps are discarded
        if timestamp is not None and timestamp < 0:
            timestamp = None
        uploader = show.get('partner_name')
        uploader_id = show.get('partner_key')
        duration = float_or_none(show.get('duration_ms'), 1000)
        thumbnails = []
        # Thumbnails are the show entries named screenshot_<width>x<height>
        for thumbnail_key, thumbnail_url in show.items():
            m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })
        episode = show.get('show_TT') or show.get('show_OT')
        family = show.get('family_TT') or show.get('family_OT')
        episode_number = show.get('episode_number')
        # Title is assembled as '<family> #<episode number> - <episode>',
        # leaving out whichever parts are missing
        title = ''
        if family:
            title += family
        if episode_number:
            title += ' #' + compat_str(episode_number)
        if episode:
            title += ' - ' + compat_str(episode)
        description = show.get('show_resume') or show.get('family_resume')
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'formats': formats,
        }
Author	SHA1	Message	Date
Remita Amine	1bc1520adc	[noco] Remove Extractor(closes #10864 )	2020-12-15 19:07:14 +01:00
Remita Amine	022e05dc1f	[nfl] fix extraction(closes #22245 )	2020-12-15 16:06:10 +01:00
Remita Amine	b34c9551aa	[anvato] update ANVACK table and add experimental token generator for NFL	2020-12-15 16:01:33 +01:00