From 13b0b2d37fb474c2584f3780b143b4c5f2cccf46 Mon Sep 17 00:00:00 2001
From: Paper <37962225+mrpapersonic@users.noreply.github.com>
Date: Sat, 7 May 2022 16:49:45 -0400
Subject: [PATCH] [VidLii] Add HD support

Also fix a bug with the view count
---
 youtube_dl/extractor/vidlii.py | 56 +++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dl/extractor/vidlii.py
index f4774256b..9b86efc3e 100644
--- a/youtube_dl/extractor/vidlii.py
+++ b/youtube_dl/extractor/vidlii.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+
 from ..utils import (
     float_or_none,
     get_element_by_id,
@@ -35,6 +36,25 @@ class VidLiiIE(InfoExtractor):
             'categories': ['News & Politics'],
             'tags': ['Vidlii', 'Jan', 'Videogames'],
         }
+    }, {  # HD
+        'url': 'https://www.vidlii.com/watch?v=2Ng8Abj2Fkl',
+        'md5': '450e7da379c884788c3a4fa02a3ce1a4',
+        'info_dict': {
+            'id': '2Ng8Abj2Fkl',
+            'ext': 'mp4',
+            'title': 'test',
+            'description': 'md5:cc55a86032a7b6b3cbfd0f6b155b52e9',
+            'thumbnail': 'https://www.vidlii.com/usfi/thmp/2Ng8Abj2Fkl.jpg',
+            'uploader': 'VidLii',
+            'uploader_url': 'https://www.vidlii.com/user/VidLii',
+            'upload_date': '20200927',
+            'duration': 5,
+            'view_count': int,
+            'comment_count': int,
+            'average_rating': float,
+            'categories': ['Film & Animation'],
+            'tags': ['1', '2'],
+        },
     }, {
         'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
         'only_matching': True,
@@ -46,9 +66,31 @@ class VidLiiIE(InfoExtractor):
         webpage = self._download_webpage(
             'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
 
-        video_url = self._search_regex(
-            r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
-            'video url', group='url')
+        formats = []
+
+        def add_format(format_url, height=None):
+            height = int(self._search_regex(r"(\d+).mp4",
+                                            format_url, "height", default=360))
+
+            formats.append({
+                'url': format_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+            })
+
+        sources = re.findall(
+            r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
+            webpage)
+
+        try:
+            self._request_webpage(sources[1][1], video_id, 'Checking HD URL')
+            add_format(sources[1][1])
+        except Exception:
+            pass
+
+        add_format(sources[0][1])
+
+        self._sort_formats(formats)
 
         title = self._search_regex(
             (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
@@ -83,9 +125,9 @@ class VidLiiIE(InfoExtractor):
             r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
 
         view_count = int_or_none(self._search_regex(
-            (r'<strong>(\d+)</strong> views',
-             r'Views\s*:\s*<strong>(\d+)</strong>'),
-            webpage, 'view count', fatal=False))
+            (r'<strong>([\d,]+)</strong> views',
+             r'Views\s*:\s*<strong>([\d,]+)</strong>'),
+            webpage, 'view count', fatal=False).replace(",", ""))
 
         comment_count = int_or_none(self._search_regex(
             (r'<span[^>]+id=["\']cmt_num[^>]+>(\d+)',
@@ -109,7 +151,7 @@ class VidLiiIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,