From be3392a0d491af81f353b4372d47d589fda54b0c Mon Sep 17 00:00:00 2001 From: Epsilonator <28658223+clueless-skywatcher@users.noreply.github.com> Date: Thu, 2 Feb 2023 23:03:09 +0530 Subject: [PATCH] [Blerp] Add new extractor (#31398) Co-authored-by: dirkf --- youtube_dl/extractor/blerp.py | 173 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 174 insertions(+) create mode 100644 youtube_dl/extractor/blerp.py diff --git a/youtube_dl/extractor/blerp.py b/youtube_dl/extractor/blerp.py new file mode 100644 index 000000000..355daef6e --- /dev/null +++ b/youtube_dl/extractor/blerp.py @@ -0,0 +1,173 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from ..utils import ( + strip_or_none, + traverse_obj, +) +from .common import InfoExtractor + + +class BlerpIE(InfoExtractor): + IE_NAME = 'blerp' + _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P[0-9a-zA-Z]+)' + _TESTS = [{ + 'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a', + 'info_dict': { + 'id': '6320fe8745636cb4dd677a5a', + 'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016', + 'uploader': 'luminousaj', + 'uploader_id': '5fb81e51aa66ae000c395478', + 'ext': 'mp3', + 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], + } + }, { + 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', + 'info_dict': { + 'id': '5bc94ef4796001000498429f', + 'title': 'Yee', + 'uploader': '179617322678353920', + 'uploader_id': '5ba99cf71386730004552c42', + 'ext': 'mp3', + 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] + } + }] + + _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" + _GRAPHQL_QUERY = ( + '''query webBitePageGetBite($_id: MongoID!) { + web { + biteById(_id: $_id) { + ...bitePageFrag + __typename + } + __typename + } + } + + fragment bitePageFrag on Bite { + _id + title + userKeywords + keywords + color + visibility + isPremium + owned + price + extraReview + isAudioExists + image { + filename + original { + url + __typename + } + __typename + } + userReactions { + _id + reactions + createdAt + __typename + } + topReactions + totalSaveCount + saved + blerpLibraryType + license + licenseMetaData + playCount + totalShareCount + totalFavoriteCount + totalAddedToBoardCount + userCategory + userAudioQuality + audioCreationState + transcription + userTranscription + description + createdAt + updatedAt + author + listingType + ownerObject { + _id + username + profileImage { + filename + original { + url + __typename + } + __typename + } + __typename + } + transcription + favorited + visibility + isCurated + sourceUrl + audienceRating + strictAudienceRating + ownerId + reportObject { + reportedContentStatus + __typename + } + giphy { + mp4 + gif + __typename + } + audio { + filename + original { + url + __typename + } + mp3 { + url + __typename + } + __typename + } + __typename + } + + ''') + + def _real_extract(self, url): + audio_id = self._match_id(url) + + data = { + 'operationName': self._GRAPHQL_OPERATIONNAME, + 'query': self._GRAPHQL_QUERY, + 'variables': { + '_id': audio_id + } + } + + headers = { + 'Content-Type': 'application/json' + } + + json_result = self._download_json('https://api.blerp.com/graphql', + audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) + + bite_json = json_result['data']['web']['biteById'] + + info_dict = { + 'id': bite_json['_id'], + 'url': bite_json['audio']['mp3']['url'], + 'title': bite_json['title'], + 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), + 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), + 'ext': 'mp3', + 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) + } + + return info_dict diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f7bb4042f..b8db4c818 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -138,6 +138,7 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blerp import BlerpIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE