import base64
import datetime
import itertools
import netrc
import os
import re
import socket
import time
import email.utils
import xml.etree.ElementTree
import random
import math
import operator
import hashlib
import binascii
import urllib

from .utils import *
from .extractor.common import InfoExtractor, SearchInfoExtractor
from .extractor.ard import ARDIE
from .extractor.arte import ArteTvIE
from .extractor.dailymotion import DailymotionIE
from .extractor.gametrailers import GametrailersIE
from .extractor.generic import GenericIE
from .extractor.metacafe import MetacafeIE
from .extractor.statigram import StatigramIE
from .extractor.photobucket import PhotobucketIE
from .extractor.vimeo import VimeoIE
from .extractor.yahoo import YahooIE, YahooSearchIE
from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
from .extractor.zdf import ZDFIE


class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users."""

    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    _PAGE_SIZE = 12
    IE_NAME = u'blip.tv:user'

    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, u'Downloading user page')
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        if mobj is None:
            raise ExtractorError(u'Unable to extract user id from user page')
        page_base = page_base % mobj.group(1)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos), so we need to query
        # page by page until a page comes back without video ids, which
        # means we have got all of them.
        video_ids = []
        pagenum = 1

        while True:
            url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(url, username,
                                          u'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers
            ids_in_page = []

            for mobj in re.finditer(r'href="/([^"]+)"', page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(unescapeHTML(mobj.group(1)))

            video_ids.extend(ids_in_page)

            # A little optimization: if the current page is not "full",
            # i.e. does not contain _PAGE_SIZE video ids, we can assume
            # it is the last one - there are no more ids on further
            # pages, so there is no need to query again.
            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(url, 'BlipTV') for url in urls]
        return [self.playlist_result(url_entries, playlist_title=username)]


class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        file_id = url.split('/')[-1]
        # Rebuild url in English locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = {'gateway_result': '1'}
        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Search for the real file URL
        mobj = re.search(r'
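

# ---------------------------------------------------------------------------
# Standalone sketch (not part of the original module): the page-by-page
# collection pattern used by BlipTVUserIE above, isolated for clarity. The
# loop stops as soon as a page returns fewer than page_size items, which
# signals the last page and saves one extra (empty) request. fetch_page is a
# hypothetical callable standing in for the _download_webpage + regex-scrape
# step; it takes a 1-based page number and returns a list of ids.
def _collect_paginated_ids(fetch_page, page_size):
    collected = []
    pagenum = 1
    while True:
        ids_in_page = fetch_page(pagenum)
        collected.extend(ids_in_page)
        # A short page means there is nothing beyond it, so stop early.
        if len(ids_in_page) < page_size:
            break
        pagenum += 1
    return collected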
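

# Sketch (also not part of the original module): the "press the 'Free
# download' button" trick in DepositFilesIE is plain form submission -
# passing a data argument to compat_urllib_request.Request turns the GET
# into a POST. The helper name _post_form is hypothetical; the compat_*
# names come from the module's own `from .utils import *`.
def _post_form(url, form_fields):
    data = compat_urllib_parse.urlencode(form_fields)
    request = compat_urllib_request.Request(url, data)
    return compat_urllib_request.urlopen(request).read()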