X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=57a6b1820c90891a9c9c138e27ff042155f36670;hb=aff24732b96b5ec89cb41a05fd132e12c5990caf;hp=75cb96eb713852f736716d462fd279d7cf669b20;hpb=1f9da9049b5b6dcede3d274acd58aa1b6dea5d2c;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 75cb96eb7..57a6b1820 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,7 @@ from ..utils import ( compat_urlparse, ExtractorError, + HEADRequest, smuggle_url, unescapeHTML, unified_strdate, @@ -109,21 +110,18 @@ class GenericIE(InfoExtractor): def _send_head(self, url): """Check if it is a redirect, like url shorteners, in case return the new url.""" - class HeadRequest(compat_urllib_request.Request): - def get_method(self): - return "HEAD" class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): """ Subclass the HTTPRedirectHandler to make it use our - HeadRequest also on the redirected URL + HEADRequest also on the redirected URL """ def redirect_request(self, req, fp, code, msg, headers, newurl): if code in (301, 302, 303, 307): newurl = newurl.replace(' ', '%20') newheaders = dict((k,v) for k,v in req.headers.items() if k.lower() not in ("content-length", "content-type")) - return HeadRequest(newurl, + return HEADRequest(newurl, headers=newheaders, origin_req_host=req.get_origin_req_host(), unverifiable=True) @@ -152,7 +150,7 @@ class GenericIE(InfoExtractor): compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: opener.add_handler(handler()) - response = opener.open(HeadRequest(url)) + response = opener.open(HEADRequest(url)) if response is None: raise ExtractorError(u'Invalid URL protocol') return response @@ -164,6 +162,8 @@ class GenericIE(InfoExtractor): return self.url_result('http://' + url) video_id = os.path.splitext(url.split('/')[-1])[0] + self.to_screen(u'%s: Requesting header' % video_id) + try: response = self._send_head(url) @@ -224,7 +224,7 @@ class GenericIE(InfoExtractor): self.to_screen(u'Brightcove video detected.') return self.url_result(bc_url, 'Brightcove') - # Look for embedded Vimeo player + # Look for embedded (iframe) Vimeo player mobj = re.search( r']+?src="(https?://player.vimeo.com/video/.+?)"', webpage) if mobj: @@ -232,6 +232,12 @@ class GenericIE(InfoExtractor): surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl, 'Vimeo') + # Look for embedded (swf embed) Vimeo player + mobj = re.search( + r']+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage) + if mobj: + return self.url_result(mobj.group(1), 'Vimeo') + # Look for embedded YouTube player matches = re.findall(r'''(?x) (?:]+?src=|embedSWF\(\s*) @@ -267,16 +273,12 @@ class GenericIE(InfoExtractor): } # Look for embedded blip.tv player - mobj = re.search(r']*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage) + mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) if mobj: - return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV') - mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage) + return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV') + mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage) if mobj: - player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1) - player_page = self._download_webpage(player_url, mobj.group(1)) - blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False) - if blip_video_id: - return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV') + return self.url_result(mobj.group(1), 'BlipTV') # Look for Bandcamp pages with custom domain mobj = re.search(r']*?content="(.*?bandcamp\.com.*?)"', webpage) @@ -296,6 +298,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return OoyalaIE._build_url_result(mobj.group(1)) + # Look for Aparat videos + mobj = re.search(r'