X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=377ae91c4383c78c0749937650097d231a7f2ca8;hb=c8805576667f316b34f0a9a81d32da7d24dc754c;hp=3c56daa02eccb9e08ed0d1b0840bf99cf9b8ef5c;hpb=c0d0b01f0e12ce23f7a751ef05e52dabd3e4c1e7;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3c56daa02..377ae91c4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,7 @@ from ..utils import ( compat_urlparse, ExtractorError, + HEADRequest, smuggle_url, unescapeHTML, unified_strdate, @@ -109,21 +110,18 @@ class GenericIE(InfoExtractor): def _send_head(self, url): """Check if it is a redirect, like url shorteners, in case return the new url.""" - class HeadRequest(compat_urllib_request.Request): - def get_method(self): - return "HEAD" class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): """ Subclass the HTTPRedirectHandler to make it use our - HeadRequest also on the redirected URL + HEADRequest also on the redirected URL """ def redirect_request(self, req, fp, code, msg, headers, newurl): if code in (301, 302, 303, 307): newurl = newurl.replace(' ', '%20') newheaders = dict((k,v) for k,v in req.headers.items() if k.lower() not in ("content-length", "content-type")) - return HeadRequest(newurl, + return HEADRequest(newurl, headers=newheaders, origin_req_host=req.get_origin_req_host(), unverifiable=True) @@ -152,7 +150,7 @@ class GenericIE(InfoExtractor): compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: opener.add_handler(handler()) - response = opener.open(HeadRequest(url)) + response = opener.open(HEADRequest(url)) if response is None: raise ExtractorError(u'Invalid URL protocol') return response @@ -164,6 +162,8 @@ class GenericIE(InfoExtractor): return self.url_result('http://' + url) video_id = os.path.splitext(url.split('/')[-1])[0] + self.to_screen(u'%s: Requesting header' % video_id) + try: response = self._send_head(url) @@ -224,7 +224,7 @@ class GenericIE(InfoExtractor): self.to_screen(u'Brightcove video detected.') return self.url_result(bc_url, 'Brightcove') - # Look for embedded Vimeo player + # Look for embedded (iframe) Vimeo player mobj = re.search( r']+?src="(https?://player.vimeo.com/video/.+?)"', webpage) if mobj: @@ -232,9 +232,18 @@ class GenericIE(InfoExtractor): surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl, 'Vimeo') + # Look for embedded (swf embed) Vimeo player + mobj = re.search( + r']+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage) + if mobj: + return self.url_result(mobj.group(1), 'Vimeo') + # Look for embedded YouTube player - matches = re.findall( - r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage) + matches = re.findall(r'''(?x) + (?:]+?src=|embedSWF\(\s*) + (["\'])(?P(?:https?:)?//(?:www\.)?youtube\.com/ + (?:embed|v)/.+?) + \1''', webpage) if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') for tuppl in matches] @@ -293,6 +302,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return OoyalaIE._build_url_result(mobj.group(1)) + # Look for Aparat videos + mobj = re.search(r'