X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=b77db52f7af4f1631e661bcbaf86bedbada80bf6;hb=632256d9ecd86f17b370b2cbe3f50acdf961d6cc;hp=cf3781cd6b4e0cb34a96ffddc445403ed1505f2e;hpb=f889cea109b4e2647e3fd6a462c9893b88b21e04;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cf3781cd6..b77db52f7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -242,7 +242,6 @@ class InfoExtractor(object): def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): """ Returns a tuple (page content as string, URL handle) """ - # Strip hashes from the URL (#1038) if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] @@ -251,6 +250,10 @@ class InfoExtractor(object): if urlh is False: assert not fatal return False + content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) + return (content, urlh) + + def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True): content_type = urlh.headers.get('Content-Type', '') webpage_bytes = urlh.read() m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) @@ -309,7 +312,7 @@ class InfoExtractor(object): msg += ' Visit %s for more details' % blocked_iframe raise ExtractorError(msg, expected=True) - return (content, urlh) + return content def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): """ Returns the data of the page as a string """ @@ -686,7 +689,10 @@ class InfoExtractor(object): if re.match(r'^https?://', u) else compat_urlparse.urljoin(m3u8_url, u)) - m3u8_doc = self._download_webpage(m3u8_url, video_id) + m3u8_doc = self._download_webpage( + m3u8_url, video_id, + note='Downloading m3u8 information', + errnote='Failed to download m3u8 information') last_info = None kv_rex = re.compile( r'(?P[a-zA-Z_-]+)=(?P"[^"]+"|[^",]+)(?:,|$)')