X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=03f3f18c83012cdced0e305fe1cc02d69a85bb7c;hb=121c09c7be1ac2944f3432122104c1952bfd1f04;hp=df32b5ca0ba081df6c5f4c27f2f00c1a46e7c246;hpb=e4a8eae701f22395dae607ed000d39f7a57e80a0;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index df32b5ca0..03f3f18c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -114,6 +114,9 @@ class InfoExtractor(object): to add to the request. * http_post_data Additional data to send with a POST request. + * stretched_ratio If given and not 1, indicates that the + video's pixels are not square. + width : height ratio as float. url: Final video URL. ext: Video filename extension. format: The video format, defaults to ext (used for --get-format) @@ -147,6 +150,17 @@ class InfoExtractor(object): like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video comment_count: Number of comments on the video + comments: A list of comments, each with one or more of the following + properties (all but one of text or html optional): + * "author" - human-readable name of the comment author + * "author_id" - user ID of the comment author + * "id" - Comment ID + * "html" - Comment as HTML + * "text" - Plain text of the comment + * "timestamp" - UNIX timestamp of comment + * "parent" - ID of the comment this one is replying to. + Set to "root" to indicate that this is a + comment to the original video. age_limit: Age restriction for the video, as an integer (years) webpage_url: The url to the video webpage, if given to youtube-dl it should allow to get the same result again. (It will be set @@ -365,9 +379,19 @@ class InfoExtractor(object): return content - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5): """ Returns the data of the page as a string """ - res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + success = False + try_count = 0 + while success is False: + try: + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + success = True + except compat_http_client.IncompleteRead as e: + try_count += 1 + if try_count >= tries: + raise e + self._sleep(timeout, video_id) if res is False: return res else: @@ -594,7 +618,7 @@ class InfoExtractor(object): return self._html_search_regex( r'''(?isx)]+(?:itemprop|name|property)=(["\']?)%s\1) - [^>]+content=(["\'])(?P.*?)\1''' % re.escape(name), + [^>]+?content=(["\'])(?P.*?)\2''' % re.escape(name), html, display_name, fatal=fatal, group='content', **kwargs) def _dc_search_uploader(self, html): @@ -718,8 +742,14 @@ class InfoExtractor(object): 'Unable to download f4m manifest') formats = [] + manifest_version = '1.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') + if not media_nodes: + manifest_version = '2.0' + media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') for i, media_el in enumerate(media_nodes): + if manifest_version == '2.0': + manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') tbr = int_or_none(media_el.attrib.get('bitrate')) format_id = 'f4m-%d' % (i if tbr is None else tbr) formats.append({