X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=cd155a0901b6a50189d064da26b74951a41b1e18;hb=ff21a8e0ee43d4ce0b75cd938f9bdfab664dd579;hp=df32b5ca0ba081df6c5f4c27f2f00c1a46e7c246;hpb=e4a8eae701f22395dae607ed000d39f7a57e80a0;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index df32b5ca0..cd155a090 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -147,6 +147,17 @@ class InfoExtractor(object):
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
+ comments: A list of comments, each with one or more of the following
+ properties (all optional, except that at least one of "text" or "html" is required):
+ * "author" - human-readable name of the comment author
+ * "author_id" - user ID of the comment author
+ * "id" - Comment ID
+ * "html" - Comment as HTML
+ * "text" - Plain text of the comment
+ * "timestamp" - UNIX timestamp of comment
+ * "parent" - ID of the comment this one is replying to.
+ Set to "root" to indicate that this is a
+ comment to the original video.
age_limit: Age restriction for the video, as an integer (years)
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
@@ -365,9 +376,19 @@ class InfoExtractor(object):
return content
- def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
""" Returns the data of the page as a string """
- res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+ success = False
+ try_count = 0
+ while success is False:
+ try:
+ res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+ success = True
+ except compat_http_client.IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
if res is False:
return res
else:
@@ -594,7 +615,7 @@ class InfoExtractor(object):
return self._html_search_regex(
r'''(?isx)<meta
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
- [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):