X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=423e54ceaa2544d62d2d266f5a499caef5293c72;hb=dca087205692c934163ec9aca5962056f890cd19;hp=e0ccba533534709b6ce65af518ca16fadee0c4ea;hpb=2a1a8ffe4113302a6c82562fb565eb8f20404e36;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e0ccba533..423e54cea 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -8,7 +8,6 @@ import netrc from ..utils import ( compat_http_client, compat_urllib_error, - compat_urllib_request, compat_str, clean_html, @@ -19,6 +18,7 @@ from ..utils import ( unescapeHTML, ) + class InfoExtractor(object): """Information Extractor class. @@ -71,6 +71,10 @@ class InfoExtractor(object): ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known + * abr Average audio bitrate in KBit/s + * acodec Name of the audio codec in use + * vbr Average video bitrate in KBit/s + * vcodec Name of the video codec in use webpage_url: The url to the video webpage, if given to youtube-dl it should allow to get the same result again. (It will be set by YoutubeDL if it's missing) @@ -152,7 +156,7 @@ class InfoExtractor(object): elif note is not False: self.to_screen(u'%s: %s' % (video_id, note)) try: - return compat_urllib_request.urlopen(url_or_request) + return self._downloader.urlopen(url_or_request) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if errnote is None: errnote = u'Unable to download webpage' @@ -315,13 +319,21 @@ class InfoExtractor(object): # Helper functions for extracting OpenGraph info @staticmethod - def _og_regex(prop): - return r']+?)"|\'(.+?)\')' + property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop) + template = r']+?%s[^>]+?%s' + return [ + template % (property_re, content_re), + template % (content_re, property_re), + ] def _og_search_property(self, prop, html, name=None, **kargs): if name is None: name = 'OpenGraph %s' % prop - escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs) + escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) + if escaped is None: + return None return unescapeHTML(escaped) def _og_search_thumbnail(self, html, **kargs): @@ -334,10 +346,21 @@ class InfoExtractor(object): return self._og_search_property('title', html, **kargs) def _og_search_video_url(self, html, name='video url', secure=True, **kargs): - regexes = [self._og_regex('video')] - if secure: regexes.insert(0, self._og_regex('video:secure_url')) + regexes = self._og_regexes('video') + if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) + def _html_search_meta(self, name, html, display_name=None): + if display_name is None: + display_name = name + return self._html_search_regex( + r'''(?ix)]+(?:name|property)=["\']%s["\']) + [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), + html, display_name, fatal=False) + + def _dc_search_uploader(self, html): + return self._html_search_meta('dc.creator', html, 'uploader') + def _rta_search(self, html): # See http://www.rtalabel.org/index.php?content=howtofaq#single if re.search(r'(?ix)