X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=3d8ac8ba2d044f446489c54c39195c9a2418fcea;hb=c059bdd432911cff8c7426380a876c9679855ab5;hp=45dd01789b786da425545a528edc4838cb91de19;hpb=78fb87b2837e15124b5855734a951598dfe025fe;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 45dd01789..3d8ac8ba2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -4,6 +4,7 @@ import re import socket import sys import netrc +import xml.etree.ElementTree from ..utils import ( compat_http_client, @@ -71,6 +72,11 @@ class InfoExtractor(object): ("3D" or "DASH video") * width Width of the video, if known * height Height of the video, if known + * abr Average audio bitrate in KBit/s + * acodec Name of the audio codec in use + * vbr Average video bitrate in KBit/s + * vcodec Name of the video codec in use + * filesize The number of bytes, if known in advance webpage_url: The url to the video webpage, if given to youtube-dl it should allow to get the same result again. 
(It will be set by YoutubeDL if it's missing) @@ -204,6 +210,11 @@ class InfoExtractor(object): """ Returns the data of the page as a string """ return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] + def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'): + """Return the xml as an xml.etree.ElementTree.Element""" + xml_string = self._download_webpage(url_or_request, video_id, note, errnote) + return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + def to_screen(self, msg): """Print msg to screen, prefixing it with '[ie_name]'""" self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg)) @@ -225,12 +236,14 @@ class InfoExtractor(object): self.to_screen(u'Logging in') #Methods for following #608 - def url_result(self, url, ie=None): + def url_result(self, url, ie=None, video_id=None): """Returns a url that points to a page that should be processed""" #TODO: ie should be the class used for getting the info video_info = {'_type': 'url', 'url': url, 'ie_key': ie} + if video_id is not None: + video_info['id'] = video_id return video_info def playlist_result(self, entries, playlist_id=None, playlist_title=None): """Returns a playlist""" @@ -346,6 +359,17 @@ class InfoExtractor(object): if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) + def _html_search_meta(self, name, html, display_name=None): + if display_name is None: + display_name = name + return self._html_search_regex( + r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\']) + [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), + html, display_name, fatal=False) + + def _dc_search_uploader(self, html): + return self._html_search_meta('dc.creator', html, 'uploader') + def _rta_search(self, html): # See http://www.rtalabel.org/index.php?content=howtofaq#single if re.search(r'(?ix)