X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=92a0c50508af273a2f895b6061968cfae5284454;hb=72135030d1235f608a2b5e0ec007ca8e6e19e3b4;hp=f787d0a3c0b3afd47f38ddbb09eab8476126516d;hpb=2b35c9ef742bf261078ea10c6c0bba848db1a0df;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f787d0a3c..92a0c5050 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -4,11 +4,11 @@ import re
import socket
import sys
import netrc
+import xml.etree.ElementTree
from ..utils import (
compat_http_client,
compat_urllib_error,
- compat_urllib_request,
compat_str,
clean_html,
@@ -19,6 +19,7 @@ from ..utils import (
unescapeHTML,
)
+
class InfoExtractor(object):
"""Information Extractor class.
@@ -54,6 +55,9 @@ class InfoExtractor(object):
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
view_count: How many users have watched the video on the platform.
+ like_count: Number of positive ratings of the video
+ dislike_count: Number of negative ratings of the video
+ comment_count: Number of comments on the video
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
age_limit: Age restriction for the video, as an integer (years)
@@ -75,6 +79,7 @@ class InfoExtractor(object):
* acodec Name of the audio codec in use
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
+ * filesize The number of bytes, if known in advance
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
@@ -156,7 +161,7 @@ class InfoExtractor(object):
elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
try:
- return compat_urllib_request.urlopen(url_or_request)
+ return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
@@ -208,6 +213,12 @@ class InfoExtractor(object):
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
+ def _download_xml(self, url_or_request, video_id,
+ note=u'Downloading XML', errnote=u'Unable to download XML'):
+ """Return the xml as an xml.etree.ElementTree.Element"""
+ xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+ return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
@@ -229,12 +240,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
- def url_result(self, url, ie=None):
+ def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
@@ -350,6 +363,18 @@ class InfoExtractor(object):
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
+ def _html_search_meta(self, name, html, display_name=None):
+ if display_name is None:
+ display_name = name
+ return self._html_search_regex(
+ r'''(?ix)<meta(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
+ [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
+ html, display_name, fatal=False)
+
+ def _dc_search_uploader(self, html):
+ return self._html_search_meta('dc.creator', html, 'uploader')
+
def _rta_search(self, html):
# See http://www.rtalabel.org/index.php?content=howtofaq#single
if re.search(r'(?ix)