thumbnail: Full URL to a video thumbnail image.
description: Full video description.
uploader: Full name of the video uploader.
+ creator: The main artist who created the video.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
+ average_rating: Average rating give by users, the scale used depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
- self.initialize()
- return self._real_extract(url)
+ try:
+ self.initialize()
+ return self._real_extract(url)
+ except ExtractorError:
+ raise
+ except compat_http_client.IncompleteRead as e:
+ raise ExtractorError('A network error has occured.', cause=e, expected=True)
+ except (KeyError, StopIteration) as e:
+ raise ExtractorError('An extractor error has occured.', cause=e)
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
if mobj:
break
- if os.name != 'nt' and sys.stderr.isatty():
+ if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
}
return RATING_TABLE.get(rating.lower(), None)
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObj
+ family_friendly = self._html_search_meta('isFamilyFriendly', html)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower(), None)
+
def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html,
'twitter card player')
preference,
f.get('language_preference') if f.get('language_preference') is not None else -1,
f.get('quality') if f.get('quality') is not None else -1,
+ f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('vbr') if f.get('vbr') is not None else -1,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
ext_preference,
- f.get('tbr') if f.get('tbr') is not None else -1,
- f.get('vbr') if f.get('vbr') is not None else -1,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('fps') if f.get('fps') is not None else -1,
self.to_screen(msg)
time.sleep(timeout)
- def _extract_f4m_formats(self, manifest_url, video_id):
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest')
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
- manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
+ manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
+ + (media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
- format_id = 'f4m-%d' % (i if tbr is None else tbr)
formats.append({
- 'format_id': format_id,
+ 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
'url': manifest_url,
'ext': 'flv',
'tbr': tbr,
'width': int_or_none(media_el.attrib.get('width')),
'height': int_or_none(media_el.attrib.get('height')),
+ 'preference': preference,
})
self._sort_formats(formats)
return formats
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
- entry_protocol='m3u8', preference=None):
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None):
formats = [{
- 'format_id': 'm3u8-meta',
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
formats.append({'url': format_url(line)})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
-
f = {
- 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
'url': format_url(line.strip()),
'tbr': tbr,
'ext': ext,