X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=562e656e0b24b2a43c69cae81994fed902eb2851;hb=ecd1936695e73ba850d0618828b4a40d7d16c091;hp=5b5e33cea8da39ebed4ea56df1969ad7d5e5ab81;hpb=6219802165d2cda4dd128b06c9435038f43b1145;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 5b5e33cea..562e656e0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -40,7 +40,7 @@ class InfoExtractor(object):
information about the video (or videos) the URL refers to. This
information includes the real video URL, the video title, author and
others. The information is stored in a dictionary which is then
- passed to the FileDownloader. The FileDownloader processes this
+ passed to the YoutubeDL. The YoutubeDL processes this
information possibly downloading the video to the file system, among
other possible outcomes.
@@ -92,6 +92,8 @@ class InfoExtractor(object):
by this field, regardless of all other values.
-1 for default (order by other properties),
-2 or smaller for less than default.
+ < -1000 to hide the format (if there is
+ another one which is strictly better)
* language_preference Is this in the correct requested
language?
10 if it's what the URL is about,
@@ -118,6 +120,7 @@ class InfoExtractor(object):
The following fields are optional:
+ alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
@@ -129,7 +132,7 @@ class InfoExtractor(object):
* "resolution" (optional, string "{width}x{height"},
deprecated)
thumbnail: Full URL to a video thumbnail image.
- description: One-line video description.
+ description: Full video description.
uploader: Full name of the video uploader.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
@@ -158,8 +161,8 @@ class InfoExtractor(object):
_type "playlist" indicates multiple videos.
- There must be a key "entries", which is a list or a PagedList object, each
- element of which is a valid dictionary under this specfication.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above).
@@ -174,9 +177,10 @@ class InfoExtractor(object):
_type "url" indicates that the video must be extracted from another
location, possibly by a different extractor. Its only required key is:
"url" - the next URL to extract.
-
- Additionally, it may have properties believed to be identical to the
- resolved entity, for example "title" if the title of the referred video is
+ The key "ie_key" can be set to the class name (minus the trailing "IE",
+ e.g. "Youtube") if the extractor class is known in advance.
+ Additionally, the dictionary may have any properties of the resolved entity
+ known in advance, for example "title" if the title of the referred video is
known ahead of time.
@@ -390,6 +394,10 @@ class InfoExtractor(object):
url_or_request, video_id, note, errnote, fatal=fatal)
if (not fatal) and json_string is False:
return None
+ return self._parse_json(
+ json_string, video_id, transform_source=transform_source, fatal=fatal)
+
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
if transform_source:
json_string = transform_source(json_string)
try:
@@ -439,7 +447,7 @@ class InfoExtractor(object):
return video_info
@staticmethod
- def playlist_result(entries, playlist_id=None, playlist_title=None):
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
@@ -447,6 +455,8 @@ class InfoExtractor(object):
video_info['id'] = playlist_id
if playlist_title:
video_info['title'] = playlist_title
+ if playlist_description:
+ video_info['description'] = playlist_description
return video_info
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@@ -581,7 +591,7 @@ class InfoExtractor(object):
if display_name is None:
display_name = name
return self._html_search_regex(
- r'''(?ix)]+(?:itemprop|name|property)=(["\']?)%s\1)
[^>]+content=(["\'])(?P.*?)\1''' % re.escape(name),
html, display_name, fatal=fatal, group='content', **kwargs)
@@ -790,6 +800,49 @@ class InfoExtractor(object):
self._sort_formats(formats)
return formats
+ # TODO: improve extraction
+ def _extract_smil_formats(self, smil_url, video_id):
+ smil = self._download_xml(
+ smil_url, video_id, 'Downloading SMIL file',
+ 'Unable to download SMIL file')
+
+ base = smil.find('./head/meta').get('base')
+
+ formats = []
+ rtmp_count = 0
+ for video in smil.findall('./body/switch/video'):
+ src = video.get('src')
+ if not src:
+ continue
+ bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ width = int_or_none(video.get('width'))
+ height = int_or_none(video.get('height'))
+ proto = video.get('proto')
+ if not proto:
+ if base:
+ if base.startswith('rtmp'):
+ proto = 'rtmp'
+ elif base.startswith('http'):
+ proto = 'http'
+ ext = video.get('ext')
+ if proto == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+ elif proto == 'rtmp':
+ rtmp_count += 1
+ streamer = video.get('streamer') or base
+ formats.append({
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ return formats
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()