projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[extractor/common] Clarify preference key in formats
[youtube-dl]
/
youtube_dl
/
extractor
/
common.py
diff --git
a/youtube_dl/extractor/common.py
b/youtube_dl/extractor/common.py
index 56c54a5ce2627ecc9d488fbe12901c689fefeb3b..78f238f8428c5df0fce2dcc26f66b1301595e62b 100644
(file)
--- a/
youtube_dl/extractor/common.py
+++ b/
youtube_dl/extractor/common.py
@@
-66,14
+66,15
@@
class InfoExtractor(object):
* asr Audio sampling rate in Hertz
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* asr Audio sampling rate in Hertz
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
+ * container Name of the container format
* filesize The number of bytes, if known in advance
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
* filesize The number of bytes, if known in advance
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
- "http", "https", "rtsp", "rtmp" or so.
+ "http", "https", "rtsp", "rtmp"
, "m3u8"
or so.
* preference Order number of this format. If this field is
present and not None, the formats get sorted
* preference Order number of this format. If this field is
present and not None, the formats get sorted
- by this field.
+ by this field
, regardless of all other values
.
-1 for default (order by other properties),
-2 or smaller for less than default.
* quality Order number of the video quality of this
-1 for default (order by other properties),
-2 or smaller for less than default.
* quality Order number of the video quality of this
@@
-87,12
+88,18
@@
class InfoExtractor(object):
The following fields are optional:
The following fields are optional:
+ display_id An alternative identifier for the video, not necessarily
+ unique, but available before title. Typically, id is
+ something like "4234987", title "Dancing naked mole rats",
+ and display_id "dancing-naked-mole-rats"
thumbnails: A list of dictionaries (with the entries "resolution" and
"url") for the varying thumbnails
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
uploader: Full name of the video uploader.
thumbnails: A list of dictionaries (with the entries "resolution" and
"url") for the varying thumbnails
thumbnail: Full URL to a video thumbnail image.
description: One-line video description.
uploader: Full name of the video uploader.
+ timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
upload_date: Video upload date (YYYYMMDD).
+ If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
subtitles: The subtitle file contents as a dictionary in the format
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
subtitles: The subtitle file contents as a dictionary in the format
@@
-113,9
+120,6
@@
class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
- _real_extract() must return a *list* of information dictionaries as
- described above.
-
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
@@
-221,6
+225,8
@@
class InfoExtractor(object):
webpage_bytes[:1024])
if m:
encoding = m.group(1).decode('ascii')
webpage_bytes[:1024])
if m:
encoding = m.group(1).decode('ascii')
+ elif webpage_bytes.startswith(b'\xff\xfe'):
+ encoding = 'utf-16'
else:
encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False):
else:
encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False):
@@
-237,7
+243,7
@@
class InfoExtractor(object):
except AttributeError:
url = url_or_request
if len(url) > 200:
except AttributeError:
url = url_or_request
if len(url) > 200:
- h =
hashlib.md5(url
).hexdigest()
+ h =
u'___' + hashlib.md5(url.encode('utf-8')
).hexdigest()
url = url[:200 - len(h)] + h
raw_filename = ('%s_%s.dump' % (video_id, url))
filename = sanitize_filename(raw_filename, restricted=True)
url = url[:200 - len(h)] + h
raw_filename = ('%s_%s.dump' % (video_id, url))
filename = sanitize_filename(raw_filename, restricted=True)
@@
-268,8
+274,11
@@
class InfoExtractor(object):
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
- errnote=u'Unable to download JSON metadata'):
+ errnote=u'Unable to download JSON metadata',
+ transform_source=None):
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+ if transform_source:
+ json_string = transform_source(json_string)
try:
return json.loads(json_string)
except ValueError as ve:
try:
return json.loads(json_string)
except ValueError as ve:
@@
-396,7
+405,7
@@
class InfoExtractor(object):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
- content_re = r'content=(?:"([^>]+?)"|\'(
.
+?)\')'
+ content_re = r'content=(?:"([^>]+?)"|\'(
[^>]
+?)\')'
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
@@
-426,14
+435,14
@@
class InfoExtractor(object):
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
- def _html_search_meta(self, name, html, display_name=None):
+ def _html_search_meta(self, name, html, display_name=None
, fatal=False
):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
- html, display_name, fatal=
False
)
+ html, display_name, fatal=
fatal
)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
@@
-462,7
+471,14
@@
class InfoExtractor(object):
}
return RATING_TABLE.get(rating.lower(), None)
}
return RATING_TABLE.get(rating.lower(), None)
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
def _sort_formats(self, formats):
def _sort_formats(self, formats):
+ if not formats:
+ raise ExtractorError(u'No video formats found')
+
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext