X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=647720c8a14ed894f5e75eae73b1103f37d266f7;hb=64e7ad6045990f01b250b622b9934035f75da624;hp=db1ca9edb446568479d3770604b42ae66c2ddd75;hpb=db1f388878db8ce2ae6473a5447a5aa6c9ea86f1;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index db1ca9edb..647720c8a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -88,12 +88,18 @@ class InfoExtractor(object): The following fields are optional: + display_id An alternative identifier for the video, not necessarily + unique, but available before title. Typically, id is + something like "4234987", title "Dancing naked mole rats", + and display_id "dancing-naked-mole-rats" thumbnails: A list of dictionaries (with the entries "resolution" and "url") for the varying thumbnails thumbnail: Full URL to a video thumbnail image. description: One-line video description. uploader: Full name of the video uploader. + timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). + If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. location: Physical location of the video. subtitles: The subtitle file contents as a dictionary in the format @@ -114,9 +120,6 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. - _real_extract() must return a *list* of information dictionaries as - described above. - Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ @@ -271,8 +274,11 @@ class InfoExtractor(object): def _download_json(self, url_or_request, video_id, note=u'Downloading JSON metadata', - errnote=u'Unable to download JSON metadata'): + errnote=u'Unable to download JSON metadata', + transform_source=None): json_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + json_string = transform_source(json_string) try: return json.loads(json_string) except ValueError as ve: @@ -399,7 +405,7 @@ class InfoExtractor(object): # Helper functions for extracting OpenGraph info @staticmethod def _og_regexes(prop): - content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')' + content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')' property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop) template = r']+?%s[^>]+?%s' return [ @@ -429,14 +435,14 @@ class InfoExtractor(object): if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) - def _html_search_meta(self, name, html, display_name=None): + def _html_search_meta(self, name, html, display_name=None, fatal=False): if display_name is None: display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), - html, display_name, fatal=False) + html, display_name, fatal=fatal) def _dc_search_uploader(self, html): return self._html_search_meta('dc.creator', html, 'uploader') @@ -465,7 +471,14 @@ class InfoExtractor(object): } return RATING_TABLE.get(rating.lower(), None) + def _twitter_search_player(self, html): + return self._html_search_meta('twitter:player', html, + 'twitter card player') + def _sort_formats(self, formats): + if not formats: + raise ExtractorError(u'No video formats found') + def _formats_key(f): # TODO remove the following workaround from ..utils import determine_ext