X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=3c2d46dd5c8ee780a04cd0f3fedb05e33707b71c;hb=9e1a5b845586a0a5431fb72467142046d8571e6f;hp=7f627c44c49351d3f30368aa51a6fb4734dabadd;hpb=aff2f4f4f56e15976c539211def26236a4cd55ef;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7f627c44c..3c2d46dd5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -43,7 +43,11 @@ class InfoExtractor(object): information possibly downloading the video to the file system, among other possible outcomes. - The dictionaries must include the following fields: + The _type field determines the type of the result. + By far the most common value (and the default if _type is missing) is + "video", which indicates a single video. + + For a video, the dictionaries must include the following fields: id: Video identifier. title: Video title, unescaped. @@ -151,6 +155,38 @@ class InfoExtractor(object): Unless mentioned otherwise, None is equivalent to absence of information. + + _type "playlist" indicates multiple videos. + There must be a key "entries", which is a list or a PagedList object, each + element of which is a valid dictionary under this specification. + + Additionally, playlists can have "title" and "id" attributes with the same + semantics as videos (see above). + + + _type "multi_video" indicates that there are multiple videos that + form a single show, for example multiple acts of an opera or TV episode. + It must have an entries key like a playlist and contain all the keys + required for a video at the same time. + + + _type "url" indicates that the video must be extracted from another + location, possibly by a different extractor. Its only required key is: + "url" - the next URL to extract. 
+ + Additionally, it may have properties believed to be identical to the + resolved entity, for example "title" if the title of the referred video is + known ahead of time. + + + _type "url_transparent" entities have the same specification as "url", but + indicate that the given additional information is more precise than the one + associated with the resolved URL. + This is useful when a site employs a video service that hosts the video and + its technical metadata, but that video service does not embed a useful + title, description etc. + + Subclasses of this one should re-define the _real_initialize() and _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. @@ -387,17 +423,18 @@ class InfoExtractor(object): """Report attempt to log in.""" self.to_screen('Logging in') - #Methods for following #608 + # Methods for following #608 @staticmethod def url_result(url, ie=None, video_id=None): """Returns a url that points to a page that should be processed""" - #TODO: ie should be the class used for getting the info + # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', 'url': url, 'ie_key': ie} if video_id is not None: video_info['id'] = video_id return video_info + @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" @@ -441,7 +478,7 @@ class InfoExtractor(object): raise RegexNotFoundError('Unable to extract %s' % _name) else: self._downloader.report_warning('unable to extract %s; ' - 'please report this issue on http://yt-dl.org/bug' % _name) + 'please report this issue on http://yt-dl.org/bug' % _name) return None def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): @@ -481,7 +518,7 @@ class InfoExtractor(object): raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError) as err: 
self._downloader.report_warning('parsing .netrc: %s' % compat_str(err)) - + return (username, password) def _get_tfa_info(self): @@ -575,7 +612,7 @@ class InfoExtractor(object): def _twitter_search_player(self, html): return self._html_search_meta('twitter:player', html, - 'twitter card player') + 'twitter card player') def _sort_formats(self, formats): if not formats: