X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=dc508050413c8490882323e01d91ea3a3ba88c9a;hb=25a4c5a9ed59eca0241922363e83e61172527658;hp=1272834c515da2c8c29bc80e73c0cd81af7fe25c;hpb=297a564beeb20ca8b00d94f5707532110631f409;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1272834c5..dc5080504 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,10 +14,12 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_cookies, compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, + compat_urllib_request, compat_urlparse, compat_str, ) @@ -65,7 +67,7 @@ class InfoExtractor(object): Potential fields: * url Mandatory. The URL of the video file - * ext Will be calculated from url if missing + * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). Calculated from the format_id, width, height. @@ -155,7 +157,7 @@ class InfoExtractor(object): lower to higher preference, each element is a dictionary with the "ext" entry and one of: * "data": The subtitles file contents - * "url": A url pointing to the subtitles file + * "url": A URL pointing to the subtitles file automatic_captions: Like 'subtitles', used by the YoutubeIE for automatically generated captions duration: Length of the video in seconds, as an integer. @@ -176,17 +178,18 @@ class InfoExtractor(object): Set to "root" to indicate that this is a comment to the original video. age_limit: Age restriction for the video, as an integer (years) - webpage_url: The url to the video webpage, if given to youtube-dl it + webpage_url: The URL to the video webpage, if given to youtube-dl it should allow to get the same result again. (It will be set by YoutubeDL if it's missing) categories: A list of categories that the video falls in, for example ["Sports", "Berlin"] + tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"] is_live: True, False, or None (=unknown). Whether this video is a live stream that goes on instead of a fixed-length video. start_time: Time in seconds where the reproduction should start, as - specified in the url. + specified in the URL. end_time: Time in seconds where the reproduction should end, as - specified in the url. + specified in the URL. Unless mentioned otherwise, the fields should be Unicode strings. @@ -505,7 +508,7 @@ class InfoExtractor(object): # Methods for following #608 @staticmethod def url_result(url, ie=None, video_id=None, video_title=None): - """Returns a url that points to a page that should be processed""" + """Returns a URL that points to a page that should be processed""" # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', 'url': url, @@ -630,6 +633,12 @@ class InfoExtractor(object): template % (content_re, property_re), ] + @staticmethod + def _meta_regex(prop): + return r'''(?isx)]+(?:itemprop|name|property)=(["\']?)%s\1) + [^>]+?content=(["\'])(?P.*?)\2''' % re.escape(prop) + def _og_search_property(self, prop, html, name=None, **kargs): if name is None: name = 'OpenGraph %s' % prop @@ -639,7 +648,7 @@ class InfoExtractor(object): return unescapeHTML(escaped) def _og_search_thumbnail(self, html, **kargs): - return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs) + return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs) def _og_search_description(self, html, **kargs): return self._og_search_property('description', html, fatal=False, **kargs) @@ -660,9 +669,7 @@ class InfoExtractor(object): if display_name is None: display_name = name return self._html_search_regex( - r'''(?isx)]+(?:itemprop|name|property)=(["\']?)%s\1) - [^>]+?content=(["\'])(?P.*?)\2''' % re.escape(name), + self._meta_regex(name), html, display_name, fatal=fatal, group='content', **kwargs) def _dc_search_uploader(self, html): @@ -1069,6 +1076,12 @@ class InfoExtractor(object): None, '/', True, False, expire_time, '', None, None, None) self._downloader.cookiejar.set_cookie(cookie) + def _get_cookies(self, url): + """ Return a compat_cookies.SimpleCookie with the cookies for the url """ + req = compat_urllib_request.Request(url) + self._downloader.cookiejar.add_cookie_header(req) + return compat_cookies.SimpleCookie(req.get_header('Cookie')) + def get_testcases(self, include_onlymatching=False): t = getattr(self, '_TEST', None) if t: @@ -1120,7 +1133,7 @@ class InfoExtractor(object): class SearchInfoExtractor(InfoExtractor): """ Base class for paged search queries extractors. - They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query} + They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query} Instances should define _SEARCH_KEY and _MAX_RESULTS. """