X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=e8366f7f91c663f1f7bdf70db0588016f49da3de;hb=8230018c20595a22e636b834ebb522a6a85d0d8b;hp=929dd1e97efd70e5699dc333d222fe7a97a8de9a;hpb=1c1cff6a525bc8fc506cf2c6eb8963abc3b1fcee;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 929dd1e97..e8366f7f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import base64 +import datetime import hashlib import json import netrc @@ -15,11 +16,13 @@ from ..utils import ( compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, + compat_urlparse, compat_str, clean_html, compiled_regex_type, ExtractorError, + float_or_none, int_or_none, RegexNotFoundError, sanitize_filename, @@ -130,9 +133,13 @@ class InfoExtractor(object): by YoutubeDL if it's missing) categories: A list of categories that the video falls in, for example ["Sports", "Berlin"] + is_live: True, False, or None (=unknown). Whether this video is a + live stream that goes on instead of a fixed-length video. Unless mentioned otherwise, the fields should be Unicode strings. + Unless mentioned otherwise, None is equivalent to absence of information. + Subclasses of this one should re-define the _real_initialize() and _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. @@ -161,6 +168,14 @@ class InfoExtractor(object): cls._VALID_URL_RE = re.compile(cls._VALID_URL) return cls._VALID_URL_RE.match(url) is not None + @classmethod + def _match_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return m.group('id') + @classmethod def working(cls): """Getter method for _WORKING.""" @@ -266,6 +281,12 @@ class InfoExtractor(object): raw_filename = basen + '.dump' filename = sanitize_filename(raw_filename, restricted=True) self.to_screen('Saving request to ' + filename) + # Working around MAX_PATH limitation on Windows (see + # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) + if os.name == 'nt': + absfilepath = os.path.abspath(filename) + if len(absfilepath) > 259: + filename = '\\\\?\\' + absfilepath with open(filename, 'wb') as outf: outf.write(webpage_bytes) @@ -321,7 +342,11 @@ class InfoExtractor(object): try: return json.loads(json_string) except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + errmsg = '%s: Failed to parse JSON ' % video_id + if fatal: + raise ExtractorError(errmsg, cause=ve) + else: + self.report_warning(errmsg + str(ve)) def report_warning(self, msg, video_id=None): idstr = '' if video_id is None else '%s: ' % video_id @@ -638,7 +663,9 @@ class InfoExtractor(object): return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None): + formats = [{ 'format_id': 'm3u8-meta', 'url': m3u8_url, @@ -649,6 +676,11 @@ class InfoExtractor(object): 'format_note': 'Quality selection URL', }] + format_url = lambda u: ( + u + if re.match(r'^https?://', u) + else compat_urlparse.urljoin(m3u8_url, u)) + m3u8_doc = self._download_webpage(m3u8_url, video_id) last_info = None kv_rex = re.compile( @@ -665,15 +697,17 @@ class InfoExtractor(object): continue else: if last_info is None: - formats.append({'url': line}) + formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) f = { 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), - 'url': line.strip(), + 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, + 'protocol': entry_protocol, + 'preference': preference, } codecs = last_info.get('CODECS') if codecs: @@ -693,6 +727,34 @@ class InfoExtractor(object): self._sort_formats(formats) return formats + def _live_title(self, name): + """ Generate the title for a live video """ + now = datetime.datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M") + return name + ' ' + now_str + + def _int(self, v, name, fatal=False, **kwargs): + res = int_or_none(v, **kwargs) + if 'get_attr' in kwargs: + print(getattr(v, kwargs['get_attr'])) + if res is None: + msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return res + + def _float(self, v, name, fatal=False, **kwargs): + res = float_or_none(v, **kwargs) + if res is None: + msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return res + class SearchInfoExtractor(InfoExtractor): """