X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=f43a0a569a3d90d555a27cece8ac3e68951c5106;hb=99b67fecc5ab6c57eada1e1678034dd71c57e338;hp=929dd1e97efd70e5699dc333d222fe7a97a8de9a;hpb=1c1cff6a525bc8fc506cf2c6eb8963abc3b1fcee;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 929dd1e97..f43a0a569 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import base64 +import datetime import hashlib import json import netrc @@ -15,11 +16,13 @@ from ..utils import ( compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, + compat_urlparse, compat_str, clean_html, compiled_regex_type, ExtractorError, + float_or_none, int_or_none, RegexNotFoundError, sanitize_filename, @@ -130,6 +133,8 @@ class InfoExtractor(object): by YoutubeDL if it's missing) categories: A list of categories that the video falls in, for example ["Sports", "Berlin"] + is_live: True, False, or None (=unknown). Whether this video is a + live stream that goes on instead of a fixed-length video. Unless mentioned otherwise, the fields should be Unicode strings. @@ -161,6 +166,14 @@ class InfoExtractor(object): cls._VALID_URL_RE = re.compile(cls._VALID_URL) return cls._VALID_URL_RE.match(url) is not None + @classmethod + def _match_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return m.group('id') + @classmethod def working(cls): """Getter method for _WORKING.""" @@ -638,7 +651,9 @@ class InfoExtractor(object): return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None): + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None): + formats = [{ 'format_id': 'm3u8-meta', 'url': m3u8_url, @@ -649,6 +664,11 @@ class InfoExtractor(object): 'format_note': 'Quality selection URL', }] + format_url = lambda u: ( + u + if re.match(r'^https?://', u) + else compat_urlparse.urljoin(m3u8_url, u)) + m3u8_doc = self._download_webpage(m3u8_url, video_id) last_info = None kv_rex = re.compile( @@ -665,15 +685,17 @@ class InfoExtractor(object): continue else: if last_info is None: - formats.append({'url': line}) + formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) f = { 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), - 'url': line.strip(), + 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, + 'protocol': entry_protocol, + 'preference': preference, } codecs = last_info.get('CODECS') if codecs: @@ -693,6 +715,34 @@ class InfoExtractor(object): self._sort_formats(formats) return formats + def _live_title(self, name): + """ Generate the title for a live video """ + now = datetime.datetime.now() + now_str = now.strftime("%Y-%m-%d %H:%M") + return name + ' ' + now_str + + def _int(self, v, name, fatal=False, **kwargs): + res = int_or_none(v, **kwargs) + if 'get_attr' in kwargs: + print(getattr(v, kwargs['get_attr'])) + if res is None: + msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return res + + def _float(self, v, name, fatal=False, **kwargs): + res = float_or_none(v, **kwargs) + if res is None: + msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + if fatal: + raise ExtractorError(msg) + else: + self._downloader.report_warning(msg) + return res + class SearchInfoExtractor(InfoExtractor): """