X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fcommon.py;h=e8366f7f91c663f1f7bdf70db0588016f49da3de;hb=8230018c20595a22e636b834ebb522a6a85d0d8b;hp=929dd1e97efd70e5699dc333d222fe7a97a8de9a;hpb=1c1cff6a525bc8fc506cf2c6eb8963abc3b1fcee;p=youtube-dl

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 929dd1e97..e8366f7f9 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import base64
+import datetime
 import hashlib
 import json
 import netrc
@@ -15,11 +16,13 @@ from ..utils import (
     compat_http_client,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
+    compat_urlparse,
     compat_str,
 
     clean_html,
     compiled_regex_type,
     ExtractorError,
+    float_or_none,
     int_or_none,
     RegexNotFoundError,
     sanitize_filename,
@@ -130,9 +133,13 @@ class InfoExtractor(object):
                     by YoutubeDL if it's missing)
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
+    is_live:        True, False, or None (=unknown). Whether this video is a
+                    live stream that goes on instead of a fixed-length video.
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
+    Unless mentioned otherwise, None is equivalent to absence of information.
+
     Subclasses of this one should re-define the _real_initialize() and
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
@@ -161,6 +168,14 @@ class InfoExtractor(object):
             cls._VALID_URL_RE = re.compile(cls._VALID_URL)
         return cls._VALID_URL_RE.match(url) is not None
 
+    @classmethod
+    def _match_id(cls, url):
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        m = cls._VALID_URL_RE.match(url)
+        assert m
+        return m.group('id')
+
     @classmethod
     def working(cls):
         """Getter method for _WORKING."""
@@ -266,6 +281,12 @@ class InfoExtractor(object):
             raw_filename = basen + '.dump'
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen('Saving request to ' + filename)
+            # Working around MAX_PATH limitation on Windows (see
+            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+            if os.name == 'nt':
+                absfilepath = os.path.abspath(filename)
+                if len(absfilepath) > 259:
+                    filename = '\\\\?\\' + absfilepath
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
 
@@ -321,7 +342,11 @@ class InfoExtractor(object):
         try:
             return json.loads(json_string)
         except ValueError as ve:
-            raise ExtractorError('Failed to download JSON', cause=ve)
+            errmsg = '%s: Failed to parse JSON ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
 
     def report_warning(self, msg, video_id=None):
         idstr = '' if video_id is None else '%s: ' % video_id
@@ -638,7 +663,9 @@ class InfoExtractor(object):
 
         return formats
 
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+                              entry_protocol='m3u8', preference=None):
+
         formats = [{
             'format_id': 'm3u8-meta',
             'url': m3u8_url,
@@ -649,6 +676,11 @@ class InfoExtractor(object):
             'format_note': 'Quality selection URL',
         }]
 
+        format_url = lambda u: (
+            u
+            if re.match(r'^https?://', u)
+            else compat_urlparse.urljoin(m3u8_url, u))
+
         m3u8_doc = self._download_webpage(m3u8_url, video_id)
         last_info = None
         kv_rex = re.compile(
@@ -665,15 +697,17 @@ class InfoExtractor(object):
                 continue
             else:
                 if last_info is None:
-                    formats.append({'url': line})
+                    formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
 
                 f = {
                     'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
-                    'url': line.strip(),
+                    'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
+                    'protocol': entry_protocol,
+                    'preference': preference,
                 }
                 codecs = last_info.get('CODECS')
                 if codecs:
@@ -693,6 +727,34 @@ class InfoExtractor(object):
         self._sort_formats(formats)
         return formats
 
+    def _live_title(self, name):
+        """ Generate the title for a live video """
+        now = datetime.datetime.now()
+        now_str = now.strftime("%Y-%m-%d %H:%M")
+        return name + ' ' + now_str
+
+    def _int(self, v, name, fatal=False, **kwargs):
+        res = int_or_none(v, **kwargs)
+        if 'get_attr' in kwargs:
+            print(getattr(v, kwargs['get_attr']))
+        if res is None:
+            msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+            if fatal:
+                raise ExtractorError(msg)
+            else:
+                self._downloader.report_warning(msg)
+        return res
+
+    def _float(self, v, name, fatal=False, **kwargs):
+        res = float_or_none(v, **kwargs)
+        if res is None:
+            msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+            if fatal:
+                raise ExtractorError(msg)
+            else:
+                self._downloader.report_warning(msg)
+        return res
+
 
 class SearchInfoExtractor(InfoExtractor):
     """