[vgtv] Add new extractor

[youtube-dl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 10b0cbe695a68b6a3c3928a6c89125020a2fff3d..e68657314ecde5406ec2d27fef005f899341daf1 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,11 +1,12 @@
  import base64
  import hashlib
  import json
+import netrc
  import os
  import re
  import socket
  import sys
-import netrc
+import time
  import xml.etree.ElementTree
  
  from ..utils import (
@@ -92,8 +93,12 @@ class InfoExtractor(object):
                      unique, but available before title. Typically, id is
                      something like "4234987", title "Dancing naked mole rats",
                      and display_id "dancing-naked-mole-rats"
-    thumbnails:     A list of dictionaries (with the entries "resolution" and
-                    "url") for the varying thumbnails
+    thumbnails:     A list of dictionaries, with the following entries:
+                        * "url"
+                        * "width" (optional, int)
+                        * "height" (optional, int)
+                        * "resolution" (optional, string "{width}x{height}",
+                                        deprecated)
      thumbnail:      Full URL to a video thumbnail image.
      description:    One-line video description.
      uploader:       Full name of the video uploader.
@@ -113,6 +118,8 @@ class InfoExtractor(object):
      webpage_url:    The url to the video webpage, if given to youtube-dl it
                      should allow to get the same result again. (It will be set
                      by YoutubeDL if it's missing)
+    categories:     A list of categories that the video falls in, for example
+                    ["Sports", "Berlin"]
  
      Unless mentioned otherwise, the fields should be Unicode strings.
  
@@ -242,10 +249,11 @@ class InfoExtractor(object):
                  url = url_or_request.get_full_url()
              except AttributeError:
                  url = url_or_request
-            if len(url) > 200:
-                h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
-                url = url[:200 - len(h)] + h
-            raw_filename = ('%s_%s.dump' % (video_id, url))
+            basen = '%s_%s' % (video_id, url)
+            if len(basen) > 240:
+                h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+                basen = basen[:240 - len(h)] + h
+            raw_filename = basen + '.dump'
              filename = sanitize_filename(raw_filename, restricted=True)
              self.to_screen(u'Saving request to ' + filename)
              with open(filename, 'wb') as outf:
@@ -452,14 +460,17 @@ class InfoExtractor(object):
          if secure: regexes = self._og_regexes('video:secure_url') + regexes
          return self._html_search_regex(regexes, html, name, **kargs)
  
-    def _html_search_meta(self, name, html, display_name=None, fatal=False):
+    def _og_search_url(self, html, **kargs):
+        return self._og_search_property('url', html, **kargs)
+
+    def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
          if display_name is None:
              display_name = name
          return self._html_search_regex(
              r'''(?ix)<meta
                      (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
                      [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=fatal)
+            html, display_name, fatal=fatal, **kwargs)
  
      def _dc_search_uploader(self, html):
          return self._html_search_meta('dc.creator', html, 'uploader')
@@ -555,6 +566,23 @@ class InfoExtractor(object):
              if self._downloader.params.get('prefer_insecure', False)
              else 'https:')
  
+    def _proto_relative_url(self, url, scheme=None):
+        if url is None:
+            return url
+        if url.startswith('//'):
+            if scheme is None:
+                scheme = self.http_scheme()
+            return scheme + url
+        else:
+            return url
+
+    def _sleep(self, timeout, video_id, msg_template=None):
+        if msg_template is None:
+            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+        msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+        self.to_screen(msg)
+        time.sleep(timeout)
+
  
  class SearchInfoExtractor(InfoExtractor):
      """
@@ -598,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):
      @property
      def SEARCH_KEY(self):
          return self._SEARCH_KEY
-