Merge branch 'master' into opener-to-ydl
author Philipp Hagemeister <phihag@phihag.de>
Sun, 24 Nov 2013 14:18:44 +0000 (15:18 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 24 Nov 2013 14:18:44 +0000 (15:18 +0100)
1  2 
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/common.py
youtube_dl/utils.py

diff --combined youtube_dl/YoutubeDL.py
index 0a845a344baa259151347b41966fb3ce23dedf4b,d7e2417acf507e21a0b7644e11b220cc1c0c9e83..87eb1a0b37fd5342b9a50d55109a2efe29a8c2fb
@@@ -7,10 -7,8 +7,10 @@@ import errn
  import io
  import json
  import os
 +import platform
  import re
  import shutil
 +import subprocess
  import socket
  import sys
  import time
@@@ -20,7 -18,6 +20,7 @@@ if os.name == 'nt'
      import ctypes
  
  from .utils import (
 +    compat_cookiejar,
      compat_http_client,
      compat_print,
      compat_str,
      encodeFilename,
      ExtractorError,
      locked_file,
 +    make_HTTPS_handler,
      MaxDownloadsReached,
      PostProcessingError,
 +    platform_name,
      preferredencoding,
      SameFileError,
      sanitize_filename,
      UnavailableVideoError,
      write_json_file,
      write_string,
 +    YoutubeDLHandler,
  )
  from .extractor import get_info_extractor, gen_extractors
  from .FileDownloader import FileDownloader
 +from .version import __version__
  
  
  class YoutubeDL(object):
      playlistend:       Playlist item to end at.
      matchtitle:        Download only matching titles.
      rejecttitle:       Reject downloads for matching titles.
+     logger:            Log messages to a logging.Logger instance.
      logtostderr:       Log messages to stderr instead of stdout.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
      downloadarchive:   File name of a file where all downloads are recorded.
                         Videos already present in the file are not downloaded
                         again.
 +    cookiefile:        File name where cookies should be read from and dumped to.
 +    nocheckcertificate:Do not verify SSL certificates
 +    proxy:             URL of the proxy server to use
  
      The following parameters are not used by YoutubeDL itself, they are used by
      the FileDownloader:
          if '%(stitle)s' in self.params['outtmpl']:
              self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
  
 +        self._setup_opener()
 +
      def add_info_extractor(self, ie):
          """Add an InfoExtractor object to the end of the list."""
          self._ies.append(ie)
  
      def to_screen(self, message, skip_eol=False):
          """Print message to stdout if not in quiet mode."""
-         if not self.params.get('quiet', False):
+         if self.params.get('logger'):
+             self.params['logger'].debug(message)
+         elif not self.params.get('quiet', False):
              terminator = [u'\n', u''][skip_eol]
              output = message + terminator
              write_string(output, self._screen_file)
      def to_stderr(self, message):
          """Print message to stderr."""
          assert type(message) == type(u'')
-         output = message + u'\n'
-         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-             output = output.encode(preferredencoding())
-         sys.stderr.write(output)
+         if self.params.get('logger'):
+             self.params['logger'].error(message)
+         else:
+             output = message + u'\n'
+             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+                 output = output.encode(preferredencoding())
+             sys.stderr.write(output)
  
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
  
      def __exit__(self, *args):
          self.restore_console_title()
 +    
 +        if self.params.get('cookiefile') is not None:
 +            self.cookiejar.save()
  
      def fixed_template(self):
          """Checks if the output template is fixed."""
      def _match_entry(self, info_dict):
          """ Returns None iff the file should be downloaded """
  
-         title = info_dict['title']
-         matchtitle = self.params.get('matchtitle', False)
-         if matchtitle:
-             if not re.search(matchtitle, title, re.IGNORECASE):
-                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
-         rejecttitle = self.params.get('rejecttitle', False)
-         if rejecttitle:
-             if re.search(rejecttitle, title, re.IGNORECASE):
-                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+         if 'title' in info_dict:
+             # This can happen when we're just evaluating the playlist
+             title = info_dict['title']
+             matchtitle = self.params.get('matchtitle', False)
+             if matchtitle:
+                 if not re.search(matchtitle, title, re.IGNORECASE):
+                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+             rejecttitle = self.params.get('rejecttitle', False)
+             if rejecttitle:
+                 if re.search(rejecttitle, title, re.IGNORECASE):
+                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
          date = info_dict.get('upload_date', None)
          if date is not None:
              dateRange = self.params.get('daterange', DateRange())
              if age_limit < info_dict.get('age_limit', 0):
                  return u'Skipping "' + title + '" because it is age restricted'
          if self.in_download_archive(info_dict):
-             return (u'%(title)s has already been recorded in archive'
-                     % info_dict)
+             return (u'%s has already been recorded in archive'
+                     % info_dict.get('title', info_dict.get('id', u'video')))
          return None
  
      @staticmethod
                                       ie_key=ie_result.get('ie_key'),
                                       extra_info=extra_info)
          elif result_type == 'playlist':
-             self.add_extra_info(ie_result, extra_info)
              # We process each entry in the playlist
              playlist = ie_result.get('title', None) or ie_result.get('id', None)
              self.to_screen(u'[download] Downloading playlist: %s' % playlist)
                      'webpage_url': ie_result['webpage_url'],
                      'extractor_key': ie_result['extractor_key'],
                  }
+                 reason = self._match_entry(entry)
+                 if reason is not None:
+                     self.to_screen(u'[download] ' + reason)
+                     continue
                  entry_result = self.process_ie_result(entry,
                                                        download=download,
                                                        extra_info=extra)
  
          # Forced printings
          if self.params.get('forcetitle', False):
-             compat_print(info_dict['title'])
+             compat_print(info_dict['fulltitle'])
          if self.params.get('forceid', False):
              compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
          for url in url_list:
              try:
                  #It also downloads the videos
 -                videos = self.extract_info(url)
 +                self.extract_info(url)
              except UnavailableVideoError:
                  self.report_error(u'unable to download video')
              except MaxDownloadsReached:
          fn = self.params.get('download_archive')
          if fn is None:
              return False
-         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+         extractor = info_dict.get('extractor_id')
+         if extractor is None:
+             if 'id' in info_dict:
+                 extractor = info_dict.get('ie_key')  # key in a playlist
+         if extractor is None:
+             return False  # Incomplete video information
+         # Future-proof against any change in case
+         # and backwards compatibility with prior versions
+         extractor = extractor.lower()
+         vid_id = extractor + u' ' + info_dict['id']
          try:
              with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                  for line in archive_file:
              '_resolution': u'resolution', 'format_note': u'note'})
          self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                         (info_dict['id'], header_line, u"\n".join(formats_s)))
 +
 +    def urlopen(self, req):
 +        """ Start an HTTP download """
 +        return self._opener.open(req)
 +
 +    def print_debug_header(self):
 +        if not self.params.get('verbose'):
 +            return
 +        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 +        try:
 +            sp = subprocess.Popen(
 +                ['git', 'rev-parse', '--short', 'HEAD'],
 +                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 +                cwd=os.path.dirname(os.path.abspath(__file__)))
 +            out, err = sp.communicate()
 +            out = out.decode().strip()
 +            if re.match('[0-9a-f]+', out):
 +                write_string(u'[debug] Git HEAD: ' + out + u'\n')
 +        except:
 +            try:
 +                sys.exc_clear()
 +            except:
 +                pass
 +        write_string(u'[debug] Python version %s - %s' %
 +                     (platform.python_version(), platform_name()) + u'\n')
 +
 +        proxy_map = {}
 +        for handler in self._opener.handlers:
 +            if hasattr(handler, 'proxies'):
 +                proxy_map.update(handler.proxies)
 +        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 +
 +    def _setup_opener(self, timeout=300):
 +        opts_cookiefile = self.params.get('cookiefile')
 +        opts_proxy = self.params.get('proxy')
 +
 +        if opts_cookiefile is None:
 +            self.cookiejar = compat_cookiejar.CookieJar()
 +        else:
 +            self.cookiejar = compat_cookiejar.MozillaCookieJar(
 +                opts_cookiefile)
 +            if os.access(opts_cookiefile, os.R_OK):
 +                self.cookiejar.load()
 +
 +        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
 +            self.cookiejar)
 +        if opts_proxy is not None:
 +            if opts_proxy == '':
 +                proxies = {}
 +            else:
 +                proxies = {'http': opts_proxy, 'https': opts_proxy}
 +        else:
 +            proxies = compat_urllib_request.getproxies()
 +            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
 +            if 'http' in proxies and 'https' not in proxies:
 +                proxies['https'] = proxies['http']
 +        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 +        https_handler = make_HTTPS_handler(
 +            self.params.get('nocheckcertificate', False))
 +        opener = compat_urllib_request.build_opener(
 +            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
 +        # Delete the default user-agent header, which would otherwise apply in
 +        # cases where our custom HTTP handler doesn't come into play
 +        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
 +        opener.addheaders = []
 +        self._opener = opener
 +
 +        # TODO remove this global modification
 +        compat_urllib_request.install_opener(opener)
 +        socket.setdefaulttimeout(timeout)
diff --combined youtube_dl/__init__.py
index 27886593b4f40a2a4108b867bb7ca71b26379e93,19904dbfdd8f0a7427e31aa0e8bc547573a4b4f4..1f15c7eaa03acc63a5d3cbf1e244b292a053344e
@@@ -35,40 -35,51 +35,41 @@@ __authors__  = 
      'Jelle van der Waa',
      'Marcin Cieślak',
      'Anton Larionov',
+     'Takuya Tsuchida',
  )
  
  __license__ = 'Public Domain'
  
  import codecs
 -import collections
  import getpass
  import optparse
  import os
  import random
  import re
  import shlex
 -import socket
  import subprocess
  import sys
 -import traceback
 -import platform
  
  
  from .utils import (
 -    compat_cookiejar,
      compat_print,
 -    compat_str,
 -    compat_urllib_request,
      DateRange,
      decodeOption,
      determine_ext,
      DownloadError,
      get_cachedir,
 -    make_HTTPS_handler,
      MaxDownloadsReached,
 -    platform_name,
      preferredencoding,
      SameFileError,
      std_headers,
      write_string,
 -    YoutubeDLHandler,
  )
  from .update import update_self
 -from .version import __version__
  from .FileDownloader import (
      FileDownloader,
  )
  from .extractor import gen_extractors
 +from .version import __version__
  from .YoutubeDL import YoutubeDL
  from .PostProcessor import (
      FFmpegMetadataPP,
@@@ -441,6 -452,19 +442,6 @@@ def _real_main(argv=None)
  
      parser, opts, args = parseOpts(argv)
  
 -    # Open appropriate CookieJar
 -    if opts.cookiefile is None:
 -        jar = compat_cookiejar.CookieJar()
 -    else:
 -        try:
 -            jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
 -            if os.access(opts.cookiefile, os.R_OK):
 -                jar.load()
 -        except (IOError, OSError) as err:
 -            if opts.verbose:
 -                traceback.print_exc()
 -            write_string(u'ERROR: unable to open cookie file\n')
 -            sys.exit(101)
      # Set user agent
      if opts.user_agent is not None:
          std_headers['User-Agent'] = opts.user_agent
      all_urls = batchurls + args
      all_urls = [url.strip() for url in all_urls]
  
 -    opener = _setup_opener(jar=jar, opts=opts)
 -
      extractors = gen_extractors()
  
      if opts.list_extractors:
      if opts.retries is not None:
          try:
              opts.retries = int(opts.retries)
 -        except (TypeError, ValueError) as err:
 +        except (TypeError, ValueError):
              parser.error(u'invalid retry count specified')
      if opts.buffersize is not None:
          numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
          opts.playliststart = int(opts.playliststart)
          if opts.playliststart <= 0:
              raise ValueError(u'Playlist start must be positive')
 -    except (TypeError, ValueError) as err:
 +    except (TypeError, ValueError):
          parser.error(u'invalid playlist start number specified')
      try:
          opts.playlistend = int(opts.playlistend)
          if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
              raise ValueError(u'Playlist end must be greater than playlist start')
 -    except (TypeError, ValueError) as err:
 +    except (TypeError, ValueError):
          parser.error(u'invalid playlist end number specified')
      if opts.extractaudio:
          if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
          'youtube_print_sig_code': opts.youtube_print_sig_code,
          'age_limit': opts.age_limit,
          'download_archive': opts.download_archive,
 +        'cookiefile': opts.cookiefile,
 +        'nocheckcertificate': opts.no_check_certificate,
      }
  
      with YoutubeDL(ydl_opts) as ydl:
 -        if opts.verbose:
 -            write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 -            try:
 -                sp = subprocess.Popen(
 -                    ['git', 'rev-parse', '--short', 'HEAD'],
 -                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 -                    cwd=os.path.dirname(os.path.abspath(__file__)))
 -                out, err = sp.communicate()
 -                out = out.decode().strip()
 -                if re.match('[0-9a-f]+', out):
 -                    write_string(u'[debug] Git HEAD: ' + out + u'\n')
 -            except:
 -                try:
 -                    sys.exc_clear()
 -                except:
 -                    pass
 -            write_string(u'[debug] Python version %s - %s' %
 -                         (platform.python_version(), platform_name()) + u'\n')
 -
 -            proxy_map = {}
 -            for handler in opener.handlers:
 -                if hasattr(handler, 'proxies'):
 -                    proxy_map.update(handler.proxies)
 -            write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 -
 +        ydl.print_debug_header()
          ydl.add_default_info_extractors()
  
          # PostProcessors
              ydl.to_screen(u'--max-download limit reached, aborting.')
              retcode = 101
  
 -    # Dump cookie jar if requested
 -    if opts.cookiefile is not None:
 -        try:
 -            jar.save()
 -        except (IOError, OSError):
 -            sys.exit(u'ERROR: unable to save cookie jar')
 -
      sys.exit(retcode)
  
  
 -def _setup_opener(jar=None, opts=None, timeout=300):
 -    if opts is None:
 -        FakeOptions = collections.namedtuple(
 -            'FakeOptions', ['proxy', 'no_check_certificate'])
 -        opts = FakeOptions(proxy=None, no_check_certificate=False)
 -
 -    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
 -    if opts.proxy is not None:
 -        if opts.proxy == '':
 -            proxies = {}
 -        else:
 -            proxies = {'http': opts.proxy, 'https': opts.proxy}
 -    else:
 -        proxies = compat_urllib_request.getproxies()
 -        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
 -        if 'http' in proxies and 'https' not in proxies:
 -            proxies['https'] = proxies['http']
 -    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 -    https_handler = make_HTTPS_handler(opts)
 -    opener = compat_urllib_request.build_opener(
 -        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
 -    # Delete the default user-agent header, which would otherwise apply in
 -    # cases where our custom HTTP handler doesn't come into play
 -    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
 -    opener.addheaders = []
 -    compat_urllib_request.install_opener(opener)
 -    socket.setdefaulttimeout(timeout)
 -    return opener
 -
 -
  def main(argv=None):
      try:
          _real_main(argv)
diff --combined youtube_dl/extractor/common.py
index 423e54ceaa2544d62d2d266f5a499caef5293c72,482a231ec0dc4632daabb4526253dae89bea64f8..6ec835f8af56c61af620a389f8706125c21c8099
@@@ -4,10 -4,12 +4,11 @@@ import r
  import socket
  import sys
  import netrc
+ import xml.etree.ElementTree
  
  from ..utils import (
      compat_http_client,
      compat_urllib_error,
 -    compat_urllib_request,
      compat_str,
  
      clean_html,
@@@ -18,7 -20,6 +19,7 @@@
      unescapeHTML,
  )
  
 +
  class InfoExtractor(object):
      """Information Extractor class.
  
          elif note is not False:
              self.to_screen(u'%s: %s' % (video_id, note))
          try:
 -            return compat_urllib_request.urlopen(url_or_request)
 +            return self._downloader.urlopen(url_or_request)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
          """ Returns the data of the page as a string """
          return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
  
+     def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
+         """Return the xml as an xml.etree.ElementTree.Element"""
+         xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
      def to_screen(self, msg):
          """Print msg to screen, prefixing it with '[ie_name]'"""
          self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
          self.to_screen(u'Logging in')
  
      #Methods for following #608
-     def url_result(self, url, ie=None):
+     def url_result(self, url, ie=None, video_id=None):
          """Returns a url that points to a page that should be processed"""
          #TODO: ie should be the class used for getting the info
          video_info = {'_type': 'url',
                        'url': url,
                        'ie_key': ie}
+         if video_id is not None:
+             video_info['id'] = video_id
          return video_info
      def playlist_result(self, entries, playlist_id=None, playlist_title=None):
          """Returns a playlist"""
diff --combined youtube_dl/utils.py
index 0d2b7bd10e1f385bf6515c0faae6588a1c7f31ec,34b3d19e05dcc23cc69930e54ddd9ddf41af1142..317aee2b572f31effd654bfefc95601f77b6482b
@@@ -12,6 -12,7 +12,7 @@@ import o
  import pipes
  import platform
  import re
+ import ssl
  import socket
  import sys
  import traceback
@@@ -535,17 -536,35 +536,34 @@@ def formatSeconds(secs)
      else:
          return '%d' % secs
  
 -
 -def make_HTTPS_handler(opts):
 +def make_HTTPS_handler(opts_no_check_certificate):
-     if sys.version_info < (3,2):
-         # Python's 2.x handler is very simplistic
-         return compat_urllib_request.HTTPSHandler()
+     if sys.version_info < (3, 2):
+         import httplib
+         class HTTPSConnectionV3(httplib.HTTPSConnection):
+             def __init__(self, *args, **kwargs):
+                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
+             def connect(self):
+                 sock = socket.create_connection((self.host, self.port), self.timeout)
+                 if self._tunnel_host:
+                     self.sock = sock
+                     self._tunnel()
+                 try:
+                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
+                 except ssl.SSLError as e:
+                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
+         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
+             def https_open(self, req):
+                 return self.do_open(HTTPSConnectionV3, req)
+         return HTTPSHandlerV3()
      else:
-         import ssl
-         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+         context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
          context.set_default_verify_paths()
          
          context.verify_mode = (ssl.CERT_NONE
 -                               if opts.no_check_certificate
 +                               if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          return compat_urllib_request.HTTPSHandler(context=context)