[downloader/http] Simplify

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 4c8196d082fa9cfffbb45a1c5f5684664ecc1a80..702a6ad50b6c6bf2d3f3bfbd8c873cb3a64c8e7b 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -49,6 +49,7 @@ from .utils import (
      ExtractorError,
      format_bytes,
      formatSeconds,
+    HEADRequest,
      locked_file,
      make_HTTPS_handler,
      MaxDownloadsReached,
@@ -118,7 +119,7 @@ class YoutubeDL(object):
  
      username:          Username for authentication purposes.
      password:          Password for authentication purposes.
-    videopassword:     Password for acces a video.
+    videopassword:     Password for accessing a video.
      usenetrc:          Use netrc for authentication instead.
      verbose:           Print additional info to stdout.
      quiet:             Do not print messages to stdout.
@@ -138,6 +139,7 @@ class YoutubeDL(object):
      outtmpl:           Template for output names.
      restrictfilenames: Do not allow "&" and spaces in file names
      ignoreerrors:      Do not stop on download errors.
+    force_generic_extractor: Force downloader to use the generic extractor
      nooverwrites:      Prevent overwriting files.
      playliststart:     Playlist item to start at.
      playlistend:       Playlist item to end at.
@@ -260,6 +262,8 @@ class YoutubeDL(object):
      The following options are used by the post processors:
      prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                         otherwise prefer avconv.
+    postprocessor_args: A list of additional command-line arguments for the
+                        postprocessor.
      """
  
      params = None
@@ -625,13 +629,16 @@ class YoutubeDL(object):
              info_dict.setdefault(key, value)
  
      def extract_info(self, url, download=True, ie_key=None, extra_info={},
-                     process=True):
+                     process=True, force_generic_extractor=False):
          '''
          Returns a list with a dictionary for each video we find.
          If 'download', also downloads the videos.
          extra_info is a dict containing the extra values to add to each result
          '''
  
+        if not ie_key and force_generic_extractor:
+            ie_key = 'Generic'
+
          if ie_key:
              ies = [self.get_info_extractor(ie_key)]
          else:
@@ -759,7 +766,9 @@ class YoutubeDL(object):
              if isinstance(ie_entries, list):
                  n_all_entries = len(ie_entries)
                  if playlistitems:
-                    entries = [ie_entries[i - 1] for i in playlistitems]
+                    entries = [
+                        ie_entries[i - 1] for i in playlistitems
+                        if -n_all_entries <= i - 1 < n_all_entries]
                  else:
                      entries = ie_entries[playliststart:playlistend]
                  n_entries = len(entries)
@@ -921,8 +930,9 @@ class YoutubeDL(object):
                  if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
              if audiovideo_formats:
                  return audiovideo_formats[format_idx]
-            # for audio only urls, select the best/worst audio format
-            elif all(f.get('acodec') != 'none' for f in available_formats):
+            # for audio only (soundcloud) or video only (imgur) urls, select the best/worst available format
+            elif (all(f.get('acodec') != 'none' for f in available_formats) or
+                  all(f.get('vcodec') != 'none' for f in available_formats)):
                  return available_formats[format_idx]
          elif format_spec == 'bestaudio':
              audio_formats = [
@@ -1000,7 +1010,7 @@ class YoutubeDL(object):
                  t.get('preference'), t.get('width'), t.get('height'),
                  t.get('id'), t.get('url')))
              for i, t in enumerate(thumbnails):
-                if 'width' in t and 'height' in t:
+                if t.get('width') and t.get('height'):
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
                  if t.get('id') is None:
                      t['id'] = '%d' % i
@@ -1012,13 +1022,13 @@ class YoutubeDL(object):
              info_dict['display_id'] = info_dict['id']
  
          if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around negative timestamps in Windows
-            # (see http://bugs.python.org/issue1646728)
-            if info_dict['timestamp'] < 0 and os.name == 'nt':
-                info_dict['timestamp'] = 0
-            upload_date = datetime.datetime.utcfromtimestamp(
-                info_dict['timestamp'])
-            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+            # see http://bugs.python.org/issue1646728)
+            try:
+                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+            except (ValueError, OverflowError, OSError):
+                pass
  
          if self.params.get('listsubtitles', False):
              if 'automatic_captions' in info_dict:
@@ -1029,12 +1039,6 @@ class YoutubeDL(object):
              info_dict['id'], info_dict.get('subtitles'),
              info_dict.get('automatic_captions'))
  
-        # This extractors handle format selection themselves
-        if info_dict['extractor'] in ['Youku']:
-            if download:
-                self.process_info(info_dict)
-            return info_dict
-
          # We now pick which formats have to be downloaded
          if info_dict.get('formats') is None:
              # There's only one format available
@@ -1045,6 +1049,8 @@ class YoutubeDL(object):
          if not formats:
              raise ExtractorError('No video formats found!')
  
+        formats_dict = {}
+
          # We check that all the formats have the format and format_id fields
          for i, format in enumerate(formats):
              if 'url' not in format:
@@ -1052,6 +1058,18 @@ class YoutubeDL(object):
  
              if format.get('format_id') is None:
                  format['format_id'] = compat_str(i)
+            format_id = format['format_id']
+            if format_id not in formats_dict:
+                formats_dict[format_id] = []
+            formats_dict[format_id].append(format)
+
+        # Make sure all formats have unique format_id
+        for format_id, ambiguous_formats in formats_dict.items():
+            if len(ambiguous_formats) > 1:
+                for i, format in enumerate(ambiguous_formats):
+                    format['format_id'] = '%s-%d' % (format_id, i)
+
+        for i, format in enumerate(formats):
              if format.get('format') is None:
                  format['format'] = '{id} - {res}{note}'.format(
                      id=format['format_id'],
@@ -1086,7 +1104,8 @@ class YoutubeDL(object):
          if req_format is None:
              req_format_list = []
              if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    info_dict['extractor'] in ['youtube', 'ted']):
+                    info_dict['extractor'] in ['youtube', 'ted'] and
+                    not info_dict.get('is_live')):
                  merger = FFmpegMergerPP(self)
                  if merger.available and merger.can_merge():
                      req_format_list.append('bestvideo+bestaudio')
@@ -1366,7 +1385,7 @@ class YoutubeDL(object):
                          postprocessors = []
                          self.report_warning('You have requested multiple '
                                              'formats but ffmpeg or avconv are not installed.'
-                                            ' The formats won\'t be merged')
+                                            ' The formats won\'t be merged.')
                      else:
                          postprocessors = [merger]
  
@@ -1393,8 +1412,8 @@ class YoutubeDL(object):
                      requested_formats = info_dict['requested_formats']
                      if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                          info_dict['ext'] = 'mkv'
-                        self.report_warning('You have requested formats incompatible for merge. '
-                                            'The formats will be merged into mkv')
+                        self.report_warning(
+                            'Requested formats are incompatible for merge and will be merged into mkv.')
                      # Ensure filename always has a correct extension for successful merge
                      filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                      if os.path.exists(encodeFilename(filename)):
@@ -1481,7 +1500,8 @@ class YoutubeDL(object):
          for url in url_list:
              try:
                  # It also downloads the videos
-                res = self.extract_info(url)
+                res = self.extract_info(
+                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
              except UnavailableVideoError:
                  self.report_error('unable to download video')
              except MaxDownloadsReached:
@@ -1525,6 +1545,7 @@ class YoutubeDL(object):
              pps_chain.extend(ie_info['__postprocessors'])
          pps_chain.extend(self._pps)
          for pp in pps_chain:
+            files_to_delete = []
              try:
                  files_to_delete, info = pp.run(info)
              except PostProcessingError as e:
@@ -1703,7 +1724,8 @@ class YoutubeDL(object):
              if req_is_string:
                  req = url_escaped
              else:
-                req = compat_urllib_request.Request(
+                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
+                req = req_type(
                      url_escaped, data=req.data, headers=req.headers,
                      origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
  
@@ -1849,7 +1871,7 @@ class YoutubeDL(object):
              thumb_ext = determine_ext(t['url'], 'jpg')
              suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
              thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
  
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                  self.to_screen('[%s] %s: Thumbnail %sis already present' %