[YoutubeDL] Add --playlist-items option (Fixes #2662)

[youtube-dl] / youtube_dl / YoutubeDL.py
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 54e732943bcfbf2c22bec9cdb647c4a806e9ada5..b7e93b8ddafb13a9897b9947ec0ad83d09dd1288 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -54,8 +54,10 @@ from .utils import (
      PostProcessingError,
      platform_name,
      preferredencoding,
      PostProcessingError,
      platform_name,
      preferredencoding,
+    render_table,
      SameFileError,
      sanitize_filename,
      SameFileError,
      sanitize_filename,
+    std_headers,
      subtitles_filename,
      takewhile_inclusive,
      UnavailableVideoError,
      subtitles_filename,
      takewhile_inclusive,
      UnavailableVideoError,
@@ -135,6 +137,7 @@ class YoutubeDL(object):
      nooverwrites:      Prevent overwriting files.
      playliststart:     Playlist item to start at.
      playlistend:       Playlist item to end at.
      nooverwrites:      Prevent overwriting files.
      playliststart:     Playlist item to start at.
      playlistend:       Playlist item to end at.
+    playlist_items:    Specific indices of playlist to download.
      playlistreverse:   Download playlist items in reverse order.
      matchtitle:        Download only matching titles.
      rejecttitle:       Reject downloads for matching titles.
      playlistreverse:   Download playlist items in reverse order.
      matchtitle:        Download only matching titles.
      rejecttitle:       Reject downloads for matching titles.
@@ -144,6 +147,7 @@ class YoutubeDL(object):
      writeinfojson:     Write the video description to a .info.json file
      writeannotations:  Write the video annotations to a .annotations.xml file
      writethumbnail:    Write the thumbnail image to a file
      writeinfojson:     Write the video description to a .info.json file
      writeannotations:  Write the video annotations to a .annotations.xml file
      writethumbnail:    Write the thumbnail image to a file
+    write_all_thumbnails:  Write all thumbnail formats to files
      writesubtitles:    Write the video subtitles to a file
      writeautomaticsub: Write the automatic subtitles to a file
      allsubtitles:      Downloads all the subtitles of the video
      writesubtitles:    Write the video subtitles to a file
      writeautomaticsub: Write the automatic subtitles to a file
      allsubtitles:      Downloads all the subtitles of the video
@@ -220,6 +224,8 @@ class YoutubeDL(object):
                         youtube-dl servers for debugging.
      sleep_interval:    Number of seconds to sleep before each download.
      external_downloader:  Executable of the external downloader to call.
                         youtube-dl servers for debugging.
      sleep_interval:    Number of seconds to sleep before each download.
      external_downloader:  Executable of the external downloader to call.
+    listformats:       Print an overview of available video formats and exit.
+    list_thumbnails:   Print a table of all thumbnails and exit.
  
  
      The following parameters are not used by YoutubeDL itself, they are used by
  
  
      The following parameters are not used by YoutubeDL itself, they are used by
@@ -698,24 +704,51 @@ class YoutubeDL(object):
              if playlistend == -1:
                  playlistend = None
  
              if playlistend == -1:
                  playlistend = None
  
+            playlistitems_str = self.params.get('playlist_items', None)
+            playlistitems = None
+            if playlistitems_str is not None:
+                def iter_playlistitems(format):
+                    # Generator over the playlist indices named by a
+                    # comma-separated spec string: a plain segment yields
+                    # int(segment); an 'a-b' segment yields every integer
+                    # from a to b inclusive.
+                    # NOTE(review): the parameter name shadows the builtin
+                    # format(); a segment with more than one '-' makes the
+                    # two-name unpacking below raise ValueError.
+                    for string_segment in format.split(','):
+                        if '-' in string_segment:
+                            start, end = string_segment.split('-')
+                            # + 1 so that the end index itself is included
+                            for item in range(int(start), int(end) + 1):
+                                yield int(item)
+                        else:
+                            yield int(string_segment)
+                playlistitems = iter_playlistitems(playlistitems_str)
+
              ie_entries = ie_result['entries']
              if isinstance(ie_entries, list):
                  n_all_entries = len(ie_entries)
              ie_entries = ie_result['entries']
              if isinstance(ie_entries, list):
                  n_all_entries = len(ie_entries)
-                entries = ie_entries[playliststart:playlistend]
+                if playlistitems:
+                    entries = [ie_entries[i - 1] for i in playlistitems]
+                else:
+                    entries = ie_entries[playliststart:playlistend]
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                      (ie_result['extractor'], playlist, n_all_entries, n_entries))
              elif isinstance(ie_entries, PagedList):
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                      (ie_result['extractor'], playlist, n_all_entries, n_entries))
              elif isinstance(ie_entries, PagedList):
-                entries = ie_entries.getslice(
-                    playliststart, playlistend)
+                if playlistitems:
+                    entries = []
+                    for item in playlistitems:
+                        entries.extend(ie_entries.getslice(
+                            item - 1, item
+                        ))
+                else:
+                    entries = ie_entries.getslice(
+                        playliststart, playlistend)
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Downloading %d videos" %
                      (ie_result['extractor'], playlist, n_entries))
              else:  # iterable
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Downloading %d videos" %
                      (ie_result['extractor'], playlist, n_entries))
              else:  # iterable
-                entries = list(itertools.islice(
-                    ie_entries, playliststart, playlistend))
+                if playlistitems:
+                    entry_list = list(ie_entries)
+                    entries = [entry_list[i - 1] for i in playlistitems]
+                else:
+                    entries = list(itertools.islice(
+                        ie_entries, playliststart, playlistend))
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Downloading %d videos" %
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Downloading %d videos" %
@@ -865,6 +898,42 @@ class YoutubeDL(object):
                  return matches[-1]
          return None
  
                  return matches[-1]
          return None
  
+    def _calc_headers(self, info_dict):
+        # Build and return the HTTP header dict for info_dict: the imported
+        # std_headers, overridden by info_dict['http_headers'] when that is
+        # set, plus a 'Cookie' entry when _calc_cookies() returns a value.
+
+        # Work on a copy so that std_headers itself is never modified
+        res = std_headers.copy()
+
+        add_headers = info_dict.get('http_headers')
+        if add_headers:
+            res.update(add_headers)
+
+        cookies = self._calc_cookies(info_dict)
+        if cookies:
+            res['Cookie'] = cookies
+
+        return res
+
+    def _calc_cookies(self, info_dict):
+        # Return the 'Cookie' header value that self.cookiejar adds for a
+        # request to info_dict['url'], or None if it adds no such header.
+        # Raises KeyError if info_dict has no 'url' key.
+
+        # Minimal stand-in for a request object: it only records the headers
+        # handed to it via add_unredirected_header().
+        # NOTE(review): presumably this is the subset of the urllib request
+        # interface that the cookie jar's add_cookie_header() needs --
+        # confirm against the cookiejar implementation in use.
+        class _PseudoRequest(object):
+            def __init__(self, url):
+                self.url = url
+                self.headers = {}
+                self.unverifiable = False
+
+            def add_unredirected_header(self, k, v):
+                self.headers[k] = v
+
+            def get_full_url(self):
+                return self.url
+
+            def is_unverifiable(self):
+                return self.unverifiable
+
+            def has_header(self, h):
+                return h in self.headers
+
+        pr = _PseudoRequest(info_dict['url'])
+        # Let the cookie jar write its header(s) into pr.headers
+        self.cookiejar.add_cookie_header(pr)
+        return pr.headers.get('Cookie')
+
      def process_video_result(self, info_dict, download=True):
          assert info_dict.get('_type', 'video') == 'video'
  
      def process_video_result(self, info_dict, download=True):
          assert info_dict.get('_type', 'video') == 'video'
  
@@ -879,9 +948,14 @@ class YoutubeDL(object):
              info_dict['playlist_index'] = None
  
          thumbnails = info_dict.get('thumbnails')
              info_dict['playlist_index'] = None
  
          thumbnails = info_dict.get('thumbnails')
+        if thumbnails is None:
+            thumbnail = info_dict.get('thumbnail')
+            if thumbnail:
+                thumbnails = [{'url': thumbnail}]
          if thumbnails:
              thumbnails.sort(key=lambda t: (
          if thumbnails:
              thumbnails.sort(key=lambda t: (
-                t.get('width'), t.get('height'), t.get('url')))
+                t.get('preference'), t.get('width'), t.get('height'),
+                t.get('id'), t.get('url')))
              for t in thumbnails:
                  if 'width' in t and 'height' in t:
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
              for t in thumbnails:
                  if 'width' in t and 'height' in t:
                      t['resolution'] = '%dx%d' % (t['width'], t['height'])
@@ -933,6 +1007,11 @@ class YoutubeDL(object):
              # Automatically determine file extension if missing
              if 'ext' not in format:
                  format['ext'] = determine_ext(format['url']).lower()
              # Automatically determine file extension if missing
              if 'ext' not in format:
                  format['ext'] = determine_ext(format['url']).lower()
+            # Add HTTP headers, so that external programs can use them from the
+            # json output
+            full_format_info = info_dict.copy()
+            full_format_info.update(format)
+            format['http_headers'] = self._calc_headers(full_format_info)
  
          format_limit = self.params.get('format_limit', None)
          if format_limit:
  
          format_limit = self.params.get('format_limit', None)
          if format_limit:
@@ -948,9 +1027,12 @@ class YoutubeDL(object):
              # element in the 'formats' field in info_dict is info_dict itself,
              # wich can't be exported to json
              info_dict['formats'] = formats
              # element in the 'formats' field in info_dict is info_dict itself,
              # wich can't be exported to json
              info_dict['formats'] = formats
-        if self.params.get('listformats', None):
+        if self.params.get('listformats'):
              self.list_formats(info_dict)
              return
              self.list_formats(info_dict)
              return
+        if self.params.get('list_thumbnails'):
+            self.list_thumbnails(info_dict)
+            return
  
          req_format = self.params.get('format')
          if req_format is None:
  
          req_format = self.params.get('format')
          if req_format is None:
@@ -1157,25 +1239,7 @@ class YoutubeDL(object):
                      self.report_error('Cannot write metadata to JSON file ' + infofn)
                      return
  
                      self.report_error('Cannot write metadata to JSON file ' + infofn)
                      return
  
-        if self.params.get('writethumbnail', False):
-            if info_dict.get('thumbnail') is not None:
-                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
-                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
-                    self.to_screen('[%s] %s: Thumbnail is already present' %
-                                   (info_dict['extractor'], info_dict['id']))
-                else:
-                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
-                                   (info_dict['extractor'], info_dict['id']))
-                    try:
-                        uf = self.urlopen(info_dict['thumbnail'])
-                        with open(thumb_filename, 'wb') as thumbf:
-                            shutil.copyfileobj(uf, thumbf)
-                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
-                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
-                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                        self.report_warning('Unable to download thumbnail "%s": %s' %
-                                            (info_dict['thumbnail'], compat_str(err)))
+        self._write_thumbnails(info_dict, filename)
  
          if not self.params.get('skip_download', False):
              try:
  
          if not self.params.get('skip_download', False):
              try:
@@ -1458,8 +1522,26 @@ class YoutubeDL(object):
          header_line = line({
              'format_id': 'format code', 'ext': 'extension',
              'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
          header_line = line({
              'format_id': 'format code', 'ext': 'extension',
              'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
-        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
-                       (info_dict['id'], header_line, '\n'.join(formats_s)))
+        self.to_screen(
+            '[info] Available formats for %s:\n%s\n%s' %
+            (info_dict['id'], header_line, '\n'.join(formats_s)))
+
+    def list_thumbnails(self, info_dict):
+        # Print a table (ID, width, height, URL) of the thumbnails listed in
+        # info_dict via self.to_screen(); returns None.
+        thumbnails = info_dict.get('thumbnails')
+        if not thumbnails:
+            # Fall back to the single 'thumbnail' URL, shown with id '0'
+            tn_url = info_dict.get('thumbnail')
+            if tn_url:
+                thumbnails = [{'id': '0', 'url': tn_url}]
+            else:
+                self.to_screen(
+                    '[info] No thumbnails present for %s' % info_dict['id'])
+                return
+
+        self.to_screen(
+            '[info] Thumbnails for %s:' % info_dict['id'])
+        # Missing width/height are shown as 'unknown'.
+        # NOTE(review): t['id'] and t['url'] are read unconditionally, so an
+        # entry lacking either key raises KeyError -- confirm that every
+        # thumbnail dict reaching this point carries both.
+        self.to_screen(render_table(
+            ['ID', 'width', 'height', 'URL'],
+            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
  
      def urlopen(self, req):
          """ Start an HTTP download """
  
      def urlopen(self, req):
          """ Start an HTTP download """
@@ -1605,3 +1687,39 @@ class YoutubeDL(object):
          if encoding is None:
              encoding = preferredencoding()
          return encoding
          if encoding is None:
              encoding = preferredencoding()
          return encoding
+
+    def _write_thumbnails(self, info_dict, filename):
+        # Download thumbnail image(s) listed in info_dict['thumbnails'] and
+        # write them next to `filename` (same base name, thumbnail extension).
+        # Does nothing unless the 'writethumbnail' or 'write_all_thumbnails'
+        # parameter is set; 'writethumbnail' wins when both are set.
+        if self.params.get('writethumbnail', False):
+            thumbnails = info_dict.get('thumbnails')
+            if thumbnails:
+                # Keep only the last entry.
+                # NOTE(review): presumably the preferred one, assuming the
+                # list was sorted ascending beforehand -- confirm.
+                thumbnails = [thumbnails[-1]]
+        elif self.params.get('write_all_thumbnails', False):
+            thumbnails = info_dict.get('thumbnails')
+        else:
+            return
+
+        if not thumbnails:
+            # No thumbnails present, so return immediately
+            return
+
+        for t in thumbnails:
+            # Extension is derived from the URL, with 'jpg' as the default
+            thumb_ext = determine_ext(t['url'], 'jpg')
+            # The thumbnail id is added to the file name and to the messages
+            # only when more than one thumbnail is written.
+            # NOTE(review): t['id'] is read unconditionally in that case, so
+            # an entry without an 'id' key raises KeyError -- confirm.
+            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+                self.to_screen('[%s] %s: Thumbnail %sis already present' %
+                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+            else:
+                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                try:
+                    # NOTE(review): uf is not closed explicitly in this block
+                    uf = self.urlopen(t['url'])
+                    with open(thumb_filename, 'wb') as thumbf:
+                        shutil.copyfileobj(uf, thumbf)
+                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                    # A failed download is reported as a warning only; the
+                    # loop then continues with the next thumbnail
+                    self.report_warning('Unable to download thumbnail "%s": %s' %
+                                        (t['url'], compat_str(err)))