Merge remote-tracking branch 'David-Development/rtl2.py'
author Philipp Hagemeister <phihag@phihag.de>
Sun, 25 Jan 2015 16:55:31 +0000 (17:55 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 25 Jan 2015 16:55:31 +0000 (17:55 +0100)
25 files changed:
.travis.yml
AUTHORS
test/helper.py
test/test_utils.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/downloader/common.py
youtube_dl/downloader/external.py
youtube_dl/downloader/http.py
youtube_dl/extractor/appletrailers.py
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/bliptv.py
youtube_dl/extractor/common.py
youtube_dl/extractor/fc2.py
youtube_dl/extractor/folketinget.py
youtube_dl/extractor/krasview.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/smotri.py
youtube_dl/extractor/testtube.py
youtube_dl/extractor/ubu.py
youtube_dl/extractor/videomega.py
youtube_dl/extractor/wdr.py
youtube_dl/options.py
youtube_dl/utils.py

diff --git a/.travis.yml b/.travis.yml
index f140144149920ed025f36101a413165516be17ec..fb34299fced3f24a7f4de265aadf3094299d6523 100644 (file)
@@ -4,6 +4,9 @@ python:
   - "2.7"
   - "3.3"
   - "3.4"
+before_install:
+  - sudo apt-get update -qq
+  - sudo apt-get install -yqq rtmpdump
 script: nosetests test --verbose
 notifications:
   email:
diff --git a/AUTHORS b/AUTHORS
index b8bf3cb6f8db5c1e90be0f177b17c38618abcf0c..8362b6d8a674570020e3a700db1fc52835780fc9 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -104,3 +104,4 @@ Ondřej Caletka
 Dinesh S
 Johan K. Jensen
 Yen Chi Hsuan
+Enam Mijbah Noor
diff --git a/test/helper.py b/test/helper.py
index c416f388cbfe335678269b0226cc10708c49a850..27a68091f9034c238328b43dff56e3be59af6614 100644 (file)
@@ -140,7 +140,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     # Are checkable fields missing from the test case definition?
     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                           for key, value in got_dict.items()
-                          if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
     if missing_keys:
         def _repr(v):
diff --git a/test/test_utils.py b/test/test_utils.py
index bdd7f268af46d5eaaef35507f839989a8343316b..ebec7986f2b660da01db37c04bb65ccc283884bd 100644 (file)
@@ -52,6 +52,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    render_table,
 )
 
 
@@ -434,5 +435,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
         self.assertTrue(is_html(  # UTF-32-LE
             b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
 
+    def test_render_table(self):
+        self.assertEqual(
+            render_table(
+                ['a', 'bcd'],
+                [[123, 4], [9999, 51]]),
+            'a    bcd\n'
+            '123  4\n'
+            '9999 51')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 54e732943bcfbf2c22bec9cdb647c4a806e9ada5..b772f87f11ea550d903ec1db704bf76ca64ba99f 100755 (executable)
@@ -54,8 +54,10 @@ from .utils import (
     PostProcessingError,
     platform_name,
     preferredencoding,
+    render_table,
     SameFileError,
     sanitize_filename,
+    std_headers,
     subtitles_filename,
     takewhile_inclusive,
     UnavailableVideoError,
@@ -135,6 +137,7 @@ class YoutubeDL(object):
     nooverwrites:      Prevent overwriting files.
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
+    playlist_items:    Specific indices of playlist to download.
     playlistreverse:   Download playlist items in reverse order.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
@@ -144,6 +147,7 @@ class YoutubeDL(object):
     writeinfojson:     Write the video description to a .info.json file
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
+    write_all_thumbnails:  Write all thumbnail formats to files
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
@@ -194,11 +198,12 @@ class YoutubeDL(object):
                        postprocessor.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
-                       * filename: The final filename
-                       * status: One of "downloading" and "finished"
-
-                       The dict may also have some of the following entries:
+                       * status: One of "downloading" and "finished".
+                                 Check this first and ignore unknown values.
 
+                       If status is one of "downloading" or "finished", the
+                       following properties may also be present:
+                       * filename: The final filename (always present)
                        * downloaded_bytes: Bytes on disk
                        * total_bytes: Size of the whole file, None if unknown
                        * tmpfilename: The filename we're currently writing to
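
A hook that follows the contract documented above could look like the
minimal sketch below (names and messages are illustrative only; as the
notes say, it checks 'status' first and ignores anything it does not
recognize):

    def report_progress(d):
        # Per the documentation above: check 'status' first.
        if d['status'] == 'finished':
            print('Finished downloading %s' % d['filename'])
        elif d['status'] == 'downloading':
            done = d.get('downloaded_bytes')
            total = d.get('total_bytes')
            if done is not None and total:
                print('%s: %.1f%%' % (d['filename'], 100.0 * done / total))

    # Passed in via the parameter documented above:
    # YoutubeDL({'progress_hooks': [report_progress]})
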
@@ -220,12 +225,15 @@ class YoutubeDL(object):
                        youtube-dl servers for debugging.
     sleep_interval:    Number of seconds to sleep before each download.
     external_downloader:  Executable of the external downloader to call.
+    listformats:       Print an overview of available video formats and exit.
+    list_thumbnails:   Print a table of all thumbnails and exit.
 
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
-    noresizebuffer, retries, continuedl, noprogress, consoletitle
+    noresizebuffer, retries, continuedl, noprogress, consoletitle,
+    xattr_set_filesize.
 
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
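
A simulation-only sketch of the two listing parameters documented above
(the URL is a placeholder; with either flag set, processing stops right
after the requested table is printed):

    from youtube_dl import YoutubeDL

    with YoutubeDL({'list_thumbnails': True}) as ydl:
        ydl.download(['http://example.com/watch/12345'])
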
@@ -698,24 +706,51 @@ class YoutubeDL(object):
             if playlistend == -1:
                 playlistend = None
 
+            playlistitems_str = self.params.get('playlist_items', None)
+            playlistitems = None
+            if playlistitems_str is not None:
+                def iter_playlistitems(format):
+                    for string_segment in format.split(','):
+                        if '-' in string_segment:
+                            start, end = string_segment.split('-')
+                            for item in range(int(start), int(end) + 1):
+                                yield int(item)
+                        else:
+                            yield int(string_segment)
+                playlistitems = iter_playlistitems(playlistitems_str)
+
             ie_entries = ie_result['entries']
             if isinstance(ie_entries, list):
                 n_all_entries = len(ie_entries)
-                entries = ie_entries[playliststart:playlistend]
+                if playlistitems:
+                    entries = [ie_entries[i - 1] for i in playlistitems]
+                else:
+                    entries = ie_entries[playliststart:playlistend]
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
             elif isinstance(ie_entries, PagedList):
-                entries = ie_entries.getslice(
-                    playliststart, playlistend)
+                if playlistitems:
+                    entries = []
+                    for item in playlistitems:
+                        entries.extend(ie_entries.getslice(
+                            item - 1, item
+                        ))
+                else:
+                    entries = ie_entries.getslice(
+                        playliststart, playlistend)
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
                     (ie_result['extractor'], playlist, n_entries))
             else:  # iterable
-                entries = list(itertools.islice(
-                    ie_entries, playliststart, playlistend))
+                if playlistitems:
+                    entry_list = list(ie_entries)
+                    entries = [entry_list[i - 1] for i in playlistitems]
+                else:
+                    entries = list(itertools.islice(
+                        ie_entries, playliststart, playlistend))
                 n_entries = len(entries)
                 self.to_screen(
                     "[%s] playlist %s: Downloading %d videos" %
@@ -865,6 +900,42 @@ class YoutubeDL(object):
                 return matches[-1]
         return None
 
+    def _calc_headers(self, info_dict):
+        res = std_headers.copy()
+
+        add_headers = info_dict.get('http_headers')
+        if add_headers:
+            res.update(add_headers)
+
+        cookies = self._calc_cookies(info_dict)
+        if cookies:
+            res['Cookie'] = cookies
+
+        return res
+
+    def _calc_cookies(self, info_dict):
+        class _PseudoRequest(object):
+            def __init__(self, url):
+                self.url = url
+                self.headers = {}
+                self.unverifiable = False
+
+            def add_unredirected_header(self, k, v):
+                self.headers[k] = v
+
+            def get_full_url(self):
+                return self.url
+
+            def is_unverifiable(self):
+                return self.unverifiable
+
+            def has_header(self, h):
+                return h in self.headers
+
+        pr = _PseudoRequest(info_dict['url'])
+        self.cookiejar.add_cookie_header(pr)
+        return pr.headers.get('Cookie')
+
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'
 
@@ -879,9 +950,14 @@ class YoutubeDL(object):
             info_dict['playlist_index'] = None
 
         thumbnails = info_dict.get('thumbnails')
+        if thumbnails is None:
+            thumbnail = info_dict.get('thumbnail')
+            if thumbnail:
+                thumbnails = [{'url': thumbnail}]
         if thumbnails:
             thumbnails.sort(key=lambda t: (
-                t.get('width'), t.get('height'), t.get('url')))
+                t.get('preference'), t.get('width'), t.get('height'),
+                t.get('id'), t.get('url')))
             for t in thumbnails:
                 if 'width' in t and 'height' in t:
                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
@@ -933,6 +1009,11 @@ class YoutubeDL(object):
             # Automatically determine file extension if missing
             if 'ext' not in format:
                 format['ext'] = determine_ext(format['url']).lower()
+            # Add HTTP headers, so that external programs can use them from the
+            # json output
+            full_format_info = info_dict.copy()
+            full_format_info.update(format)
+            format['http_headers'] = self._calc_headers(full_format_info)
 
         format_limit = self.params.get('format_limit', None)
         if format_limit:
@@ -948,9 +1029,12 @@ class YoutubeDL(object):
             # element in the 'formats' field in info_dict is info_dict itself,
             # which can't be exported to json
             info_dict['formats'] = formats
-        if self.params.get('listformats', None):
+        if self.params.get('listformats'):
             self.list_formats(info_dict)
             return
+        if self.params.get('list_thumbnails'):
+            self.list_thumbnails(info_dict)
+            return
 
         req_format = self.params.get('format')
         if req_format is None:
@@ -1157,25 +1241,7 @@ class YoutubeDL(object):
                     self.report_error('Cannot write metadata to JSON file ' + infofn)
                     return
 
-        if self.params.get('writethumbnail', False):
-            if info_dict.get('thumbnail') is not None:
-                thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
-                thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
-                    self.to_screen('[%s] %s: Thumbnail is already present' %
-                                   (info_dict['extractor'], info_dict['id']))
-                else:
-                    self.to_screen('[%s] %s: Downloading thumbnail ...' %
-                                   (info_dict['extractor'], info_dict['id']))
-                    try:
-                        uf = self.urlopen(info_dict['thumbnail'])
-                        with open(thumb_filename, 'wb') as thumbf:
-                            shutil.copyfileobj(uf, thumbf)
-                        self.to_screen('[%s] %s: Writing thumbnail to: %s' %
-                                       (info_dict['extractor'], info_dict['id'], thumb_filename))
-                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                        self.report_warning('Unable to download thumbnail "%s": %s' %
-                                            (info_dict['thumbnail'], compat_str(err)))
+        self._write_thumbnails(info_dict, filename)
 
         if not self.params.get('skip_download', False):
             try:
@@ -1186,6 +1252,7 @@ class YoutubeDL(object):
                     if self.params.get('verbose'):
                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                     return fd.download(name, info)
+
                 if info_dict.get('requested_formats') is not None:
                     downloaded = []
                     success = True
@@ -1458,8 +1525,26 @@ class YoutubeDL(object):
         header_line = line({
             'format_id': 'format code', 'ext': 'extension',
             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
-        self.to_screen('[info] Available formats for %s:\n%s\n%s' %
-                       (info_dict['id'], header_line, '\n'.join(formats_s)))
+        self.to_screen(
+            '[info] Available formats for %s:\n%s\n%s' %
+            (info_dict['id'], header_line, '\n'.join(formats_s)))
+
+    def list_thumbnails(self, info_dict):
+        thumbnails = info_dict.get('thumbnails')
+        if not thumbnails:
+            tn_url = info_dict.get('thumbnail')
+            if tn_url:
+                thumbnails = [{'id': '0', 'url': tn_url}]
+            else:
+                self.to_screen(
+                    '[info] No thumbnails present for %s' % info_dict['id'])
+                return
+
+        self.to_screen(
+            '[info] Thumbnails for %s:' % info_dict['id'])
+        self.to_screen(render_table(
+            ['ID', 'width', 'height', 'URL'],
+            [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 
     def urlopen(self, req):
         """ Start an HTTP download """
@@ -1605,3 +1690,39 @@ class YoutubeDL(object):
         if encoding is None:
             encoding = preferredencoding()
         return encoding
+
+    def _write_thumbnails(self, info_dict, filename):
+        if self.params.get('writethumbnail', False):
+            thumbnails = info_dict.get('thumbnails')
+            if thumbnails:
+                thumbnails = [thumbnails[-1]]
+        elif self.params.get('write_all_thumbnails', False):
+            thumbnails = info_dict.get('thumbnails')
+        else:
+            return
+
+        if not thumbnails:
+            # No thumbnails present, so return immediately
+            return
+
+        for t in thumbnails:
+            thumb_ext = determine_ext(t['url'], 'jpg')
+            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+                self.to_screen('[%s] %s: Thumbnail %sis already present' %
+                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+            else:
+                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                try:
+                    uf = self.urlopen(t['url'])
+                    with open(thumb_filename, 'wb') as thumbf:
+                        shutil.copyfileobj(uf, thumbf)
+                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                    self.report_warning('Unable to download thumbnail "%s": %s' %
+                                        (t['url'], compat_str(err)))
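
The filenames produced by _write_thumbnails() follow the scheme shown in
this small sketch (the helper and the sample names are hypothetical; only
the suffix logic is taken from the code above):

    import os

    def thumb_filename(filename, thumb_id, thumb_ext, multiple):
        suffix = '_%s' % thumb_id if multiple else ''
        return os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

    thumb_filename('video.mp4', 'large', 'jpg', True)   # -> 'video_large.jpg'
    thumb_filename('video.mp4', '0', 'jpg', False)      # -> 'video.jpg'
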
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 3fc7dc5c2f13aea99b96c5f0dbe2359e43404058..71d2c6f350beacecda8f844c2bd337c95732c61a 100644 (file)
@@ -143,10 +143,13 @@ def _real_main(argv=None):
             parser.error('invalid max_filesize specified')
         opts.max_filesize = numeric_limit
     if opts.retries is not None:
-        try:
-            opts.retries = int(opts.retries)
-        except (TypeError, ValueError):
-            parser.error('invalid retry count specified')
+        if opts.retries in ('inf', 'infinite'):
+            opts_retries = float('inf')
+        else:
+            try:
+                opts_retries = int(opts.retries)
+            except (TypeError, ValueError):
+                parser.error('invalid retry count specified')
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:
@@ -238,6 +241,12 @@ def _real_main(argv=None):
             'verboseOutput': opts.verbose,
             'exec_cmd': opts.exec_cmd,
         })
+    if opts.xattr_set_filesize:
+        try:
+            import xattr
+            xattr  # Confuse flake8
+        except ImportError:
+            parser.error('setting filesize xattr requested but python-xattr is not available')
 
     ydl_opts = {
         'usenetrc': opts.usenetrc,
@@ -268,7 +277,7 @@ def _real_main(argv=None):
         'ignoreerrors': opts.ignoreerrors,
         'ratelimit': opts.ratelimit,
         'nooverwrites': opts.nooverwrites,
-        'retries': opts.retries,
+        'retries': opts_retries,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,
@@ -286,6 +295,7 @@ def _real_main(argv=None):
         'writeannotations': opts.writeannotations,
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
+        'write_all_thumbnails': opts.write_all_thumbnails,
         'writesubtitles': opts.writesubtitles,
         'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
@@ -331,6 +341,9 @@ def _real_main(argv=None):
         'call_home': opts.call_home,
         'sleep_interval': opts.sleep_interval,
         'external_downloader': opts.external_downloader,
+        'list_thumbnails': opts.list_thumbnails,
+        'playlist_items': opts.playlist_items,
+        'xattr_set_filesize': opts.xattr_set_filesize,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index c35c42c1dcd7bc28617934a3e5fd4d312bfb60dd..7bb3a948d2ebd0eaca46ca72f72dfaa2e7ffd1bd 100644 (file)
@@ -25,21 +25,23 @@ class FileDownloader(object):
 
     Available options:
 
-    verbose:           Print additional info to stdout.
-    quiet:             Do not print messages to stdout.
-    ratelimit:         Download speed limit, in bytes/sec.
-    retries:           Number of times to retry for HTTP error 5xx
-    buffersize:        Size of download buffer in bytes.
-    noresizebuffer:    Do not automatically resize the download buffer.
-    continuedl:        Try to continue downloads if possible.
-    noprogress:        Do not print the progress bar.
-    logtostderr:       Log messages to stderr instead of stdout.
-    consoletitle:      Display progress in console window's titlebar.
-    nopart:            Do not use temporary .part files.
-    updatetime:        Use the Last-modified header to set output file timestamps.
-    test:              Download only first bytes to test the downloader.
-    min_filesize:      Skip files smaller than this size
-    max_filesize:      Skip files larger than this size
+    verbose:            Print additional info to stdout.
+    quiet:              Do not print messages to stdout.
+    ratelimit:          Download speed limit, in bytes/sec.
+    retries:            Number of times to retry for HTTP error 5xx
+    buffersize:         Size of download buffer in bytes.
+    noresizebuffer:     Do not automatically resize the download buffer.
+    continuedl:         Try to continue downloads if possible.
+    noprogress:         Do not print the progress bar.
+    logtostderr:        Log messages to stderr instead of stdout.
+    consoletitle:       Display progress in console window's titlebar.
+    nopart:             Do not use temporary .part files.
+    updatetime:         Use the Last-modified header to set output file timestamps.
+    test:               Download only first bytes to test the downloader.
+    min_filesize:       Skip files smaller than this size
+    max_filesize:       Skip files larger than this size
+    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+                        (experimental)
 
     Subclasses of this one must re-define the real_download method.
     """
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 7ebe400969cf3c66b7ece2046526d7085cbac43a..af9fdba7500e2cd14d7c571688249cdd8dd5c1a8 100644 (file)
@@ -7,7 +7,6 @@ import sys
 from .common import FileDownloader
 from ..utils import (
     encodeFilename,
-    std_headers,
 )
 
 
@@ -46,42 +45,6 @@ class ExternalFD(FileDownloader):
     def supports(cls, info_dict):
         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
 
-    def _calc_headers(self, info_dict):
-        res = std_headers.copy()
-
-        ua = info_dict.get('user_agent')
-        if ua is not None:
-            res['User-Agent'] = ua
-
-        cookies = self._calc_cookies(info_dict)
-        if cookies:
-            res['Cookie'] = cookies
-
-        return res
-
-    def _calc_cookies(self, info_dict):
-        class _PseudoRequest(object):
-            def __init__(self, url):
-                self.url = url
-                self.headers = {}
-                self.unverifiable = False
-
-            def add_unredirected_header(self, k, v):
-                self.headers[k] = v
-
-            def get_full_url(self):
-                return self.url
-
-            def is_unverifiable(self):
-                return self.unverifiable
-
-            def has_header(self, h):
-                return h in self.headers
-
-        pr = _PseudoRequest(info_dict['url'])
-        self.ydl.cookiejar.add_cookie_header(pr)
-        return pr.headers.get('Cookie')
-
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
         cmd = self._make_cmd(tmpfilename, info_dict)
@@ -107,7 +70,7 @@ class ExternalFD(FileDownloader):
 class CurlFD(ExternalFD):
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-o', tmpfilename]
-        for key, val in self._calc_headers(info_dict).items():
+        for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += ['--', info_dict['url']]
         return cmd
@@ -116,7 +79,7 @@ class CurlFD(ExternalFD):
 class WgetFD(ExternalFD):
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
-        for key, val in self._calc_headers(info_dict).items():
+        for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += ['--', info_dict['url']]
         return cmd
@@ -131,7 +94,7 @@ class Aria2cFD(ExternalFD):
         if dn:
             cmd += ['--dir', dn]
         cmd += ['--out', os.path.basename(tmpfilename)]
-        for key, val in self._calc_headers(info_dict).items():
+        for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += ['--', info_dict['url']]
         return cmd
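
With _calc_headers() moved into YoutubeDL, every ExternalFD subclass above
now reads the pre-computed info_dict['http_headers'] instead of computing
headers itself. A new backend would follow the same pattern; the class and
flag names in this sketch are made up, and it is assumed to live in
youtube_dl/downloader/external.py:

    class ExampleFD(ExternalFD):
        def _make_cmd(self, tmpfilename, info_dict):
            cmd = [self.exe, '--output', tmpfilename]
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', '%s: %s' % (key, val)]
            cmd += ['--', info_dict['url']]
            return cmd
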
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index e68f20c9f46a93ebfeca2ff47dc0843f4ab94874..8a1d578d54c6422df3b5ea768bb5a7547d52d32d 100644 (file)
@@ -24,10 +24,6 @@ class HttpFD(FileDownloader):
 
         # Do not include the Accept-Encoding header
         headers = {'Youtubedl-no-compression': 'True'}
-        if 'user_agent' in info_dict:
-            headers['Youtubedl-user-agent'] = info_dict['user_agent']
-        if 'http_referer' in info_dict:
-            headers['Referer'] = info_dict['http_referer']
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
@@ -161,6 +157,14 @@ class HttpFD(FileDownloader):
                 except (OSError, IOError) as err:
                     self.report_error('unable to open for writing: %s' % str(err))
                     return False
+
+                if self.params.get('xattr_set_filesize', False) and data_len is not None:
+                    try:
+                        import xattr
+                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+                    except (OSError, IOError, ImportError) as err:
+                        self.report_error('unable to set filesize xattr: %s' % str(err))
+
             try:
                 stream.write(data_block)
             except (IOError, OSError) as err:
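
The attribute written above can later be read back, for example to compare
the recorded expected size against the bytes actually on disk. Hedged
sketch, assuming the installed xattr module exposes getxattr() alongside
the setxattr() used above and the filesystem supports user xattrs:

    import xattr

    raw = xattr.getxattr('video.mp4.part', 'user.ytdl.filesize')
    expected = int(raw.decode('utf-8') if isinstance(raw, bytes) else raw)
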
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 7cd0482c75d7157df218071a2e22ce2904d094b6..70621946d8a8c9de4437d36755d8476f65608318 100644 (file)
@@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor):
                 'thumbnail': thumbnail,
                 'upload_date': upload_date,
                 'uploader_id': uploader_id,
-                'user_agent': 'QuickTime compatible (youtube-dl)',
+                'http_headers': {
+                    'User-Agent': 'QuickTime compatible (youtube-dl)',
+                },
             })
 
         return {
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index f42862be31ffd047a6221403bd765e386e34e48d..f016368fa8d0890de874a774b2a4a18db60a01c6 100644 (file)
@@ -95,7 +95,7 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
         for fmt in ['windows', 'android_tablet']:
             request = compat_urllib_request.Request(
                 self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
-            request.add_header('Youtubedl-user-agent', self._USER_AGENT)
+            request.add_header('User-Agent', self._USER_AGENT)
 
             fmt_json = self._download_json(
                 request, video_id, 'Downloading %s video JSON' % fmt)
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 8bfe502143a2cbf9c076bdabaad3e7ad5d2090b6..693ba22c6dde0dd1760531108353ab48b1ed0fa1 100644 (file)
@@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor):
         # Album playlist ripped from fakeshoredrive with no metadata
         {
             'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+            'info_dict': {
+                'title': 'PPP (Pistol P Project)',
+                'id': '837572',
+            },
             'playlist': [{
                 'info_dict': {
-                    'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
-                    'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
+                    'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
+                    'id': '837577',
                     'ext': 'mp3',
+                    'uploader': 'Lil Herb a.k.a. G Herbo',
                 }
             }],
             'params': {
-                'playliststart': 8,
-                'playlistend': 8,
+                'playliststart': 9,
+                'playlistend': 9,
             }
         }
     ]
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 14b814120be3b8215a28fc00a95f87bd22e0c062..436cc515563a07d853ced1b8373461a752ed6038 100644 (file)
@@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
         # For some weird reason, blip.tv serves a video instead of subtitles
         # when we request with a common UA
         req = compat_urllib_request.Request(url)
-        req.add_header('Youtubedl-user-agent', 'youtube-dl')
+        req.add_header('User-Agent', 'youtube-dl')
         return self._download_webpage(req, None, note=False)
 
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 03f3f18c83012cdced0e305fe1cc02d69a85bb7c..7b7a832dc34a37b70d2c2165dfe1c28185237e89 100644 (file)
@@ -108,7 +108,6 @@ class InfoExtractor(object):
                                   (quality takes higher priority)
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
-                    * http_referer  HTTP Referer header value to set.
                     * http_method  HTTP method to use for the download.
                     * http_headers  A dictionary of additional HTTP headers
                                  to add to the request.
@@ -130,7 +129,9 @@ class InfoExtractor(object):
                     something like "4234987", title "Dancing naked mole rats",
                     and display_id "dancing-naked-mole-rats"
     thumbnails:     A list of dictionaries, with the following entries:
+                        * "id" (optional, string) - Thumbnail format ID
                         * "url"
+                        * "preference" (optional, int) - quality of the image
                         * "width" (optional, int)
                         * "height" (optional, int)
                         * "resolution" (optional, string "{width}x{height"},
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index 81ceace53289709b93d7c647f6627197320381ef..1ccc1a9642bb09ed84bdd2747c665520ab3c98c4 100644 (file)
@@ -5,6 +5,7 @@ import hashlib
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
 )
@@ -16,7 +17,8 @@ from ..utils import (
 class FC2IE(InfoExtractor):
     _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
     IE_NAME = 'fc2'
-    _TEST = {
+    _NETRC_MACHINE = 'fc2'
+    _TESTS = [{
         'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
         'md5': 'a6ebe8ebe0396518689d963774a54eb7',
         'info_dict': {
@@ -24,12 +26,57 @@ class FC2IE(InfoExtractor):
             'ext': 'flv',
             'title': 'Boxing again with Puff',
         },
-    }
+    }, {
+        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
+        'info_dict': {
+            'id': '20150125cEva0hDn',
+            'ext': 'mp4',
+        },
+        'params': {
+            'username': 'ytdl@yt-dl.org',
+            'password': '(snip)',
+            'skip': 'requires actual password'
+        }
+    }]
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None or password is None:
+            return False
+
+        # Log in
+        login_form_strs = {
+            'email': username,
+            'password': password,
+            'done': 'video',
+            'Submit': ' Login ',
+        }
+
+        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+        # chokes on unicode
+        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
+        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+        request = compat_urllib_request.Request(
+            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
+
+        login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
+        if 'mode=redirect&login=done' not in login_results:
+            self.report_warning('unable to log in: bad username or password')
+            return False
+
+        # this is also needed
+        login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
+        self._download_webpage(
+            login_redir, None, note='Login redirect', errnote='Login redirect failed')
+
+        return True
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        self._login()
         webpage = self._download_webpage(url, video_id)
         self._downloader.cookiejar.clear_session_cookies()  # must clear
+        self._login()
 
         title = self._og_search_title(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
@@ -46,7 +93,12 @@ class FC2IE(InfoExtractor):
         info = compat_urlparse.parse_qs(info_webpage)
 
         if 'err_code' in info:
-            raise ExtractorError('Error code: %s' % info['err_code'][0])
+            # most of the time we can still download the video even if err_code is 403 or 602
+            self.report_warning(
+                'Error code was: %s... but still trying' % info['err_code'][0])
+
+        if 'filepath' not in info:
+            raise ExtractorError('Cannot download file. Are you logged in?')
 
         video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
         title_info = info.get('title')
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
index 68e2db94385bc7f51d7f27a68d61d5e4289ba0c9..0fb29de75228f0133c0b8d54a015fbd5d90954c1 100644 (file)
@@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
     _TEST = {
         'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+        'md5': '6269e8626fa1a891bf5369b386ae996a',
         'info_dict': {
             'id': '1165642',
             'ext': 'mp4',
@@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor):
             'upload_date': '20141120',
             'duration': 3960,
         },
-        'params': {
-            'skip_download': 'rtmpdump required',
-        }
     }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index 6f3d2345b6f976ff9b380a04014f20df18483e6e..e46954b47449b11be795c17478e168a6a57af0fd 100644 (file)
@@ -2,18 +2,17 @@
 from __future__ import unicode_literals
 
 import json
-import re
 
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unescapeHTML,
+    js_to_json,
 )
 
 
 class KrasViewIE(InfoExtractor):
     IE_DESC = 'Красвью'
-    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+    _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://krasview.ru/video/512228',
@@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        flashvars = json.loads(self._search_regex(
-            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+        flashvars = json.loads(js_to_json(self._search_regex(
+            r'video_Init\(({.+?})', webpage, 'flashvars')))
 
         video_url = flashvars['url']
-        title = unescapeHTML(flashvars['title'])
-        description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
-        thumbnail = flashvars['image']
-        duration = int(flashvars['duration'])
-        filesize = int(flashvars['size'])
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage, default=None)
+        thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
+        duration = int_or_none(flashvars.get('duration'))
         width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
         height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
 
@@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'duration': duration,
-            'filesize': filesize,
             'width': width,
             'height': height,
         }
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 5ebc78033a4abbb98310096c279fe11459b4a791..22a7263271f5c594cf879f7cadb63689b8599579 100644 (file)
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         webpage_url = self._MOBILE_TEMPLATE % mtvn_id
         req = compat_urllib_request.Request(webpage_url)
         # Otherwise we get a webpage that would execute some javascript
-        req.add_header('Youtubedl-user-agent', 'curl/7')
+        req.add_header('User-Agent', 'curl/7')
         webpage = self._download_webpage(req, mtvn_id,
                                          'Downloading mobile page')
         metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 26f361c93990b6b92ff31d2447b70f7e08263d00..e94f41362de0f267e14e4e3925bd0aa45c19375f 100644 (file)
@@ -102,6 +102,7 @@ class SmotriIE(InfoExtractor):
                 'uploader_id': 'mopeder',
                 'duration': 71,
                 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
+                'upload_date': '20150114',
             },
         },
         # swf player
diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py
index fd47e71a2febb0d1accba74bb7bfd09cf56788a7..6a7b5e49de2d348cb76b88abd57bea59113138a6 100644 (file)
@@ -1,7 +1,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    qualities,
+)
 
 
 class TestTubeIE(InfoExtractor):
@@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor):
         self._sort_formats(formats)
 
         duration = int_or_none(info.get('duration'))
+        images = info.get('images')
+        thumbnails = None
+        preference = qualities(['mini', 'small', 'medium', 'large'])
+        if images:
+            thumbnails = [{
+                'id': thumbnail_id,
+                'url': img_url,
+                'preference': preference(thumbnail_id)
+            } for thumbnail_id, img_url in images.items()]
 
         return {
             'id': video_id,
             'display_id': display_id,
             'title': info['title'],
             'description': info.get('summary'),
-            'thumbnail': info.get('images', {}).get('large'),
+            'thumbnails': thumbnails,
             'uploader': info.get('show', {}).get('name'),
             'uploader_id': info.get('show', {}).get('slug'),
             'duration': duration,
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
index 0182d67ec2db7fee5aa0b6f7f5457f13cca0c305..d5023775857a8bd27ee0f19cffd8176a477be4fd 100644 (file)
@@ -3,50 +3,51 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    qualities,
+)
 
 
 class UbuIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
     _TEST = {
         'url': 'http://ubu.com/film/her_noise.html',
-        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+        'md5': '138d5652618bf0f03878978db9bef1ee',
         'info_dict': {
             'id': 'her_noise',
-            'ext': 'mp4',
+            'ext': 'm4v',
             'title': 'Her Noise - The Making Of (2007)',
             'duration': 3600,
         },
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(
             r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
 
         duration = int_or_none(self._html_search_regex(
-            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
-        if duration:
-            duration *= 60
+            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
+            invscale=60)
 
         formats = []
-
         FORMAT_REGEXES = [
-            ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
-            ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
+            ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
+            ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
         ]
-
+        preference = qualities([fid for fid, _ in FORMAT_REGEXES])
         for format_id, format_regex in FORMAT_REGEXES:
             m = re.search(format_regex, webpage)
             if m:
                 formats.append({
                     'url': m.group(1),
                     'format_id': format_id,
+                    'preference': preference(format_id),
                 })
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index fc6e05fe0a2d60f42b1f5eef6d2d2e90994f86b7..27303031620a8c126797bcdd6207d2f2355c74be 100644 (file)
@@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'thumbnail': thumbnail,
-            'http_referer': iframe_url,
+            'http_headers': {
+                'Referer': iframe_url,
+            },
         }
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 45466e31b7445f8dd8da742308dcc69f2ff1152f..313b9c15ddc576c226e1f4b8ee211bc816e2ebe9 100644 (file)
@@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor):
             'title': mobj.group('title'),
             'age_limit': int(mobj.group('age_limit')),
             'url': url,
-            'user_agent': 'mobile',
+            'http_headers': {
+                'User-Agent': 'mobile',
+            },
         }
 
 
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index b38b8349fc58cb61cf78912406ad35194c0639dc..dbc6f5528f16438ed1fc0f158ba2200efbf0d774 100644 (file)
@@ -200,6 +200,10 @@ def parseOpts(overrideArguments=None):
         '--playlist-end',
         dest='playlistend', metavar='NUMBER', default=None, type=int,
         help='playlist video to end at (default is last)')
+    selection.add_option(
+        '--playlist-items',
+        dest='playlist_items', metavar='ITEM_SPEC', default=None,
+        help='playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
     selection.add_option(
         '--match-title',
         dest='matchtitle', metavar='REGEX',
@@ -373,7 +377,7 @@ def parseOpts(overrideArguments=None):
     downloader.add_option(
         '-R', '--retries',
         dest='retries', metavar='RETRIES', default=10,
-        help='number of retries (default is %default)')
+        help='number of retries (default is %default), or "infinite".')
     downloader.add_option(
         '--buffer-size',
         dest='buffersize', metavar='SIZE', default='1024',
@@ -390,6 +394,10 @@ def parseOpts(overrideArguments=None):
         '--playlist-reverse',
         action='store_true',
         help='Download playlist videos in reverse order')
+    downloader.add_option(
+        '--xattr-set-filesize',
+        dest='xattr_set_filesize', action='store_true',
+        help='(experimental) set file xattribute ytdl.filesize with expected filesize')
     downloader.add_option(
         '--external-downloader',
         dest='external_downloader', metavar='COMMAND',
@@ -614,10 +622,6 @@ def parseOpts(overrideArguments=None):
         '--write-annotations',
         action='store_true', dest='writeannotations', default=False,
         help='write video annotations to a .annotation file')
-    filesystem.add_option(
-        '--write-thumbnail',
-        action='store_true', dest='writethumbnail', default=False,
-        help='write thumbnail image to disk')
     filesystem.add_option(
         '--load-info',
         dest='load_info_filename', metavar='FILE',
@@ -637,6 +641,20 @@ def parseOpts(overrideArguments=None):
         action='store_true', dest='rm_cachedir',
         help='Delete all filesystem cache files')
 
+    thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
+    thumbnail.add_option(
+        '--write-thumbnail',
+        action='store_true', dest='writethumbnail', default=False,
+        help='write thumbnail image to disk')
+    thumbnail.add_option(
+        '--write-all-thumbnails',
+        action='store_true', dest='write_all_thumbnails', default=False,
+        help='write all thumbnail image formats to disk')
+    thumbnail.add_option(
+        '--list-thumbnails',
+        action='store_true', dest='list_thumbnails', default=False,
+        help='Simulate and list all available thumbnail formats')
+
     postproc = optparse.OptionGroup(parser, 'Post-processing Options')
     postproc.add_option(
         '-x', '--extract-audio',
@@ -702,6 +720,7 @@ def parseOpts(overrideArguments=None):
     parser.add_option_group(selection)
     parser.add_option_group(downloader)
     parser.add_option_group(filesystem)
+    parser.add_option_group(thumbnail)
     parser.add_option_group(verbosity)
     parser.add_option_group(workarounds)
     parser.add_option_group(video_format)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2970d02a1a535d3f86296456e13e8bee130d3c93..b8c52af74768053d27b4642173022af3b2c6723d 100644 (file)
@@ -606,11 +606,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             if 'Accept-encoding' in req.headers:
                 del req.headers['Accept-encoding']
             del req.headers['Youtubedl-no-compression']
-        if 'Youtubedl-user-agent' in req.headers:
-            if 'User-agent' in req.headers:
-                del req.headers['User-agent']
-            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
-            del req.headers['Youtubedl-user-agent']
 
         if sys.version_info < (2, 7) and '#' in req.get_full_url():
             # Python 2.6 is brain-dead when it comes to fragments
@@ -1664,3 +1659,11 @@ def determine_protocol(info_dict):
         return 'f4m'
 
     return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+    """ Render a list of rows, each as a list of values """
+    table = [header_row] + data
+    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+    return '\n'.join(format_str % tuple(row) for row in table)
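
Usage sketch for render_table(), with the same input and output that the
new test in test/test_utils.py above asserts:

    print(render_table(['a', 'bcd'], [[123, 4], [9999, 51]]))
    # a    bcd
    # 123  4
    # 9999 51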