Merge branch 'compat-getenv-and-expanduser' of https://github.com/dstftw/youtube...
[youtube-dl] / youtube_dl / YoutubeDL.py
index f5ca33d453a89359405d468dc994337dfec7910b..242affb5b8dbbc88ebf6806642a0e4e61931d0c3 100755 (executable)
@@ -24,10 +24,12 @@ if os.name == 'nt':
 
 from .utils import (
     compat_cookiejar,
+    compat_expanduser,
     compat_http_client,
     compat_str,
     compat_urllib_error,
     compat_urllib_request,
+    escape_url,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -57,6 +59,7 @@ from .utils import (
     YoutubeDLHandler,
     prepend_extension,
 )
+from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .postprocessor import FFmpegMergerPP
@@ -105,6 +108,8 @@ class YoutubeDL(object):
     forcefilename:     Force printing final filename.
     forceduration:     Force printing duration.
     forcejson:         Force printing info_dict as JSON.
+    dump_single_json:  Force printing the info_dict of the whole playlist
+                       (or video) as a single JSON line.
     simulate:          Do not download the video files.
     format:            Video format code.
     format_limit:      Highest quality format to try.
@@ -133,7 +138,7 @@ class YoutubeDL(object):
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
     cachedir:          Location of the cache files in the filesystem.
-                       None to disable filesystem cache.
+                       False to disable filesystem cache.
     noplaylist:        Download single video instead of a playlist if in doubt.
     age_limit:         An integer representing the user's age in years.
                        Unsuitable videos for the given age are skipped.
@@ -162,6 +167,9 @@ class YoutubeDL(object):
     default_search:    Prepend this string if an input url is not valid.
                        'auto' for elaborate guessing
     encoding:          Use this encoding instead of the system-specified.
+    extract_flat:      Do not resolve URLs, return the immediate result.
+                       Pass in 'in_playlist' to only show this behavior for
+                       playlist items.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -171,6 +179,7 @@ class YoutubeDL(object):
     The following options are used by the post processors:
     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                        otherwise prefer avconv.
+    exec_cmd:          Arbitrary command to run after downloading
     """
 
     params = None
@@ -193,6 +202,7 @@ class YoutubeDL(object):
         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
         self._err_file = sys.stderr
         self.params = params
+        self.cache = Cache(self)
 
         if params.get('bidi_workaround', False):
             try:
@@ -223,11 +233,11 @@ class YoutubeDL(object):
 
         if (sys.version_info >= (3,) and sys.platform != 'win32' and
                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
-                and not params['restrictfilenames']):
+                and not params.get('restrictfilenames', False)):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
-                'cannot encode all charactes. '
+                'cannot encode all characters. '
                 'Set the LC_ALL environment variable to fix this.')
             self.params['restrictfilenames'] = True
 
@@ -275,7 +285,7 @@ class YoutubeDL(object):
             return message
 
         assert hasattr(self, '_output_process')
-        assert type(message) == type('')
+        assert isinstance(message, compat_str)
         line_count = message.count('\n') + 1
         self._output_process.stdin.write((message + '\n').encode('utf-8'))
         self._output_process.stdin.flush()
@@ -303,7 +313,7 @@ class YoutubeDL(object):
 
     def to_stderr(self, message):
         """Print message to stderr."""
-        assert type(message) == type('')
+        assert isinstance(message, compat_str)
         if self.params.get('logger'):
             self.params['logger'].error(message)
         else:
@@ -423,7 +433,7 @@ class YoutubeDL(object):
             autonumber_templ = '%0' + str(autonumber_size) + 'd'
             template_dict['autonumber'] = autonumber_templ % self._num_downloads
             if template_dict.get('playlist_index') is not None:
-                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
             if template_dict.get('resolution') is None:
                 if template_dict.get('width') and template_dict.get('height'):
                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -442,7 +452,7 @@ class YoutubeDL(object):
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-            tmpl = os.path.expanduser(outtmpl)
+            tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             return filename
         except ValueError as err:
@@ -479,7 +489,10 @@ class YoutubeDL(object):
                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
         age_limit = self.params.get('age_limit')
         if age_limit is not None:
-            if age_limit < info_dict.get('age_limit', 0):
+            actual_age_limit = info_dict.get('age_limit')
+            if actual_age_limit is None:
+                actual_age_limit = 0
+            if age_limit < actual_age_limit:
                 return 'Skipping "' + title + '" because it is age restricted'
         if self.in_download_archive(info_dict):
             return '%s has already been recorded in archive' % video_title
@@ -558,7 +571,16 @@ class YoutubeDL(object):
         Returns the resolved ie_result.
         """
 
-        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
+        result_type = ie_result.get('_type', 'video')
+
+        if result_type in ('url', 'url_transparent'):
+            extract_flat = self.params.get('extract_flat', False)
+            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
+                    extract_flat is True):
+                if self.params.get('forcejson', False):
+                    self.to_stdout(json.dumps(ie_result))
+                return ie_result
+
         if result_type == 'video':
             self.add_extra_info(ie_result, extra_info)
             return self.process_video_result(ie_result, download=download)
@@ -627,6 +649,7 @@ class YoutubeDL(object):
             for i, entry in enumerate(entries, 1):
                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                 extra = {
+                    'n_entries': n_entries,
                     'playlist': playlist,
                     'playlist_index': i + playliststart,
                     'extractor': ie_result['extractor'],
@@ -694,7 +717,7 @@ class YoutubeDL(object):
             if video_formats:
                 return video_formats[0]
         else:
-            extensions = ['mp4', 'flv', 'webm', '3gp']
+            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
             if format_spec in extensions:
                 filter_f = lambda f: f['ext'] == format_spec
             else:
@@ -795,28 +818,29 @@ class YoutubeDL(object):
         if req_format in ('-1', 'all'):
             formats_to_download = formats
         else:
-            # We can accept formats requested in the format: 34/5/best, we pick
-            # the first that is available, starting from left
-            req_formats = req_format.split('/')
-            for rf in req_formats:
-                if re.match(r'.+?\+.+?', rf) is not None:
-                    # Two formats have been requested like '137+139'
-                    format_1, format_2 = rf.split('+')
-                    formats_info = (self.select_format(format_1, formats),
-                        self.select_format(format_2, formats))
-                    if all(formats_info):
-                        selected_format = {
-                            'requested_formats': formats_info,
-                            'format': rf,
-                            'ext': formats_info[0]['ext'],
-                        }
+            for rfstr in req_format.split(','):
+                # We can accept formats requested in the format: 34/5/best, we pick
+                # the first that is available, starting from left
+                req_formats = rfstr.split('/')
+                for rf in req_formats:
+                    if re.match(r'.+?\+.+?', rf) is not None:
+                        # Two formats have been requested like '137+139'
+                        format_1, format_2 = rf.split('+')
+                        formats_info = (self.select_format(format_1, formats),
+                            self.select_format(format_2, formats))
+                        if all(formats_info):
+                            selected_format = {
+                                'requested_formats': formats_info,
+                                'format': rf,
+                                'ext': formats_info[0]['ext'],
+                            }
+                        else:
+                            selected_format = None
                     else:
-                        selected_format = None
-                else:
-                    selected_format = self.select_format(rf, formats)
-                if selected_format is not None:
-                    formats_to_download = [selected_format]
-                    break
+                        selected_format = self.select_format(rf, formats)
+                    if selected_format is not None:
+                        formats_to_download.append(selected_format)
+                        break
         if not formats_to_download:
             raise ExtractorError('requested format not available',
                                  expected=True)
@@ -849,7 +873,7 @@ class YoutubeDL(object):
         # Keep for backwards compatibility
         info_dict['stitle'] = info_dict['title']
 
-        if not 'format' in info_dict:
+        if 'format' not in info_dict:
             info_dict['format'] = info_dict['ext']
 
         reason = self._match_entry(info_dict)
@@ -882,6 +906,8 @@ class YoutubeDL(object):
         if self.params.get('forcejson', False):
             info_dict['_filename'] = filename
             self.to_stdout(json.dumps(info_dict))
+        if self.params.get('dump_single_json', False):
+            info_dict['_filename'] = filename
 
         # Do nothing else if in simulate mode
         if self.params.get('simulate', False):
@@ -999,7 +1025,7 @@ class YoutubeDL(object):
                     if info_dict.get('requested_formats') is not None:
                         downloaded = []
                         success = True
-                        merger = FFmpegMergerPP(self)
+                        merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                         if not merger._get_executable():
                             postprocessors = []
                             self.report_warning('You have requested multiple '
@@ -1049,12 +1075,15 @@ class YoutubeDL(object):
         for url in url_list:
             try:
                 #It also downloads the videos
-                self.extract_info(url)
+                res = self.extract_info(url)
             except UnavailableVideoError:
                 self.report_error('unable to download video')
             except MaxDownloadsReached:
                 self.to_screen('[info] Maximum number of downloaded files reached.')
                 raise
+            else:
+                if self.params.get('dump_single_json', False):
+                    self.to_stdout(json.dumps(res))
 
         return self._download_retcode
 
@@ -1228,27 +1257,44 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
+
+        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
+        # always respected by websites, some tend to give out URLs with non percent-encoded
+        # non-ASCII characters (see telemb.py, ard.py [#3412])
+        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
+        # To work around aforementioned issue we will replace request's original URL with
+        # percent-encoded one
+        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+        url = req if req_is_string else req.get_full_url()
+        url_escaped = escape_url(url)
+
+        # Substitute URL if any change after escaping
+        if url != url_escaped:
+            if req_is_string:
+                req = url_escaped
+            else:
+                req = compat_urllib_request.Request(
+                    url_escaped, data=req.data, headers=req.headers,
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
         if not self.params.get('verbose'):
             return
 
+        if type('') is not compat_str:
+            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+            self.report_warning(
+                'Your Python is broken! Update to a newer and supported version')
+
         encoding_str = (
             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                 locale.getpreferredencoding(),
                 sys.getfilesystemencoding(),
                 sys.stdout.encoding,
                 self.get_encoding()))
-        try:
-            write_string(encoding_str, encoding=None)
-        except:
-            errmsg = 'Failed to write encoding string %r' % encoding_str
-            try:
-                sys.stdout.write(errmsg)
-            except:
-                pass
-            raise IOError(errmsg)
+        write_string(encoding_str, encoding=None)
 
         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
         try: