X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=a827414dca3d00bc4b1f3573f97ea50cb165230c;hb=2cc779f497ae20d6e0e28fc546a25723cfea631a;hp=5f4c93ea370d794e62f0e6eee4f1f81cecd1300b;hpb=7b67b60773b70ac74edd3993eeea6fe9b790c664;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5f4c93ea3..a827414dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -65,6 +65,7 @@ from .utils import ( locked_file, make_HTTPS_handler, MaxDownloadsReached, + orderedSet, PagedList, parse_filesize, PerRequestProxyHandler, @@ -87,11 +88,13 @@ from .utils import ( version_tuple, write_json_file, write_string, + YoutubeDLCookieJar, YoutubeDLCookieProcessor, YoutubeDLHandler, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER +from .extractor.openload import PhantomJSwrapper from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( @@ -209,7 +212,7 @@ class YoutubeDL(object): At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification - on geo-restricted sites. (Experimental) + on geo-restricted sites. socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -257,7 +260,7 @@ class YoutubeDL(object): - "warn": only emit a warning - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) - source_address: (Experimental) Client-side IP address to bind to. + source_address: Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download when @@ -279,11 +282,14 @@ class YoutubeDL(object): match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For - HTTP header (experimental) + HTTP header geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for explicit geographic restriction bypassing via faking - X-Forwarded-For HTTP header (experimental) + X-Forwarded-For HTTP header + geo_bypass_ip_block: + IP range in CIDR notation that will be used similarly to + geo_bypass_country The following options determine which downloader is picked: external_downloader: Executable of the external downloader to call. @@ -296,13 +302,20 @@ class YoutubeDL(object): the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args, hls_use_mpegts. + xattr_set_filesize, external_downloader_args, hls_use_mpegts, + http_chunk_size. The following options are used by the post processors: - prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, - otherwise prefer avconv. + prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, + otherwise prefer ffmpeg. postprocessor_args: A list of additional command-line arguments for the postprocessor. + + The following options are used by the Youtube extractor: + youtube_include_dash_manifest: If True (default), DASH manifests and related + data will be downloaded and processed by extractor. + You can reduce network I/O by disabling it if you don't + care about DASH. """ _NUMERIC_FIELDS = set(( @@ -523,6 +536,8 @@ class YoutubeDL(object): def save_console_title(self): if not self.params.get('consoletitle', False): return + if self.params.get('simulate', False): + return if compat_os_name != 'nt' and 'TERM' in os.environ: # Save the title on stack self._write_string('\033[22;0t', self._screen_file) @@ -530,6 +545,8 @@ class YoutubeDL(object): def restore_console_title(self): if not self.params.get('consoletitle', False): return + if self.params.get('simulate', False): + return if compat_os_name != 'nt' and 'TERM' in os.environ: # Restore the title from stack self._write_string('\033[23;0t', self._screen_file) @@ -542,7 +559,7 @@ class YoutubeDL(object): self.restore_console_title() if self.params.get('cookiefile') is not None: - self.cookiejar.save() + self.cookiejar.save(ignore_discard=True, ignore_expires=True) def trouble(self, message=None, tb=None): """Determine action to take when a download problem appears. @@ -901,15 +918,25 @@ class YoutubeDL(object): yield int(item) else: yield int(string_segment) - playlistitems = iter_playlistitems(playlistitems_str) + playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) ie_entries = ie_result['entries'] + + def make_playlistitems_entries(list_ie_entries): + num_entries = len(list_ie_entries) + return [ + list_ie_entries[i - 1] for i in playlistitems + if -num_entries <= i - 1 < num_entries] + + def report_download(num_entries): + self.to_screen( + '[%s] playlist %s: Downloading %d videos' % + (ie_result['extractor'], playlist, num_entries)) + if isinstance(ie_entries, list): n_all_entries = len(ie_entries) if playlistitems: - entries = [ - ie_entries[i - 1] for i in playlistitems - if -n_all_entries <= i - 1 < n_all_entries] + entries = make_playlistitems_entries(ie_entries) else: entries = ie_entries[playliststart:playlistend] n_entries = len(entries) @@ -927,20 +954,16 @@ class YoutubeDL(object): entries = ie_entries.getslice( playliststart, playlistend) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) else: # iterable if playlistitems: - entry_list = list(ie_entries) - entries = [entry_list[i - 1] for i in playlistitems] + entries = make_playlistitems_entries(list(itertools.islice( + ie_entries, 0, max(playlistitems)))) else: entries = list(itertools.islice( ie_entries, playliststart, playlistend)) n_entries = len(entries) - self.to_screen( - '[%s] playlist %s: Downloading %d videos' % - (ie_result['extractor'], playlist, n_entries)) + report_download(n_entries) if self.params.get('playlistreverse', False): entries = entries[::-1] @@ -961,6 +984,8 @@ class YoutubeDL(object): 'playlist': playlist, 'playlist_id': ie_result.get('id'), 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_index': i + playliststart, 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], @@ -1016,7 +1041,7 @@ class YoutubeDL(object): '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s* - (?Pwidth|height|tbr|abr|vbr|asr|filesize|fps) + (?Pwidth|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps) \s*(?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) $ @@ -1038,21 +1063,24 @@ class YoutubeDL(object): if not m: STR_OPERATORS = { '=': operator.eq, - '!=': operator.ne, '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) \s*(?Pext|acodec|vcodec|container|protocol|format_id) - \s*(?P%s)(?P\s*\?)? + \s*(?P!\s*)?(?P%s)(?P\s*\?)? \s*(?P[a-zA-Z0-9._-]+) \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) m = str_operator_rex.search(filter_spec) if m: comparison_value = m.group('value') - op = STR_OPERATORS[m.group('op')] + str_op = STR_OPERATORS[m.group('op')] + if m.group('negation'): + op = lambda attr, value: not str_op + else: + op = str_op if not m: raise ValueError('Invalid filter specification %r' % filter_spec) @@ -1065,22 +1093,27 @@ class YoutubeDL(object): return _filter def _default_format_spec(self, info_dict, download=True): - req_format_list = [] - def can_have_partial_formats(): + def can_merge(): + merger = FFmpegMergerPP(self) + return merger.available and merger.can_merge() + + def prefer_best(): if self.params.get('simulate', False): - return True + return False if not download: - return True - if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': return False + if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-': + return True if info_dict.get('is_live'): - return False - merger = FFmpegMergerPP(self) - return merger.available and merger.can_merge() - if can_have_partial_formats(): - req_format_list.append('bestvideo+bestaudio') - req_format_list.append('best') + return True + if not can_merge(): + return True + return False + + req_format_list = ['bestvideo+bestaudio', 'best'] + if prefer_best(): + req_format_list.reverse() return '/'.join(req_format_list) def build_format_selector(self, format_spec): @@ -1453,23 +1486,28 @@ class YoutubeDL(object): if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + for cc_kind in ('subtitles', 'automatic_captions'): + cc = info_dict.get(cc_kind) + if cc: + for _, subtitle in cc.items(): + for subtitle_format in subtitle: + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if subtitle_format.get('ext') is None: + subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() + + automatic_captions = info_dict.get('automatic_captions') subtitles = info_dict.get('subtitles') - if subtitles: - for _, subtitle in subtitles.items(): - for subtitle_format in subtitle: - if subtitle_format.get('url'): - subtitle_format['url'] = sanitize_url(subtitle_format['url']) - if subtitle_format.get('ext') is None: - subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() if self.params.get('listsubtitles', False): if 'automatic_captions' in info_dict: - self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') + self.list_subtitles( + info_dict['id'], automatic_captions, 'automatic captions') self.list_subtitles(info_dict['id'], subtitles, 'subtitles') return + info_dict['requested_subtitles'] = self.process_subtitles( - info_dict['id'], subtitles, - info_dict.get('automatic_captions')) + info_dict['id'], subtitles, automatic_captions) # We now pick which formats have to be downloaded if info_dict.get('formats') is None: @@ -1710,12 +1748,17 @@ class YoutubeDL(object): if filename is None: return - try: - dn = os.path.dirname(sanitize_path(encodeFilename(filename))) - if dn and not os.path.exists(dn): - os.makedirs(dn) - except (OSError, IOError) as err: - self.report_error('unable to create directory ' + error_to_compat_str(err)) + def ensure_dir_exists(path): + try: + dn = os.path.dirname(path) + if dn and not os.path.exists(dn): + os.makedirs(dn) + return True + except (OSError, IOError) as err: + self.report_error('unable to create directory ' + error_to_compat_str(err)) + return False + + if not ensure_dir_exists(sanitize_path(encodeFilename(filename))): return if self.params.get('writedescription', False): @@ -1758,29 +1801,30 @@ class YoutubeDL(object): ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] - if sub_info.get('data') is not None: - sub_data = sub_info['data'] + sub_filename = subtitles_filename(filename, sub_lang, sub_format) + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): + self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) else: - try: - sub_data = ie._download_webpage( - sub_info['url'], info_dict['id'], note=False) - except ExtractorError as err: - self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_compat_str(err.cause))) - continue - try: - sub_filename = subtitles_filename(filename, sub_lang, sub_format) - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) + self.to_screen('[info] Writing video subtitles to: ' + sub_filename) + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/rg3/youtube-dl/issues/10268 + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + except (OSError, IOError): + self.report_error('Cannot write subtitles file ' + sub_filename) + return else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - # Use newline='' to prevent conversion of newline characters - # See https://github.com/rg3/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: - subfile.write(sub_data) - except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + sub_filename) - return + try: + sub_data = ie._request_webpage( + sub_info['url'], info_dict['id'], note=False).read() + with io.open(encodeFilename(sub_filename), 'wb') as subfile: + subfile.write(sub_data) + except (ExtractorError, IOError, OSError, ValueError) as err: + self.report_warning('Unable to download subtitle for "%s": %s' % + (sub_lang, error_to_compat_str(err))) + continue if self.params.get('writeinfojson', False): infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) @@ -1821,7 +1865,7 @@ class YoutubeDL(object): def compatible_formats(formats): video, audio = formats # Check extension - video_ext, audio_ext = audio.get('ext'), video.get('ext') + video_ext, audio_ext = video.get('ext'), audio.get('ext') if video_ext and audio_ext: COMPATIBLE_EXTS = ( ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'), @@ -1853,8 +1897,11 @@ class YoutubeDL(object): for f in requested_formats: new_info = dict(info_dict) new_info.update(f) - fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext']) + fname = prepend_extension( + self.prepare_filename(new_info), + 'f%s' % f['format_id'], new_info['ext']) + if not ensure_dir_exists(fname): + return downloaded.append(fname) partial_success = dl(fname, new_info) success = success and partial_success @@ -2203,11 +2250,20 @@ class YoutubeDL(object): sys.exc_clear() except Exception: pass - self._write_string('[debug] Python version %s - %s\n' % ( - platform.python_version(), platform_name())) + + def python_implementation(): + impl_name = platform.python_implementation() + if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'): + return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] + return impl_name + + self._write_string('[debug] Python version %s (%s) - %s\n' % ( + platform.python_version(), python_implementation(), + platform_name())) exe_versions = FFmpegPostProcessor.get_versions(self) exe_versions['rtmpdump'] = rtmpdump_version() + exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( '%s %s' % (exe, v) for exe, v in sorted(exe_versions.items()) @@ -2245,10 +2301,9 @@ class YoutubeDL(object): self.cookiejar = compat_cookiejar.CookieJar() else: opts_cookiefile = expand_path(opts_cookiefile) - self.cookiejar = compat_cookiejar.MozillaCookieJar( - opts_cookiefile) + self.cookiejar = YoutubeDLCookieJar(opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): - self.cookiejar.load() + self.cookiejar.load(ignore_discard=True, ignore_expires=True) cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: