X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FYoutubeDL.py;h=a89a71a250e3c02cb1157bfc0970e308474e4e89;hb=e0986e31cfd57392aaf3cc84b17fbf32c6134ff6;hp=e1bd408431b0b6b5c16b0a208dbec5ad198dfc4f;hpb=874e05975b9dde433dc633f3060c3b9013775343;p=youtube-dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e1bd40843..a89a71a25 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,9 +24,6 @@ import time import tokenize import traceback -if os.name == 'nt': - import ctypes - from .compat import ( compat_basestring, compat_cookiejar, @@ -34,6 +31,7 @@ from .compat import ( compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_os_name, compat_str, compat_tokenize_tokenize, compat_urllib_error, @@ -41,6 +39,8 @@ from .compat import ( compat_urllib_request_DataHandler, ) from .utils import ( + age_restricted, + args_to_str, ContentTooShortError, date_from_str, DateRange, @@ -60,13 +60,16 @@ from .utils import ( PagedList, parse_filesize, PerRequestProxyHandler, - PostProcessingError, platform_name, + PostProcessingError, preferredencoding, + prepend_extension, render_table, + replace_extension, SameFileError, sanitize_filename, sanitize_path, + sanitize_url, sanitized_Request, std_headers, subtitles_filename, @@ -77,16 +80,13 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, - prepend_extension, - replace_extension, - args_to_str, - age_restricted, ) from .cache import Cache -from .extractor import get_info_extractor, gen_extractors +from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM3u8PP, FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegMergerPP, @@ -95,6 +95,9 @@ from .postprocessor import ( ) from .version import __version__ +if compat_os_name == 'nt': + import ctypes + class YoutubeDL(object): """YoutubeDL class. @@ -263,7 +266,7 @@ class YoutubeDL(object): the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize, external_downloader_args. + xattr_set_filesize, external_downloader_args, hls_use_mpegts. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -375,8 +378,9 @@ class YoutubeDL(object): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) - self._ies_instances[ie.ie_key()] = ie - ie.set_downloader(self) + if not isinstance(ie, type): + self._ies_instances[ie.ie_key()] = ie + ie.set_downloader(self) def get_info_extractor(self, ie_key): """ @@ -394,7 +398,7 @@ class YoutubeDL(object): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ - for ie in gen_extractors(): + for ie in gen_extractor_classes(): self.add_info_extractor(ie) def add_post_processor(self, pp): @@ -450,7 +454,7 @@ class YoutubeDL(object): def to_console_title(self, message): if not self.params.get('consoletitle', False): return - if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) @@ -521,7 +525,7 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' @@ -533,7 +537,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' @@ -566,7 +570,7 @@ class YoutubeDL(object): elif template_dict.get('height'): template_dict['resolution'] = '%sp' % template_dict['height'] elif template_dict.get('width'): - template_dict['resolution'] = '?x%d' % template_dict['width'] + template_dict['resolution'] = '%dx?' % template_dict['width'] sanitize = lambda k, v: sanitize_filename( compat_str(v), @@ -605,12 +609,12 @@ class YoutubeDL(object): if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' - date = info_dict.get('upload_date', None) + date = info_dict.get('upload_date') if date is not None: dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) - view_count = info_dict.get('view_count', None) + view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') if min_views is not None and view_count < min_views: @@ -658,6 +662,7 @@ class YoutubeDL(object): if not ie.suitable(url): continue + ie = self.get_info_extractor(ie.ie_key()) if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') @@ -707,7 +712,6 @@ class YoutubeDL(object): It will also download the videos if 'download'. Returns the resolved ie_result. """ - result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): @@ -736,7 +740,7 @@ class YoutubeDL(object): force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) - for f in ('_type', 'url'): + for f in ('_type', 'url', 'ie_key'): if f in force_properties: del force_properties[f] new_result = info.copy() @@ -748,18 +752,18 @@ class YoutubeDL(object): new_result, download=download, extra_info=extra_info) elif result_type == 'playlist' or result_type == 'multi_video': # We process each entry in the playlist - playlist = ie_result.get('title', None) or ie_result.get('id', None) + playlist = ie_result.get('title') or ie_result.get('id') self.to_screen('[download] Downloading playlist: %s' % playlist) playlist_results = [] playliststart = self.params.get('playliststart', 1) - 1 - playlistend = self.params.get('playlistend', None) + playlistend = self.params.get('playlistend') # For backwards compatibility, interpret -1 as whole list if playlistend == -1: playlistend = None - playlistitems_str = self.params.get('playlist_items', None) + playlistitems_str = self.params.get('playlist_items') playlistitems = None if playlistitems_str is not None: def iter_playlistitems(format): @@ -783,7 +787,7 @@ class YoutubeDL(object): entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( - "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % + '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % (ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): if playlistitems: @@ -797,7 +801,7 @@ class YoutubeDL(object): playliststart, playlistend) n_entries = len(entries) self.to_screen( - "[%s] playlist %s: Downloading %d videos" % + '[%s] playlist %s: Downloading %d videos' % (ie_result['extractor'], playlist, n_entries)) else: # iterable if playlistitems: @@ -808,7 +812,7 @@ class YoutubeDL(object): ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( - "[%s] playlist %s: Downloading %d videos" % + '[%s] playlist %s: Downloading %d videos' % (ie_result['extractor'], playlist, n_entries)) if self.params.get('playlistreverse', False): @@ -904,7 +908,7 @@ class YoutubeDL(object): '*=': lambda attr, value: value in attr, } str_operator_rex = re.compile(r'''(?x) - \s*(?Pext|acodec|vcodec|container|protocol) + \s*(?Pext|acodec|vcodec|container|protocol|format_id) \s*(?P%s)(?P\s*\?)? \s*(?P[a-zA-Z0-9._-]+) \s*$ @@ -1228,12 +1232,20 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): + t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: t['id'] = '%d' % i - if thumbnails and 'thumbnail' not in info_dict: + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return + + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if 'display_id' not in info_dict and 'id' in info_dict: @@ -1258,6 +1270,8 @@ class YoutubeDL(object): if subtitles: for _, subtitle in subtitles.items(): for subtitle_format in subtitle: + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) if 'ext' not in subtitle_format: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() @@ -1287,8 +1301,13 @@ class YoutubeDL(object): if 'url' not in format: raise ExtractorError('Missing "url" key in result (index %d)' % i) + format['url'] = sanitize_url(format['url']) + if format.get('format_id') is None: format['format_id'] = compat_str(i) + else: + # Sanitize format_id from characters used in format selector expression + format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id']) format_id = format['format_id'] if format_id not in formats_dict: formats_dict[format_id] = [] @@ -1331,15 +1350,11 @@ class YoutubeDL(object): if self.params.get('listformats'): self.list_formats(info_dict) return - if self.params.get('list_thumbnails'): - self.list_thumbnails(info_dict) - return req_format = self.params.get('format') if req_format is None: req_format_list = [] if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and - info_dict['extractor'] in ['youtube', 'ted'] and not info_dict.get('is_live')): merger = FFmpegMergerPP(self) if merger.available and merger.can_merge(): @@ -1630,12 +1645,14 @@ class YoutubeDL(object): self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - if success: + if success and filename != '-': # Fixup content fixup_policy = self.params.get('fixup') if fixup_policy is None: fixup_policy = 'detect_or_warn' + INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.' + stretched_ratio = info_dict.get('stretched_ratio') if stretched_ratio is not None and stretched_ratio != 1: if fixup_policy == 'warn': @@ -1648,15 +1665,18 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(stretched_pp) else: self.report_warning( - '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'], stretched_ratio)) + '%s: Non-uniform pixel ratio (%s). %s' + % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') - if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if (info_dict.get('requested_formats') is None and + info_dict.get('container') == 'm4a_dash'): if fixup_policy == 'warn': - self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( - info_dict['id'])) + self.report_warning( + '%s: writing DASH m4a. ' + 'Only some players support this container.' + % info_dict['id']) elif fixup_policy == 'detect_or_warn': fixup_pp = FFmpegFixupM4aPP(self) if fixup_pp.available: @@ -1664,8 +1684,27 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(fixup_pp) else: self.report_warning( - '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'])) + '%s: writing DASH m4a. ' + 'Only some players support this container. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) + else: + assert fixup_policy in ('ignore', 'never') + + if (info_dict.get('protocol') == 'm3u8_native' or + info_dict.get('protocol') == 'm3u8' and + self.params.get('hls_prefer_native')): + if fixup_policy == 'warn': + self.report_warning('%s: malformated aac bitstream.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM3u8PP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: malformated aac bitstream. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') @@ -1796,7 +1835,7 @@ class YoutubeDL(object): else: res = '%sp' % format['height'] elif format.get('width') is not None: - res = '?x%d' % format['width'] + res = '%dx?' % format['width'] else: res = default return res @@ -1808,7 +1847,7 @@ class YoutubeDL(object): if fdict.get('language'): if res: res += ' ' - res += '[%s]' % fdict['language'] + res += '[%s] ' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: @@ -1829,7 +1868,9 @@ class YoutubeDL(object): if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('fps') is not None: - res += ', %sfps' % fdict['fps'] + if res: + res += ', ' + res += '%sfps' % fdict['fps'] if fdict.get('acodec') is not None: if res: res += ', ' @@ -1872,13 +1913,8 @@ class YoutubeDL(object): def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if not thumbnails: - tn_url = info_dict.get('thumbnail') - if tn_url: - thumbnails = [{'id': '0', 'url': tn_url}] - else: - self.to_screen( - '[info] No thumbnails present for %s' % info_dict['id']) - return + self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) + return self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) @@ -1923,6 +1959,8 @@ class YoutubeDL(object): write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') + if _LAZY_LOADER: + self._write_string('[debug] Lazy loading extractors enabled' + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'],