2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import
28 compat_urllib_request,
47 UnavailableVideoError,
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
57 class YoutubeDL(object):
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object hands it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL processes the extracted information, possibly using a File
73 Downloader to download the video.
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135 socket_timeout: Time to wait for unresponsive hosts, in seconds
137 The following parameters are not used by YoutubeDL itself, they are used by
139 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
140 noresizebuffer, retries, continuedl, noprogress, consoletitle
146 _download_retcode = None
147 _num_downloads = None
def __init__(self, params=None):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.
    """
    # Bind the options first: everything below reads them through
    # self.params, so a missing dictionary or a missing key cannot crash
    # the constructor.
    self.params = {} if params is None else params
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = [sys.stdout, sys.stderr][bool(self.params.get('logtostderr', False))]

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    # Builds self._opener and self.cookiejar, which urlopen() and
    # __exit__() rely on.
    self._setup_opener()
def add_info_extractor(self, ie):
    """Append ie to the extractor list, index it by key and bind it to this object."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If no instance is registered yet, a new one is created, added to the
    extractor list and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors, in order,
    at the end of the extractor list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
def add_post_processor(self, pp):
    """Append pp to the postprocessor chain and bind it to this object."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
def to_screen(self, message, skip_eol=False):
    """Send message to the configured logger, or print it unless quiet.

    With a 'logger' option the message goes to its debug() method;
    otherwise it is written to the screen file, followed by a newline
    unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if self.params.get('quiet', False):
        return
    terminator = u'' if skip_eol else u'\n'
    write_string(message + terminator, self._screen_file)
def to_stderr(self, message):
    """Send message to the configured logger's error(), or write it to stderr."""
    assert type(message) == type(u'')
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    output = message + u'\n'
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    wants_bytes = 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def to_console_title(self, message):
    """Set the console/terminal title to message when 'consoletitle' is enabled."""
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            write_string(u'\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
    """Push the current terminal title on the title stack when 'consoletitle' is enabled."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
def restore_console_title(self):
    """Pop the terminal title from the title stack when 'consoletitle' is enabled."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
253 self.save_console_title()
256 def __exit__(self, *args):
257 self.restore_console_title()
259 if self.params.get('cookiefile') is not None:
260 self.cookiejar.save()
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr. In verbose mode a traceback
    is printed as well: tb if given, otherwise one built from the exception
    being handled or from the current stack. Unless 'ignoreerrors' is set a
    DownloadError is raised; otherwise the return code is set to 1.

    tb, if given, is additional traceback information.
    """
    def _wrapped_exc_info():
        # Some exceptions carry the exc_info of the error that caused them.
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = u''
                inner = _wrapped_exc_info()
                if inner:
                    tb += u''.join(traceback.format_exception(*inner))
                tb += compat_str(traceback.format_exc())
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = _wrapped_exc_info() if sys.exc_info()[0] else None
    if not exc_info:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colored = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;33mWARNING:\033[0m' if colored else u'WARNING:'
    self.to_stderr(u'%s %s' % (_msg_header, message))
def report_error(self, message, tb=None):
    '''
    Hand message to trouble() prefixed with 'ERROR:'.
    The prefix is colored in red when stderr is a tty and the OS is not
    Windows.
    '''
    colored = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;31mERROR:\033[0m' if colored else u'ERROR:'
    self.trouble(u'%s %s' % (_msg_header, message), tb)
def report_writedescription(self, descfn):
    """Announce on screen that the description is being written to descfn."""
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
def report_writesubtitles(self, sub_filename):
    """Announce on screen that the subtitles are being written to sub_filename."""
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
def report_writeinfojson(self, infofn):
    """Announce on screen that the JSON metadata goes to infofn."""
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
def report_writeannotations(self, annofn):
    """Announce on screen that the annotations are being written to annofn."""
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Advance the per-file ordinal by one."""
    self._num_downloads = self._num_downloads + 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option with the sanitized fields of info_dict
    plus 'epoch' and 'autonumber'. Returns None, after reporting an error,
    when the template cannot be expanded.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        fields['autonumber'] = autonumber_templ % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')
        sanitized = {}
        for key, value in fields.items():
            # Missing values are rendered as the literal NA.
            text = u'NA' if value is None else compat_str(value)
            sanitized[key] = sanitize_filename(
                text, restricted=restricted, is_id=(key == u'id'))

        tmpl = os.path.expanduser(self.params['outtmpl'])
        return tmpl % sanitized
    except KeyError as err:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
374 def _match_entry(self, info_dict):
375 """ Returns None iff the file should be downloaded """
377 if 'title' in info_dict:
378 # This can happen when we're just evaluating the playlist
379 title = info_dict['title']
380 matchtitle = self.params.get('matchtitle', False)
382 if not re.search(matchtitle, title, re.IGNORECASE):
383 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
384 rejecttitle = self.params.get('rejecttitle', False)
386 if re.search(rejecttitle, title, re.IGNORECASE):
387 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
388 date = info_dict.get('upload_date', None)
390 dateRange = self.params.get('daterange', DateRange())
391 if date not in dateRange:
392 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
393 age_limit = self.params.get('age_limit')
394 if age_limit is not None:
395 if age_limit < info_dict.get('age_limit', 0):
396 return u'Skipping "' + title + '" because it is age restricted'
397 if self.in_download_archive(info_dict):
398 return (u'%s has already been recorded in archive'
399 % info_dict.get('title', info_dict.get('id', u'video')))
403 def add_extra_info(info_dict, extra_info):
404 '''Set the keys from extra_info in info dict if they are missing'''
405 for key, value in extra_info.items():
406 info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If ie_key is given only that extractor is tried.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result
    If process is true the result is passed through process_ie_result and
    that value is returned; otherwise the extractor result itself is
    returned. Returns None when extraction fails with a reported error or
    no extractor is suitable.
    '''
    # A fresh dict per call instead of a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'extractor_key': ie.ie_key(),
                })
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result. For a 'url_transparent' result whose
    target could not be extracted, the empty value returned by
    extract_info is returned as is.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when the extraction failed and the
        # error was only reported; there is nothing to merge then.
        if not info:
            return info

        def make_result(embedded_info):
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'urlhandle', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                # These fields always come from the embedded result.
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def select_format(self, format_spec, available_formats):
    """Pick the format described by format_spec from available_formats.

    format_spec may be 'best' (or None), 'worst', one of the extensions
    mp4/flv/webm/3gp, or a format_id. 'best' takes the last element of
    available_formats and 'worst' the first; an extension or format_id
    takes the last match. Returns None when nothing matches or when
    available_formats is empty.
    """
    # An empty list would otherwise raise IndexError for best/worst.
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
def process_video_result(self, info_dict, download=True):
    """Select the format(s) to download for a single video result.

    Fills in missing playlist, format_id, format and ext fields, applies
    the 'format_limit', 'prefer_free_formats' and 'format' options and, if
    download is true, hands every selected format to process_info.

    Returns info_dict updated with the last selected format, or None
    after listing the formats when 'listformats' is set.
    Raises ExtractorError when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, format) in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = u'{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first. None cannot be ordered against
            # an int on Python 3, so -1 stands in for a missing value.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the entry filters and the 'max_downloads' limit, prints the
    fields requested through the force* options, writes the optional
    description, annotations, subtitles, JSON metadata and thumbnail
    files, downloads the video unless 'skip_download' is set, runs the
    postprocessors and records the video in the download archive.

    Returns None in every case; stops early after a reported error.
    Raises MaxDownloadsReached when the download limit is exceeded and
    UnavailableVideoError when the download fails with an OS-level error.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # The download count is incremented here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitle file itself; the description file name
                # belongs to another step and may not even be defined here.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the serialized metadata.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                # Encode the name like every other file written by this method.
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file counts as a successful download.
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns the accumulated download return code.
    """
    if len(url_list) > 1:
        fixed_name = '%' not in self.params['outtmpl']
        if fixed_name and self.params.get('max_downloads') != 1:
            raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Every postprocessor receives a copy of ie_info extended with
    'filepath'. The original file is removed when the postprocessors
    decide against keeping it and 'keepvideo' is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if keep_video_wish is None:
            continue
        # A wish to keep always wins; a wish to delete only counts while
        # no decision has been taken yet.
        if keep_video_wish or keep_video is None:
            keep_video = keep_video_wish
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
867 def _make_archive_id(self, info_dict):
868 # Future-proof against any change in case
869 # and backwards compatibility with prior versions
870 extractor = info_dict.get('extractor_key')
871 if extractor is None:
872 if 'id' in info_dict:
873 extractor = info_dict.get('ie_key') # key in a playlist
874 if extractor is None:
875 return None # Incomplete video information
876 return extractor.lower() + u' ' + info_dict['id']
def in_download_archive(self, info_dict):
    """Return True if info_dict is listed in the 'download_archive' file.

    Returns False when no archive is configured, when no archive id can be
    built for info_dict, or when the archive file does not exist yet.
    """
    fn = self.params.get('download_archive')
    vid_id = None if fn is None else self._make_archive_id(info_dict)
    if vid_id is None:
        return False  # No archive, or incomplete video information

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file simply means nothing was recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the 'download_archive' file, if one is configured."""
    fn = self.params.get('download_archive')
    if fn is not None:
        vid_id = self._make_archive_id(info_dict)
        assert vid_id
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + u'\n')
907 def format_resolution(format, default='unknown'):
908 if format.get('vcodec') == 'none':
910 if format.get('_resolution') is not None:
911 return format['_resolution']
912 if format.get('height') is not None:
913 if format.get('width') is not None:
914 res = u'%sx%s' % (format['width'], format['height'])
916 res = u'%sp' % format['height']
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Each row shows the format code, extension, resolution and a note; with
    more than one format the first row is tagged (worst) and the last
    (best).
    """
    def format_note(fdict):
        # Note column: free-text note, video codec/bitrate, audio
        # codec/bitrate and file size.
        note = u''
        if fdict.get('format_note') is not None:
            note += fdict['format_note'] + u' '
        vcodec = fdict.get('vcodec')
        if vcodec is not None and vcodec != 'none':
            note += u'%-5s' % vcodec
        elif fdict.get('vbr') is not None:
            note += u'video'
        if fdict.get('vbr') is not None:
            note += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if note:
                note += u', '
            note += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if note:
                note += u', '
            note += 'audio'
        if fdict.get('abr') is not None:
            note += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if note:
                note += u', '
            note += format_bytes(fdict['filesize'])
        return note

    def line(format, idlen=20):
        row_template = u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s'
        return (row_template % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    idlen = max(len(u'format code'), longest_id)
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    # The header is rendered through the same row formatter as the data.
    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
def urlopen(self, req):
    """ Open req through this object's opener and return the result """
    opener = self._opener
    return opener.open(req)
def print_debug_header(self):
    """Print version, git revision, Python and proxy details in verbose mode.

    Does nothing unless the 'verbose' option is set. The git revision is
    best effort: any failure to obtain it is ignored.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        # sys.exc_clear only exists on Python 2.
        if hasattr(sys, 'exc_clear'):
            sys.exc_clear()
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
def _setup_opener(self):
    """Build the URL opener from the cookie, proxy, certificate and timeout options.

    Sets self.cookiejar and self._opener, installs the opener globally and
    sets the default socket timeout (600 seconds unless 'socket_timeout'
    is given).
    """
    timeout_val = self.params.get('socket_timeout')
    timeout = 600 if timeout_val is None else float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy option results in an empty proxy map.
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)