_ Git - youtube-dl/blob - youtube_dl/__init__.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
# Module metadata: the people credited as authors of this package,
# followed by the license string it is published under.
__authors__  = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
    'Christian Albrecht',
    'Dave Vasilevsky',
    'Jaime Marquínez Ferrándiz',
    'Jeff Crouse',
    'Osama Khalid',
    'Michael Walter',
    'M. Yasoob Ullah Khalid',
    'Julien Fraichard',
    'Johny Mo Swag',
    'Axel Noack',
    'Albert Kim',
    'Pierre Rudloff',
    'Huarong Huo',
    'Ismael Mejía',
    'Steffan \'Ruirize\' James',
    'Andras Elso',
    'Jelle van der Waa',
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
    'Sergey M.',
    'Michael Orlitzky',
    'Chris Gahan',
    'Saimadhav Heblikar',
    'Mike Col',
    'Oleg Prutz',
    'pulpe',
    'Andreas Schmitz',
    'Michael Kaiser',
    'Niklas Laxström',
    'David Triendl',
    'Anthony Weems',
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
    'Sainyam Kapoor',
    'Nicolas Évrard',
    'Jason Normore',
    'Hoje Lee',
    'Adam Thalhammer',
    'Georg Jähnig',
    'Ralf Haring',
    'Koki Takahashi',
    'Ariset Llerena',
    'Adam Malcontenti-Wilson',
    'Tobias Bell',
    'Naglis Jonaitis',
    'Charles Chen',
    'Hassaan Ali',
    'Dobrosław Żybort',
    'David Fabijan',
    'Sebastian Haas',
    'Alexander Kirk',
    'Erik Johnson',
    'Keith Beckman',
    'Ole Ernst',
    'Aaron McDaniel (mcd1992)',
    'Magnus Kolstad',
    'Hari Padmanaban',
    'Carlos Ramos',
    '5moufl',
    'lenaten',
    'Dennis Scheiba',
    'Damon Timm',
    'winwon',
    'Xavier Beynon',
    'Gabriel Schubiner',
)

__license__ = 'Public Domain'
  90
  91 import codecs
  92 import io
  93 import os
  94 import random
  95 import sys
  96
  97
  98 from .options import (
  99     parseOpts,
 100 )
 101 from .utils import (
 102     compat_expanduser,
 103     compat_getpass,
 104     compat_print,
 105     DateRange,
 106     DEFAULT_OUTTMPL,
 107     decodeOption,
 108     DownloadError,
 109     MaxDownloadsReached,
 110     preferredencoding,
 111     read_batch_urls,
 112     SameFileError,
 113     setproctitle,
 114     std_headers,
 115     write_string,
 116 )
 117 from .update import update_self
 118 from .downloader import (
 119     FileDownloader,
 120 )
 121 from .extractor import gen_extractors
 122 from .YoutubeDL import YoutubeDL
 123 from .postprocessor import (
 124     AtomicParsleyPP,
 125     FFmpegAudioFixPP,
 126     FFmpegMetadataPP,
 127     FFmpegVideoConvertor,
 128     FFmpegExtractAudioPP,
 129     FFmpegEmbedSubtitlePP,
 130     XAttrMetadataPP,
 131     ExecAfterDownloadPP,
 132 )
 133
 134
 135 def _real_main(argv=None):
 136     # Compatibility fixes for Windows
 137     if sys.platform == 'win32':
 138         # https://github.com/rg3/youtube-dl/issues/820
 139         codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
 140
 141     setproctitle(u'youtube-dl')
 142
 143     parser, opts, args = parseOpts(argv)
 144
 145     # Set user agent
 146     if opts.user_agent is not None:
 147         std_headers['User-Agent'] = opts.user_agent
 148
 149     # Set referer
 150     if opts.referer is not None:
 151         std_headers['Referer'] = opts.referer
 152
 153     # Custom HTTP headers
 154     if opts.headers is not None:
 155         for h in opts.headers:
 156             if h.find(':', 1) < 0:
 157                 parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h)
 158             key, value = h.split(':', 2)
 159             if opts.verbose:
 160                 write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value))
 161             std_headers[key] = value
 162
 163     # Dump user agent
 164     if opts.dump_user_agent:
 165         compat_print(std_headers['User-Agent'])
 166         sys.exit(0)
 167
 168     # Batch file verification
 169     batch_urls = []
 170     if opts.batchfile is not None:
 171         try:
 172             if opts.batchfile == '-':
 173                 batchfd = sys.stdin
 174             else:
 175                 batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
 176             batch_urls = read_batch_urls(batchfd)
 177             if opts.verbose:
 178                 write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
 179         except IOError:
 180             sys.exit(u'ERROR: batch file could not be read')
 181     all_urls = batch_urls + args
 182     all_urls = [url.strip() for url in all_urls]
 183     _enc = preferredencoding()
 184     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 185
 186     extractors = gen_extractors()
 187
 188     if opts.list_extractors:
 189         for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
 190             compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
 191             matchedUrls = [url for url in all_urls if ie.suitable(url)]
 192             for mu in matchedUrls:
 193                 compat_print(u'  ' + mu)
 194         sys.exit(0)
 195     if opts.list_extractor_descriptions:
 196         for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
 197             if not ie._WORKING:
 198                 continue
 199             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
 200             if desc is False:
 201                 continue
 202             if hasattr(ie, 'SEARCH_KEY'):
 203                 _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny')
 204                 _COUNTS = (u'', u'5', u'10', u'all')
 205                 desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
 206             compat_print(desc)
 207         sys.exit(0)
 208
 209
 210     # Conflicting, missing and erroneous options
 211     if opts.usenetrc and (opts.username is not None or opts.password is not None):
 212         parser.error(u'using .netrc conflicts with giving username/password')
 213     if opts.password is not None and opts.username is None:
 214         parser.error(u'account username missing\n')
 215     if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
 216         parser.error(u'using output template conflicts with using title, video ID or auto number')
 217     if opts.usetitle and opts.useid:
 218         parser.error(u'using title conflicts with using video ID')
 219     if opts.username is not None and opts.password is None:
 220         opts.password = compat_getpass(u'Type account password and press [Return]: ')
 221     if opts.ratelimit is not None:
 222         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
 223         if numeric_limit is None:
 224             parser.error(u'invalid rate limit specified')
 225         opts.ratelimit = numeric_limit
 226     if opts.min_filesize is not None:
 227         numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
 228         if numeric_limit is None:
 229             parser.error(u'invalid min_filesize specified')
 230         opts.min_filesize = numeric_limit
 231     if opts.max_filesize is not None:
 232         numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
 233         if numeric_limit is None:
 234             parser.error(u'invalid max_filesize specified')
 235         opts.max_filesize = numeric_limit
 236     if opts.retries is not None:
 237         try:
 238             opts.retries = int(opts.retries)
 239         except (TypeError, ValueError):
 240             parser.error(u'invalid retry count specified')
 241     if opts.buffersize is not None:
 242         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
 243         if numeric_buffersize is None:
 244             parser.error(u'invalid buffer size specified')
 245         opts.buffersize = numeric_buffersize
 246     if opts.playliststart <= 0:
 247         raise ValueError(u'Playlist start must be positive')
 248     if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
 249         raise ValueError(u'Playlist end must be greater than playlist start')
 250     if opts.extractaudio:
 251         if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
 252             parser.error(u'invalid audio format specified')
 253     if opts.audioquality:
 254         opts.audioquality = opts.audioquality.strip('k').strip('K')
 255         if not opts.audioquality.isdigit():
 256             parser.error(u'invalid audio quality specified')
 257     if opts.recodevideo is not None:
 258         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
 259             parser.error(u'invalid video recode format specified')
 260     if opts.date is not None:
 261         date = DateRange.day(opts.date)
 262     else:
 263         date = DateRange(opts.dateafter, opts.datebefore)
 264
 265     # Do not download videos when there are audio-only formats
 266     if opts.extractaudio and not opts.keepvideo and opts.format is None:
 267         opts.format = 'bestaudio/best'
 268
 269     # --all-sub automatically sets --write-sub if --write-auto-sub is not given
 270     # this was the old behaviour if only --all-sub was given.
 271     if opts.allsubtitles and (opts.writeautomaticsub == False):
 272         opts.writesubtitles = True
 273
 274     if sys.version_info < (3,):
 275         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
 276         if opts.outtmpl is not None:
 277             opts.outtmpl = opts.outtmpl.decode(preferredencoding())
 278     outtmpl =((opts.outtmpl is not None and opts.outtmpl)
 279             or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
 280             or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
 281             or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
 282             or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
 283             or (opts.useid and u'%(id)s.%(ext)s')
 284             or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
 285             or DEFAULT_OUTTMPL)
 286     if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
 287         parser.error(u'Cannot download a video and extract audio into the same'
 288                      u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
 289                      u' template'.format(outtmpl))
 290
 291     any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
 292     download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
 293
 294     ydl_opts = {
 295         'usenetrc': opts.usenetrc,
 296         'username': opts.username,
 297         'password': opts.password,
 298         'twofactor': opts.twofactor,
 299         'videopassword': opts.videopassword,
 300         'quiet': (opts.quiet or any_printing),
 301         'no_warnings': opts.no_warnings,
 302         'forceurl': opts.geturl,
 303         'forcetitle': opts.gettitle,
 304         'forceid': opts.getid,
 305         'forcethumbnail': opts.getthumbnail,
 306         'forcedescription': opts.getdescription,
 307         'forceduration': opts.getduration,
 308         'forcefilename': opts.getfilename,
 309         'forceformat': opts.getformat,
 310         'forcejson': opts.dumpjson,
 311         'dump_single_json': opts.dump_single_json,
 312         'simulate': opts.simulate or any_printing,
 313         'skip_download': opts.skip_download,
 314         'format': opts.format,
 315         'format_limit': opts.format_limit,
 316         'listformats': opts.listformats,
 317         'outtmpl': outtmpl,
 318         'autonumber_size': opts.autonumber_size,
 319         'restrictfilenames': opts.restrictfilenames,
 320         'ignoreerrors': opts.ignoreerrors,
 321         'ratelimit': opts.ratelimit,
 322         'nooverwrites': opts.nooverwrites,
 323         'retries': opts.retries,
 324         'buffersize': opts.buffersize,
 325         'noresizebuffer': opts.noresizebuffer,
 326         'continuedl': opts.continue_dl,
 327         'noprogress': opts.noprogress,
 328         'progress_with_newline': opts.progress_with_newline,
 329         'playliststart': opts.playliststart,
 330         'playlistend': opts.playlistend,
 331         'noplaylist': opts.noplaylist,
 332         'logtostderr': opts.outtmpl == '-',
 333         'consoletitle': opts.consoletitle,
 334         'nopart': opts.nopart,
 335         'updatetime': opts.updatetime,
 336         'writedescription': opts.writedescription,
 337         'writeannotations': opts.writeannotations,
 338         'writeinfojson': opts.writeinfojson,
 339         'writethumbnail': opts.writethumbnail,
 340         'writesubtitles': opts.writesubtitles,
 341         'writeautomaticsub': opts.writeautomaticsub,
 342         'allsubtitles': opts.allsubtitles,
 343         'listsubtitles': opts.listsubtitles,
 344         'subtitlesformat': opts.subtitlesformat,
 345         'subtitleslangs': opts.subtitleslangs,
 346         'matchtitle': decodeOption(opts.matchtitle),
 347         'rejecttitle': decodeOption(opts.rejecttitle),
 348         'max_downloads': opts.max_downloads,
 349         'prefer_free_formats': opts.prefer_free_formats,
 350         'verbose': opts.verbose,
 351         'dump_intermediate_pages': opts.dump_intermediate_pages,
 352         'write_pages': opts.write_pages,
 353         'test': opts.test,
 354         'keepvideo': opts.keepvideo,
 355         'min_filesize': opts.min_filesize,
 356         'max_filesize': opts.max_filesize,
 357         'min_views': opts.min_views,
 358         'max_views': opts.max_views,
 359         'daterange': date,
 360         'cachedir': opts.cachedir,
 361         'youtube_print_sig_code': opts.youtube_print_sig_code,
 362         'age_limit': opts.age_limit,
 363         'download_archive': download_archive_fn,
 364         'cookiefile': opts.cookiefile,
 365         'nocheckcertificate': opts.no_check_certificate,
 366         'prefer_insecure': opts.prefer_insecure,
 367         'proxy': opts.proxy,
 368         'socket_timeout': opts.socket_timeout,
 369         'bidi_workaround': opts.bidi_workaround,
 370         'debug_printtraffic': opts.debug_printtraffic,
 371         'prefer_ffmpeg': opts.prefer_ffmpeg,
 372         'include_ads': opts.include_ads,
 373         'default_search': opts.default_search,
 374         'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
 375         'encoding': opts.encoding,
 376         'exec_cmd': opts.exec_cmd,
 377         'extract_flat': opts.extract_flat,
 378     }
 379
 380     with YoutubeDL(ydl_opts) as ydl:
 381         ydl.print_debug_header()
 382         ydl.add_default_info_extractors()
 383
 384         # PostProcessors
 385         # Add the metadata pp first, the other pps will copy it
 386         if opts.addmetadata:
 387             ydl.add_post_processor(FFmpegMetadataPP())
 388         if opts.extractaudio:
 389             ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
 390         if opts.recodevideo:
 391             ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
 392         if opts.embedsubtitles:
 393             ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
 394         if opts.xattrs:
 395             ydl.add_post_processor(XAttrMetadataPP())
 396         if opts.embedthumbnail:
 397             if not opts.addmetadata:
 398                 ydl.add_post_processor(FFmpegAudioFixPP())
 399             ydl.add_post_processor(AtomicParsleyPP())
 400
 401
 402         # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
 403         # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
 404         if opts.exec_cmd:
 405             ydl.add_post_processor(ExecAfterDownloadPP(
 406                 verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
 407
 408         # Update version
 409         if opts.update_self:
 410             update_self(ydl.to_screen, opts.verbose)
 411
 412         # Remove cache dir
 413         if opts.rm_cachedir:
 414             ydl.cache.remove()
 415
 416         # Maybe do nothing
 417         if (len(all_urls) < 1) and (opts.load_info_filename is None):
 418             if not (opts.update_self or opts.rm_cachedir):
 419                 parser.error(u'you must provide at least one URL')
 420             else:
 421                 sys.exit()
 422
 423         try:
 424             if opts.load_info_filename is not None:
 425                 retcode = ydl.download_with_info_file(opts.load_info_filename)
 426             else:
 427                 retcode = ydl.download(all_urls)
 428         except MaxDownloadsReached:
 429             ydl.to_screen(u'--max-download limit reached, aborting.')
 430             retcode = 101
 431
 432     sys.exit(retcode)
 433
 434
 435 def main(argv=None):
 436     try:
 437         _real_main(argv)
 438     except DownloadError:
 439         sys.exit(1)
 440     except SameFileError:
 441         sys.exit(u'ERROR: fixed output name but more than one file to download')
 442     except KeyboardInterrupt:
 443         sys.exit(u'\nERROR: Interrupted by user')