import io
import json
import os
+import platform
import re
import shutil
+import subprocess
import socket
import sys
import time
import ctypes
from .utils import (
+ compat_cookiejar,
compat_http_client,
compat_print,
compat_str,
encodeFilename,
ExtractorError,
locked_file,
+ make_HTTPS_handler,
MaxDownloadsReached,
PostProcessingError,
+ platform_name,
preferredencoding,
SameFileError,
sanitize_filename,
UnavailableVideoError,
write_json_file,
write_string,
+ YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
+from .version import __version__
class YoutubeDL(object):
playlistend: Playlist item to end at.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
+ logger: Log messages to a logging.Logger instance.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
downloadarchive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
+ cookiefile: File name where cookies should be read from and dumped to.
+ nocheckcertificate:Do not verify SSL certificates
+ proxy: URL of the proxy server to use
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
if '%(stitle)s' in self.params['outtmpl']:
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+ self._setup_opener()
+
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
- if not self.params.get('quiet', False):
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ elif not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
write_string(output, self._screen_file)
def to_stderr(self, message):
"""Print message to stderr."""
assert type(message) == type(u'')
- output = message + u'\n'
- if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
- output = output.encode(preferredencoding())
- sys.stderr.write(output)
+ if self.params.get('logger'):
+ self.params['logger'].error(message)
+ else:
+ output = message + u'\n'
+ if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
+ output = output.encode(preferredencoding())
+ sys.stderr.write(output)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
def __exit__(self, *args):
self.restore_console_title()
+
+ if self.params.get('cookiefile') is not None:
+ self.cookiejar.save()
def fixed_template(self):
"""Checks if the output template is fixed."""
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
- return (u'%(title)s has already been recorded in archive'
- % info_dict)
+ return (u'%s has already been recorded in archive'
+ % info_dict.get('title', info_dict.get('id', u'video')))
return None
@staticmethod
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
- self.add_extra_info(ie_result, extra_info)
+
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
+
+ reason = self._match_entry(entry)
+ if reason is not None:
+ self.to_screen(u'[download] ' + reason)
+ continue
+
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
# Forced printings
if self.params.get('forcetitle', False):
- compat_print(info_dict['title'])
+ compat_print(info_dict['fulltitle'])
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False):
for url in url_list:
try:
#It also downloads the videos
- videos = self.extract_info(url)
+ self.extract_info(url)
except UnavailableVideoError:
self.report_error(u'unable to download video')
except MaxDownloadsReached:
fn = self.params.get('download_archive')
if fn is None:
return False
- vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ extractor = info_dict.get('extractor_id')
+ if extractor is None:
+ if 'id' in info_dict:
+ extractor = info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ return False # Incomplete video information
+ # Future-proof against any change in case
+ # and backwards compatibility with prior versions
+ extractor = extractor.lower()
+ vid_id = extractor + u' ' + info_dict['id']
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
'_resolution': u'resolution', 'format_note': u'note'})
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, u"\n".join(formats_s)))
+
+    def urlopen(self, req):
+        """Open req with this instance's opener and return what it returns."""
+        opener = self._opener
+        return opener.open(req)
+
+    def print_debug_header(self):
+        """Write diagnostic information when the 'verbose' param is set.
+
+        Emits, through write_string: the youtube-dl version, the short git
+        HEAD hash (only when `git rev-parse` prints something that starts
+        like a hex hash), the Python version together with the platform
+        name, and the proxies collected from the opener's handlers.
+        Returns immediately when 'verbose' is unset or false.
+        """
+        if not self.params.get('verbose'):
+            return
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        try:
+            sp = subprocess.Popen(
+                ['git', 'rev-parse', '--short', 'HEAD'],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                cwd=os.path.dirname(os.path.abspath(__file__)))
+            out, err = sp.communicate()
+            out = out.decode().strip()
+            if re.match('[0-9a-f]+', out):
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
+        except Exception:
+            # Best effort only: any failure of the git probe is ignored.
+            # Exception (not a bare except) is caught so that
+            # KeyboardInterrupt and SystemExit still propagate.
+            try:
+                sys.exc_clear()
+            except AttributeError:
+                # sys.exc_clear() does not exist on Python 3
+                pass
+        write_string(u'[debug] Python version %s - %s' %
+                     (platform.python_version(), platform_name()) + u'\n')
+
+        # Merge the proxy tables of every handler that exposes one
+        proxy_map = {}
+        for handler in self._opener.handlers:
+            if hasattr(handler, 'proxies'):
+                proxy_map.update(handler.proxies)
+        write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
+
+    def _setup_opener(self, timeout=300):
+        """Build this instance's URL opener and install it globally.
+
+        Reads the 'cookiefile', 'proxy' and 'nocheckcertificate' params.
+        Stores the cookie jar in self.cookiejar and the opener in
+        self._opener, installs the opener via install_opener and sets the
+        default socket timeout to `timeout`.
+        """
+        params = self.params
+        cookie_path = params.get('cookiefile')
+        proxy_url = params.get('proxy')
+
+        if cookie_path is not None:
+            self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
+            # Load only when the cookie file is readable
+            if os.access(cookie_path, os.R_OK):
+                self.cookiejar.load()
+        else:
+            self.cookiejar = compat_cookiejar.CookieJar()
+        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
+            self.cookiejar)
+
+        if proxy_url is None:
+            # No explicit proxy given: use what getproxies() reports
+            proxies = compat_urllib_request.getproxies()
+            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+            if 'http' in proxies and 'https' not in proxies:
+                proxies['https'] = proxies['http']
+        elif proxy_url == '':
+            # An empty proxy string yields an empty proxy table
+            proxies = {}
+        else:
+            proxies = {'http': proxy_url, 'https': proxy_url}
+        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+
+        skip_cert_check = params.get('nocheckcertificate', False)
+        https_handler = make_HTTPS_handler(skip_cert_check)
+
+        opener = compat_urllib_request.build_opener(
+            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+        # Delete the default user-agent header, which would otherwise apply in
+        # cases where our custom HTTP handler doesn't come into play
+        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+        opener.addheaders = []
+        self._opener = opener
+
+        # TODO remove this global modification
+        compat_urllib_request.install_opener(opener)
+        socket.setdefaulttimeout(timeout)
'Jelle van der Waa',
'Marcin Cieślak',
'Anton Larionov',
+ 'Takuya Tsuchida',
)
__license__ = 'Public Domain'
import codecs
-import collections
import getpass
import optparse
import os
import random
import re
import shlex
-import socket
import subprocess
import sys
-import traceback
-import platform
from .utils import (
- compat_cookiejar,
compat_print,
- compat_str,
- compat_urllib_request,
DateRange,
decodeOption,
determine_ext,
DownloadError,
get_cachedir,
- make_HTTPS_handler,
MaxDownloadsReached,
- platform_name,
preferredencoding,
SameFileError,
std_headers,
write_string,
- YoutubeDLHandler,
)
from .update import update_self
-from .version import __version__
from .FileDownloader import (
FileDownloader,
)
from .extractor import gen_extractors
+from .version import __version__
from .YoutubeDL import YoutubeDL
from .PostProcessor import (
FFmpegMetadataPP,
parser, opts, args = parseOpts(argv)
- # Open appropriate CookieJar
- if opts.cookiefile is None:
- jar = compat_cookiejar.CookieJar()
- else:
- try:
- jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
- if os.access(opts.cookiefile, os.R_OK):
- jar.load()
- except (IOError, OSError) as err:
- if opts.verbose:
- traceback.print_exc()
- write_string(u'ERROR: unable to open cookie file\n')
- sys.exit(101)
# Set user agent
if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent
all_urls = batchurls + args
all_urls = [url.strip() for url in all_urls]
- opener = _setup_opener(jar=jar, opts=opts)
-
extractors = gen_extractors()
if opts.list_extractors:
if opts.retries is not None:
try:
opts.retries = int(opts.retries)
- except (TypeError, ValueError) as err:
+ except (TypeError, ValueError):
parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0:
raise ValueError(u'Playlist start must be positive')
- except (TypeError, ValueError) as err:
+ except (TypeError, ValueError):
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
- except (TypeError, ValueError) as err:
+ except (TypeError, ValueError):
parser.error(u'invalid playlist end number specified')
if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
'download_archive': opts.download_archive,
+ 'cookiefile': opts.cookiefile,
+ 'nocheckcertificate': opts.no_check_certificate,
}
with YoutubeDL(ydl_opts) as ydl:
- if opts.verbose:
- write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
- try:
- sp = subprocess.Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_string(u'[debug] Git HEAD: ' + out + u'\n')
- except:
- try:
- sys.exc_clear()
- except:
- pass
- write_string(u'[debug] Python version %s - %s' %
- (platform.python_version(), platform_name()) + u'\n')
-
- proxy_map = {}
- for handler in opener.handlers:
- if hasattr(handler, 'proxies'):
- proxy_map.update(handler.proxies)
- write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
-
+ ydl.print_debug_header()
ydl.add_default_info_extractors()
# PostProcessors
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
- # Dump cookie jar if requested
- if opts.cookiefile is not None:
- try:
- jar.save()
- except (IOError, OSError):
- sys.exit(u'ERROR: unable to save cookie jar')
-
sys.exit(retcode)
-def _setup_opener(jar=None, opts=None, timeout=300):
- if opts is None:
- FakeOptions = collections.namedtuple(
- 'FakeOptions', ['proxy', 'no_check_certificate'])
- opts = FakeOptions(proxy=None, no_check_certificate=False)
-
- cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
- if opts.proxy is not None:
- if opts.proxy == '':
- proxies = {}
- else:
- proxies = {'http': opts.proxy, 'https': opts.proxy}
- else:
- proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
- if 'http' in proxies and 'https' not in proxies:
- proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
- https_handler = make_HTTPS_handler(opts)
- opener = compat_urllib_request.build_opener(
- https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
- # Delete the default user-agent header, which would otherwise apply in
- # cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
- opener.addheaders = []
- compat_urllib_request.install_opener(opener)
- socket.setdefaulttimeout(timeout)
- return opener
-
-
def main(argv=None):
try:
_real_main(argv)
import socket
import sys
import netrc
+ import xml.etree.ElementTree
from ..utils import (
compat_http_client,
compat_urllib_error,
- compat_urllib_request,
compat_str,
clean_html,
unescapeHTML,
)
+
class InfoExtractor(object):
"""Information Extractor class.
elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
try:
- return compat_urllib_request.urlopen(url_or_request)
+ return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
""" Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
+    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'):
+        """Download a document and parse it as XML.
+
+        The text is fetched with _download_webpage (note and errnote are
+        passed through to it), encoded as UTF-8 and handed to
+        xml.etree.ElementTree.fromstring.
+
+        Returns the root xml.etree.ElementTree.Element.
+        """
+        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
self.to_screen(u'Logging in')
#Methods for following #608
- def url_result(self, url, ie=None):
+ def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""