2 # -*- coding: utf-8 -*-
4 from __future__ import unicode_literals
32 import xml.etree.ElementTree
42 compat_socket_create_connection,
46 compat_urllib_parse_urlparse,
47 compat_urllib_request,
53 # This is not clearly defined otherwise
54 compiled_regex_type = type(re.compile(''))
57 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
58 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
59 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
60 'Accept-Encoding': 'gzip, deflate',
61 'Accept-Language': 'en-us,en;q=0.5',
# English month names, used for locale-independent date parsing
# (see month_by_name / month_by_abbreviation below).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
70 def preferredencoding():
71 """Get preferred encoding.
73 Returns the best encoding scheme for the system, based on
74 locale.getpreferredencoding() and some further tweaks.
77 pref = locale.getpreferredencoding()
85 def write_json_file(obj, fn):
86 """ Encode obj as JSON and write it to fn, atomically if possible """
88 fn = encodeFilename(fn)
89 if sys.version_info < (3, 0) and sys.platform != 'win32':
90 encoding = get_filesystem_encoding()
91 # os.path.basename returns a bytes object, but NamedTemporaryFile
92 # will fail if the filename contains non ascii characters unless we
93 # use a unicode object
94 path_basename = lambda f: os.path.basename(fn).decode(encoding)
95 # the same for os.path.dirname
96 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
98 path_basename = os.path.basename
99 path_dirname = os.path.dirname
103 'prefix': path_basename(fn) + '.',
104 'dir': path_dirname(fn),
108 # In Python 2.x, json.dump expects a bytestream.
109 # In Python 3.x, it writes to a character stream
110 if sys.version_info < (3, 0):
118 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
123 if sys.platform == 'win32':
124 # Need to remove existing file on Windows, else os.rename raises
125 # WindowsError or FileExistsError.
130 os.rename(tf.name, fn)
139 if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Restrict key/val to characters that cannot break the quoting in
        # the XPath predicate built below.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        # Build a predicate expression like xpath[@key='val'] and return the
        # first matching element, or None if there is no match.
        expr = xpath + "[@%s='%s']" % (key, val)
        return node.find(expr)
147 def find_xpath_attr(node, xpath, key, val):
148 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
149 # .//node does not match if a node is a direct child of . !
150 if isinstance(xpath, compat_str):
151 xpath = xpath.encode('ascii')
153 for f in node.findall(xpath):
154 if f.attrib.get(key) == val:
158 # On python2.6 the xml.etree.ElementTree.Element methods don't support
159 # the namespace parameter
162 def xpath_with_ns(path, ns_map):
163 components = [c.split(':') for c in path.split('/')]
167 replaced.append(c[0])
170 replaced.append('{%s}%s' % (ns_map[ns], tag))
171 return '/'.join(replaced)
174 def xpath_text(node, xpath, name=None, fatal=False):
175 if sys.version_info < (2, 7): # Crazy 2.6
176 xpath = xpath.encode('ascii')
179 if n is None or n.text is None:
181 name = xpath if name is None else name
182 raise ExtractorError('Could not find XML element %s' % name)
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an id lookup is just an attribute lookup on "id".
    return get_element_by_attribute("id", id, html)
193 def get_element_by_attribute(attribute, value, html):
194 """Return the content of the tag with the specified attribute in the passed HTML document"""
196 m = re.search(r'''(?xs)
198 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
200 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
204 ''' % (re.escape(attribute), re.escape(value)), html)
208 res = m.group('content')
210 if res.startswith('"') or res.startswith("'"):
213 return unescapeHTML(res)
216 def clean_html(html):
217 """Clean an HTML snippet into a readable string"""
219 if html is None: # Convenience for sanitizing descriptions etc.
223 html = html.replace('\n', ' ')
224 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
225 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
227 html = re.sub('<.*?>', '', html)
228 # Replace html entities
229 html = unescapeHTML(html)
233 def sanitize_open(filename, open_mode):
234 """Try to open the given filename, and slightly tweak it if this fails.
236 Attempts to open the given filename. If this fails, it tries to change
237 the filename slightly, step by step, until it's either able to open it
238 or it fails and raises a final exception, like the standard open()
241 It returns the tuple (stream, definitive_file_name).
245 if sys.platform == 'win32':
247 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
248 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
249 stream = open(encodeFilename(filename), open_mode)
250 return (stream, filename)
251 except (IOError, OSError) as err:
252 if err.errno in (errno.EACCES,):
255 # In case of error, try to remove win32 forbidden chars
256 alt_filename = sanitize_path(filename)
257 if alt_filename == filename:
260 # An exception here should be caught in the caller
261 stream = open(encodeFilename(alt_filename), open_mode)
262 return (stream, alt_filename)
265 def timeconvert(timestr):
266 """Convert RFC 2822 defined time string into system timestamp"""
268 timetuple = email.utils.parsedate_tz(timestr)
269 if timetuple is not None:
270 timestamp = email.utils.mktime_tz(timetuple)
274 def sanitize_filename(s, restricted=False, is_id=False):
275 """Sanitizes a string so it could be used as part of a filename.
276 If restricted is set, use a stricter subset of allowed characters.
277 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
279 def replace_insane(char):
280 if char == '?' or ord(char) < 32 or ord(char) == 127:
283 return '' if restricted else '\''
285 return '_-' if restricted else ' -'
286 elif char in '\\/|*<>':
288 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
290 if restricted and ord(char) > 127:
295 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
296 result = ''.join(map(replace_insane, s))
298 while '__' in result:
299 result = result.replace('__', '_')
300 result = result.strip('_')
301 # Common case of "Foreign band name - English song title"
302 if restricted and result.startswith('-_'):
304 if result.startswith('-'):
305 result = '_' + result[len('-'):]
306 result = result.lstrip('.')
312 def sanitize_path(s):
313 """Sanitizes and normalizes path on Windows"""
314 if sys.platform != 'win32':
316 drive_or_unc, _ = os.path.splitdrive(s)
317 if sys.version_info < (2, 7) and not drive_or_unc:
318 drive_or_unc, _ = os.path.splitunc(s)
319 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
323 path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
324 for path_part in norm_path]
326 sanitized_path.insert(0, drive_or_unc + os.path.sep)
327 return os.path.join(*sanitized_path)
def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    parsed = compat_urlparse.urlparse(url)
    # Only the path component is rewritten; scheme, netloc, query and
    # fragment pass through untouched.
    collapsed = re.sub(r'/{2,}', '/', parsed.path)
    return compat_urlparse.urlunparse(parsed._replace(path=collapsed))
337 def orderedSet(iterable):
338 """ Remove all duplicates from the input iterable """
346 def _htmlentity_transform(entity):
347 """Transforms an HTML entity to a character."""
348 # Known non-numeric HTML entity
349 if entity in compat_html_entities.name2codepoint:
350 return compat_chr(compat_html_entities.name2codepoint[entity])
352 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
354 numstr = mobj.group(1)
355 if numstr.startswith('x'):
357 numstr = '0%s' % numstr
360 return compat_chr(int(numstr, base))
362 # Unknown entity in name, return its literal representation
363 return ('&%s;' % entity)
369 assert type(s) == compat_str
372 r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
375 def get_subprocess_encoding():
376 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
377 # For subprocess calls, encode with locale encoding
378 # Refer to http://stackoverflow.com/a/9951851/35070
379 encoding = preferredencoding()
381 encoding = sys.getfilesystemencoding()
387 def encodeFilename(s, for_subprocess=False):
389 @param s The name of the file
392 assert type(s) == compat_str
394 # Python 3 has a Unicode API
395 if sys.version_info >= (3, 0):
398 # Pass '' directly to use Unicode APIs on Windows 2000 and up
399 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
400 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
401 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
404 return s.encode(get_subprocess_encoding(), 'ignore')
407 def decodeFilename(b, for_subprocess=False):
409 if sys.version_info >= (3, 0):
412 if not isinstance(b, bytes):
415 return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a subprocess argument, reusing the filename-encoding logic."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
def decodeArgument(b):
    # Counterpart of encodeArgument: decode with for_subprocess=True so the
    # subprocess encoding (not the filesystem one) is used.
    return decodeFilename(b, True)
431 def decodeOption(optval):
434 if isinstance(optval, bytes):
435 optval = optval.decode(preferredencoding())
437 assert isinstance(optval, compat_str)
441 def formatSeconds(secs):
443 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
445 return '%d:%02d' % (secs // 60, secs % 60)
450 def make_HTTPS_handler(params, **kwargs):
451 opts_no_check_certificate = params.get('nocheckcertificate', False)
452 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
453 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
454 if opts_no_check_certificate:
455 context.check_hostname = False
456 context.verify_mode = ssl.CERT_NONE
458 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
461 # (create_default_context present but HTTPSHandler has no context=)
464 if sys.version_info < (3, 2):
465 return YoutubeDLHTTPSHandler(params, **kwargs)
467 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
468 context.verify_mode = (ssl.CERT_NONE
469 if opts_no_check_certificate
470 else ssl.CERT_REQUIRED)
471 context.set_default_verify_paths()
472 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
475 def bug_reports_message():
476 if ytdl_is_updateable():
477 update_cmd = 'type youtube-dl -U to update'
479 update_cmd = 'see https://yt-dl.org/update on how to update'
480 msg = '; please report this issue on https://yt-dl.org/bug .'
481 msg += ' Make sure you are using the latest version; %s.' % update_cmd
482 msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
486 class ExtractorError(Exception):
487 """Error during info extraction."""
489 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
490 """ tb, if given, is the original traceback (so that it can be printed out).
491 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
494 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
496 if video_id is not None:
497 msg = video_id + ': ' + msg
499 msg += ' (caused by %r)' % cause
501 msg += bug_reports_message()
502 super(ExtractorError, self).__init__(msg)
505 self.exc_info = sys.exc_info() # preserve original exception
507 self.video_id = video_id
509 def format_traceback(self):
510 if self.traceback is None:
512 return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
    """Raised for URLs that are not supported.

    Marked expected=True so it is reported as a normal error message
    rather than as a youtube-dl bug.
    """
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
522 class RegexNotFoundError(ExtractorError):
523 """Error when a regex didn't match"""
527 class DownloadError(Exception):
528 """Download Error exception.
530 This exception may be thrown by FileDownloader objects if they are not
531 configured to continue on errors. They will contain the appropriate
535 def __init__(self, msg, exc_info=None):
536 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
537 super(DownloadError, self).__init__(msg)
538 self.exc_info = exc_info
541 class SameFileError(Exception):
542 """Same File exception.
544 This exception will be thrown by FileDownloader objects if they detect
545 multiple files would have to be downloaded to the same file on disk.
550 class PostProcessingError(Exception):
551 """Post Processing exception.
553 This exception may be raised by PostProcessor's .run() method to
554 indicate an error in the postprocessing task.
557 def __init__(self, msg):
561 class MaxDownloadsReached(Exception):
562 """ --max-downloads limit has been reached. """
566 class UnavailableVideoError(Exception):
567 """Unavailable Format exception.
569 This exception will be thrown when a video is requested
570 in a format that is not available for that video.
575 class ContentTooShortError(Exception):
576 """Content Too Short exception.
578 This exception may be raised by FileDownloader objects when a file they
579 download is too small for what the server announced first, indicating
580 the connection was probably interrupted.
586 def __init__(self, downloaded, expected):
587 self.downloaded = downloaded
588 self.expected = expected
591 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
592 hc = http_class(*args, **kwargs)
593 source_address = ydl_handler._params.get('source_address')
594 if source_address is not None:
595 sa = (source_address, 0)
596 if hasattr(hc, 'source_address'): # Python 2.7+
597 hc.source_address = sa
599 def _hc_connect(self, *args, **kwargs):
600 sock = compat_socket_create_connection(
601 (self.host, self.port), self.timeout, sa)
603 self.sock = ssl.wrap_socket(
604 sock, self.key_file, self.cert_file,
605 ssl_version=ssl.PROTOCOL_TLSv1)
608 hc.connect = functools.partial(_hc_connect, hc)
613 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
614 """Handler for HTTP requests and responses.
616 This class, when installed with an OpenerDirector, automatically adds
617 the standard headers to every HTTP request and handles gzipped and
618 deflated responses from web servers. If compression is to be avoided in
619 a particular request, the original request in the program code only has
620 to include the HTTP header "Youtubedl-No-Compression", which will be
621 removed before making the real request.
623 Part of this code was copied from:
625 http://techknack.net/python-urllib2-handlers/
627 Andrew Rowls, the author of that code, agreed to release it to the
    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict; stored so the connection
        # factory (_create_http_connection) can read e.g. source_address.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
635 def http_open(self, req):
636 return self.do_open(functools.partial(
637 _create_http_connection, self, compat_http_client.HTTPConnection, False),
643 return zlib.decompress(data, -zlib.MAX_WBITS)
645 return zlib.decompress(data)
648 def addinfourl_wrapper(stream, headers, url, code):
649 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
650 return compat_urllib_request.addinfourl(stream, headers, url, code)
651 ret = compat_urllib_request.addinfourl(stream, headers, url)
655 def http_request(self, req):
656 for h, v in std_headers.items():
657 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
658 # The dict keys are capitalized because of this bug by urllib
659 if h.capitalize() not in req.headers:
661 if 'Youtubedl-no-compression' in req.headers:
662 if 'Accept-encoding' in req.headers:
663 del req.headers['Accept-encoding']
664 del req.headers['Youtubedl-no-compression']
666 if sys.version_info < (2, 7) and '#' in req.get_full_url():
667 # Python 2.6 is brain-dead when it comes to fragments
668 req._Request__original = req._Request__original.partition('#')[0]
669 req._Request__r_type = req._Request__r_type.partition('#')[0]
673 def http_response(self, req, resp):
676 if resp.headers.get('Content-encoding', '') == 'gzip':
677 content = resp.read()
678 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
680 uncompressed = io.BytesIO(gz.read())
681 except IOError as original_ioerror:
682 # There may be junk add the end of the file
683 # See http://stackoverflow.com/q/4928560/35070 for details
684 for i in range(1, 1024):
686 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
687 uncompressed = io.BytesIO(gz.read())
692 raise original_ioerror
693 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
694 resp.msg = old_resp.msg
696 if resp.headers.get('Content-encoding', '') == 'deflate':
697 gz = io.BytesIO(self.deflate(resp.read()))
698 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
699 resp.msg = old_resp.msg
702 https_request = http_request
703 https_response = http_response
706 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        # https_conn_class lets callers substitute a custom HTTPSConnection
        # implementation; defaults to the compat one.
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        # Stored for _create_http_connection (source_address handling).
        self._params = params
712 def https_open(self, req):
714 if hasattr(self, '_context'): # python > 2.6
715 kwargs['context'] = self._context
716 if hasattr(self, '_check_hostname'): # python 3.x
717 kwargs['check_hostname'] = self._check_hostname
718 return self.do_open(functools.partial(
719 _create_http_connection, self, self._https_conn_class, True),
723 def parse_iso8601(date_str, delimiter='T', timezone=None):
724 """ Return a UNIX timestamp from the given date """
731 r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
734 timezone = datetime.timedelta()
736 date_str = date_str[:-len(m.group(0))]
737 if not m.group('sign'):
738 timezone = datetime.timedelta()
740 sign = 1 if m.group('sign') == '+' else -1
741 timezone = datetime.timedelta(
742 hours=sign * int(m.group('hours')),
743 minutes=sign * int(m.group('minutes')))
744 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
745 dt = datetime.datetime.strptime(date_str, date_format) - timezone
746 return calendar.timegm(dt.timetuple())
749 def unified_strdate(date_str, day_first=True):
750 """Return a string with the date in the format YYYYMMDD"""
756 date_str = date_str.replace(',', ' ')
757 # %z (UTC offset) is only supported in python>=3.2
758 if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
759 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
760 # Remove AM/PM + timezone
761 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
763 format_expressions = [
768 '%b %dst %Y %I:%M%p',
769 '%b %dnd %Y %I:%M%p',
770 '%b %dth %Y %I:%M%p',
776 '%Y-%m-%d %H:%M:%S.%f',
779 '%Y-%m-%dT%H:%M:%SZ',
780 '%Y-%m-%dT%H:%M:%S.%fZ',
781 '%Y-%m-%dT%H:%M:%S.%f0Z',
783 '%Y-%m-%dT%H:%M:%S.%f',
787 format_expressions.extend([
795 format_expressions.extend([
802 for expression in format_expressions:
804 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
807 if upload_date is None:
808 timetuple = email.utils.parsedate_tz(date_str)
810 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
814 def determine_ext(url, default_ext='unknown_video'):
817 guess = url.partition('?')[0].rpartition('.')[2]
818 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name: strip the media extension from
    filename, then append the language code and subtitle format."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
828 def date_from_str(date_str):
830 Return a datetime object from a string in the format YYYYMMDD or
831 (now|today)[+-][0-9](day|week|month|year)(s)?"""
832 today = datetime.date.today()
833 if date_str in ('now', 'today'):
835 if date_str == 'yesterday':
836 return today - datetime.timedelta(days=1)
837 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
838 if match is not None:
839 sign = match.group('sign')
840 time = int(match.group('time'))
843 unit = match.group('unit')
844 # A bad aproximation?
852 delta = datetime.timedelta(**{unit: time})
854 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
857 def hyphenate_date(date_str):
859 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
860 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
861 if match is not None:
862 return '-'.join(match.groups())
867 class DateRange(object):
868 """Represents a time interval between two dates"""
870 def __init__(self, start=None, end=None):
871 """start and end must be strings in the format accepted by date"""
872 if start is not None:
873 self.start = date_from_str(start)
875 self.start = datetime.datetime.min.date()
877 self.end = date_from_str(end)
879 self.end = datetime.datetime.max.date()
880 if self.start > self.end:
881 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
885 """Returns a range that only contains the given day"""
888 def __contains__(self, date):
889 """Check if the date is in the range"""
890 if not isinstance(date, datetime.date):
891 date = date_from_str(date)
892 return self.start <= date <= self.end
895 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
899 """ Returns the platform name as a compat_str """
900 res = platform.platform()
901 if isinstance(res, bytes):
902 res = res.decode(preferredencoding())
904 assert isinstance(res, compat_str)
908 def _windows_write_string(s, out):
909 """ Returns True if the string was written using special methods,
910 False if it has yet to be written out."""
911 # Adapted from http://stackoverflow.com/a/3259271/35070
914 import ctypes.wintypes
922 fileno = out.fileno()
923 except AttributeError:
924 # If the output stream doesn't have a fileno, it's virtual
926 except io.UnsupportedOperation:
927 # Some strange Windows pseudo files?
929 if fileno not in WIN_OUTPUT_IDS:
932 GetStdHandle = ctypes.WINFUNCTYPE(
933 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
934 (b"GetStdHandle", ctypes.windll.kernel32))
935 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
937 WriteConsoleW = ctypes.WINFUNCTYPE(
938 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
939 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
940 ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
941 written = ctypes.wintypes.DWORD(0)
943 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
944 FILE_TYPE_CHAR = 0x0002
945 FILE_TYPE_REMOTE = 0x8000
946 GetConsoleMode = ctypes.WINFUNCTYPE(
947 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
948 ctypes.POINTER(ctypes.wintypes.DWORD))(
949 (b"GetConsoleMode", ctypes.windll.kernel32))
950 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
952 def not_a_console(handle):
953 if handle == INVALID_HANDLE_VALUE or handle is None:
955 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
956 GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
961 def next_nonbmp_pos(s):
963 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
964 except StopIteration:
968 count = min(next_nonbmp_pos(s), 1024)
971 h, s, count if count else 2, ctypes.byref(written), None)
973 raise OSError('Failed to write string')
974 if not count: # We just wrote a non-BMP character
975 assert written.value == 2
978 assert written.value > 0
979 s = s[written.value:]
983 def write_string(s, out=None, encoding=None):
986 assert type(s) == compat_str
988 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
989 if _windows_write_string(s, out):
992 if ('b' in getattr(out, 'mode', '') or
993 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
994 byt = s.encode(encoding or preferredencoding(), 'ignore')
996 elif hasattr(out, 'buffer'):
997 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
998 byt = s.encode(enc, 'ignore')
999 out.buffer.write(byt)
1005 def bytes_to_intlist(bs):
1008 if isinstance(bs[0], int): # Python 3
1011 return [ord(c) for c in bs]
1014 def intlist_to_bytes(xs):
1017 return struct_pack('%dB' % len(xs), *xs)
1020 # Cross-platform file locking
1021 if sys.platform == 'win32':
1022 import ctypes.wintypes
1025 class OVERLAPPED(ctypes.Structure):
1027 ('Internal', ctypes.wintypes.LPVOID),
1028 ('InternalHigh', ctypes.wintypes.LPVOID),
1029 ('Offset', ctypes.wintypes.DWORD),
1030 ('OffsetHigh', ctypes.wintypes.DWORD),
1031 ('hEvent', ctypes.wintypes.HANDLE),
1034 kernel32 = ctypes.windll.kernel32
1035 LockFileEx = kernel32.LockFileEx
1036 LockFileEx.argtypes = [
1037 ctypes.wintypes.HANDLE, # hFile
1038 ctypes.wintypes.DWORD, # dwFlags
1039 ctypes.wintypes.DWORD, # dwReserved
1040 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1041 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1042 ctypes.POINTER(OVERLAPPED) # Overlapped
1044 LockFileEx.restype = ctypes.wintypes.BOOL
1045 UnlockFileEx = kernel32.UnlockFileEx
1046 UnlockFileEx.argtypes = [
1047 ctypes.wintypes.HANDLE, # hFile
1048 ctypes.wintypes.DWORD, # dwReserved
1049 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1050 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1051 ctypes.POINTER(OVERLAPPED) # Overlapped
1053 UnlockFileEx.restype = ctypes.wintypes.BOOL
1054 whole_low = 0xffffffff
1055 whole_high = 0x7fffffff
    def _lock_file(f, exclusive):
        # Lock the whole file via the Win32 LockFileEx API; 0x2 requests an
        # exclusive lock (LOCKFILE_EXCLUSIVE_LOCK), 0x0 a shared one.
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stash the pointer on the file object so the OVERLAPPED structure
        # stays alive and _unlock_file can reuse it.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())
    def _unlock_file(f):
        # Undo _lock_file: release the same whole-file range using the
        # OVERLAPPED pointer stored on the file object by _lock_file.
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1078 def _lock_file(f, exclusive):
1079 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
    def _unlock_file(f):
        # Release any flock() held on f.
        fcntl.flock(f, fcntl.LOCK_UN)
1085 class locked_file(object):
1086 def __init__(self, filename, mode, encoding=None):
1087 assert mode in ['r', 'a', 'w']
1088 self.f = io.open(filename, mode, encoding=encoding)
1091 def __enter__(self):
1092 exclusive = self.mode != 'r'
1094 _lock_file(self.f, exclusive)
1100 def __exit__(self, etype, value, traceback):
1102 _unlock_file(self.f)
    def write(self, *args):
        # Delegate directly to the wrapped file object.
        return self.f.write(*args)
    def read(self, *args):
        # Delegate directly to the wrapped file object.
        return self.f.read(*args)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), falling back to 'utf-8' when the
    interpreter reports none (possible on some Python 2 setups)."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
1121 def shell_quote(args):
1123 encoding = get_filesystem_encoding()
1125 if isinstance(a, bytes):
1126 # We may get a filename encoded with 'encodeFilename'
1127 a = a.decode(encoding)
1128 quoted_args.append(pipes.quote(a))
1129 return ' '.join(quoted_args)
1132 def smuggle_url(url, data):
1133 """ Pass additional data in a URL for internal use. """
1135 sdata = compat_urllib_parse.urlencode(
1136 {'__youtubedl_smuggle': json.dumps(data)})
1137 return url + '#' + sdata
1140 def unsmuggle_url(smug_url, default=None):
1141 if '#__youtubedl_smuggle' not in smug_url:
1142 return smug_url, default
1143 url, _, sdata = smug_url.rpartition('#')
1144 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1145 data = json.loads(jsond)
1149 def format_bytes(bytes):
1152 if type(bytes) is str:
1153 bytes = float(bytes)
1157 exponent = int(math.log(bytes, 1024.0))
1158 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
1159 converted = float(bytes) / float(1024 ** exponent)
1160 return '%.2f%s' % (converted, suffix)
1163 def parse_filesize(s):
1167 # The lower-case forms are of course incorrect and inofficial,
1168 # but we support those too
1206 units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
1208 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
1212 num_str = m.group('num').replace(',', '.')
1213 mult = _UNIT_TABLE[m.group('unit')]
1214 return int(float(num_str) * mult)
1217 def month_by_name(name):
1218 """ Return the number of a month by (locale-independently) English name """
1221 return ENGLISH_MONTH_NAMES.index(name) + 1
1226 def month_by_abbreviation(abbrev):
1227 """ Return the number of a month by (locale-independently) English
1231 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
1236 def fix_xml_ampersands(xml_str):
1237 """Replace all the '&' by '&' in XML"""
1239 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1244 def setproctitle(title):
1245 assert isinstance(title, compat_str)
1247 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1250 title_bytes = title.encode('utf-8')
1251 buf = ctypes.create_string_buffer(len(title_bytes))
1252 buf.value = title_bytes
1254 libc.prctl(15, buf, 0, 0, 0)
1255 except AttributeError:
1256 return # Strange libc, just skip this
1259 def remove_start(s, start):
1260 if s.startswith(start):
1261 return s[len(start):]
1265 def remove_end(s, end):
1267 return s[:-len(end)]
def url_basename(url):
    """Return the last component of the URL's path, ignoring any trailing
    slash, query string and fragment (e.g. 'http://x/a/b/?q' -> 'b')."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip('/').split('/')
    return segments[-1]
1276 class HEADRequest(compat_urllib_request.Request):
1277 def get_method(self):
1281 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1284 v = getattr(v, get_attr, None)
1287 return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Coerce v to compat_str, returning default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1294 def str_to_int(int_str):
1295 """ A more relaxed version of int_or_none """
1298 int_str = re.sub(r'[,\.\+]', '', int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Safely convert v to a float scaled by invscale / scale.

    Returns default when v is None or cannot be converted to a float
    (the previous version raised ValueError/TypeError on e.g. a
    non-numeric string, crashing callers that feed in scraped data).
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
1306 def parse_duration(s):
1307 if not isinstance(s, compat_basestring):
1315 (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
1316 (?P<only_hours>[0-9.]+)\s*(?:hours?)|
1318 \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
1321 (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
1322 (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
1324 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1326 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
1331 if m.group('only_mins'):
1332 return float_or_none(m.group('only_mins'), invscale=60)
1333 if m.group('only_hours'):
1334 return float_or_none(m.group('only_hours'), invscale=60 * 60)
1336 res += int(m.group('secs'))
1337 if m.group('mins_reversed'):
1338 res += int(m.group('mins_reversed')) * 60
1340 res += int(m.group('mins')) * 60
1341 if m.group('hours'):
1342 res += int(m.group('hours')) * 60 * 60
1343 if m.group('hours_reversed'):
1344 res += int(m.group('hours_reversed')) * 60 * 60
1346 res += int(m.group('days')) * 24 * 60 * 60
1348 res += float(m.group('ms'))
1352 def prepend_extension(filename, ext, expected_real_ext=None):
1353 name, real_ext = os.path.splitext(filename)
1355 '{0}.{1}{2}'.format(name, ext, real_ext)
1356 if not expected_real_ext or real_ext[1:] == expected_real_ext
1357 else '{0}.{1}'.format(filename, ext))
1360 def replace_extension(filename, ext, expected_real_ext=None):
1361 name, real_ext = os.path.splitext(filename)
1362 return '{0}.{1}'.format(
1363 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE: the mutable default `args=[]` is only safe because it is never
    # mutated here.
    # NOTE(review): the try/except wrapper and the return statements are
    # missing from this extract.
    subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    # NOTE(review): the try wrapper around this Popen call and its argv
    # argument line are missing from this extract.
    out, _ = subprocess.Popen(
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    # Popen output is bytes on Python 2; normalize to text for the regex
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    # Extract a version string from *output* using *version_re*, defaulting
    # to a generic "version <num>" pattern.
    # NOTE(review): the trailing branch (returning the matched group or the
    # *unrecognized* fallback) is missing from this extract.
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
class PagedList(object):
    # Base class for lazily evaluated paged result lists; subclasses
    # implement getslice(start, end).
    # NOTE(review): the `def __len__(self):` header is missing from this
    # extract; the return below is __len__'s body.
    # This is only useful for tests
    return len(self.getslice())
class OnDemandPagedList(PagedList):
    """Paged list that fetches each page lazily via pagefunc(pagenum)."""

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc  # callable: page number -> iterable of entries
        self._pagesize = pagesize  # entries per page

    def getslice(self, start=0, end=None):
        # NOTE(review): several lines of this method are missing from the
        # extract (result-list setup, the `continue` after the skip test,
        # the startv/endv assignment wrappers, the `break` statements and
        # the final return) — comments only, nothing reconstructed.
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            # requested slice starts after this page — skip it
            if start >= nextfirstid:
            page_results = list(self._pagefunc(pagenum))
                # offset of the slice start within this page
                start % self._pagesize
                if firstid <= start < nextfirstid
                # offset of the slice end within this page
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
class InAdvancePagedList(PagedList):
    """Paged list where the total number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc    # callable: page number -> iterable of entries
        self._pagecount = pagecount  # total number of pages
        self._pagesize = pagesize    # entries per page

    def getslice(self, start=0, end=None):
        # NOTE(review): several lines of this method are missing from the
        # extract (result-list setup, the `end_page = min(` wrapper, the
        # skip_elems reset, the `break`/`else` around the only_more logic,
        # res.extend and the final return) — comments only.
        start_page = start // self._pagesize
        # last page (exclusive) needed to satisfy the requested slice
        self._pagecount if end is None else (end // self._pagesize + 1))
        # entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # how many entries are still wanted (None == all)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            page = page[skip_elems:]
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                page = page[:only_more]
def uppercase_escape(s):
    # Decode literal \UXXXXXXXX escape sequences embedded in *s*.
    # NOTE(review): the `return re.sub(` wrapper and its trailing `s)`
    # argument line are missing from this extract.
    unicode_escape = codecs.getdecoder('unicode_escape')
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
def lowercase_escape(s):
    # Decode literal \uXXXX escape sequences embedded in *s*.
    # NOTE(review): the `return re.sub(` wrapper and its trailing `s)`
    # argument line are missing from this extract.
    unicode_escape = codecs.getdecoder('unicode_escape')
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() cannot handle unicode input, so encode first
    needs_bytes = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_bytes:
        s = s.encode('utf-8')
    # RFC 3986 reserved + unreserved punctuation stays unescaped
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    # Escape each component separately so the URL's own structure
    # (scheme, netloc, separators) is preserved.
    # NOTE(review): the closing `).geturl()` line is missing from this
    # extract.
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
# Feature-detect whether struct accepts text format strings, and define
# struct_pack/struct_unpack wrappers accordingly.
# NOTE(review): the try/except TypeError/else scaffolding around this
# block is missing from this extract.
struct.pack('!I', 0)
# In Python 2.x, json.dump expects a bytestream.
# In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
def struct_pack(spec, *args):
    if isinstance(spec, compat_str):
        spec = spec.encode('ascii')
    return struct.pack(spec, *args)

def struct_unpack(spec, *args):
    if isinstance(spec, compat_str):
        spec = spec.encode('ascii')
    return struct.unpack(spec, *args)

# (else branch) struct handles text specs natively — use it directly
struct_pack = struct.pack
struct_unpack = struct.unpack
def read_batch_urls(batch_fd):
    """Read URLs from an open file object, one per line, skipping comments."""
    # NOTE(review): the inner `def fixup(url):` header and its trailing
    # `return None`/`return url` lines are missing from this extract.
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that survived decoding
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        # Lines starting with these characters are treated as comments
        if url.startswith(('#', ';', ']')):

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
# Element.iter only exists on Python >= 2.7 / ElementTree 1.3; fall back
# to findall on older versions.
# NOTE(review): the opening `try:` line is missing from this extract.
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
# NOTE(review): this is the body of parse_xml(s); its `def parse_xml(s):`
# header and the final `return tree` are missing from this extract.
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
    # Builder that silently drops DOCTYPE declarations
    def doctype(self, name, pubid, system):
        pass  # Ignore doctypes

parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
# custom parser kwarg is only supported from Python 2.7 on
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
# Fix up XML parser in Python 2.x
if sys.version_info < (3, 0):
    for n in etree_iter(tree):
        if n.text is not None:
            if not isinstance(n.text, compat_str):
                n.text = n.text.decode('utf-8')
def parse_age_limit(s):
    # Parse an age limit like "18" or "18+" into an int, falling back to
    # the US_RATINGS table for strings such as "PG-13".
    # NOTE(review): the `if s is None:` guard is missing from this extract.
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)
def strip_jsonp(code):
    # Strip a JSONP wrapper — `callback(<payload>);` plus optional trailing
    # // comments — leaving only the JSON payload.
    # NOTE(review): the `return re.sub(` line is missing from this extract.
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code):
    """Convert a JavaScript object literal into valid JSON."""
    # NOTE(review): large parts of this function are missing from the
    # extract — the inner fix_kv(m) helper's header and return statements,
    # the escape-mapping dict body, and the final `return res`.
        if v in ('true', 'false', 'null'):
            # already valid JSON literals — pass through
        if v.startswith('"'):
        if v.startswith("'"):
            # single-quoted string: re-escape for double quotes
            v = re.sub(r"\\\\|\\'|\"", lambda m: {

    # match strings (single/double quoted) or bare identifiers/keys
    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
    # strip trailing commas before ] or }
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    # NOTE(review): the inner `q(qid)` helper's def/try/except scaffolding
    # and the final `return q` are missing from this extract.
        # a higher index in quality_ids means a better quality
        return quality_ids.index(qid)
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    # NOTE(review): the None guard, the ELLIPSES constant definition and
    # the `if len(s) > length:` test are missing from this extract.
    return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
def is_outdated_version(version, limit, assume_new=True):
    # Compare two dotted version strings; fall back to `not assume_new`
    # when the version is missing or unparseable.
    # NOTE(review): the empty-version guard and the try/except wrapper are
    # missing from this extract; only the return statements are visible.
        return not assume_new
        return version_tuple(version) < version_tuple(limit)
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updateable when running from a zip bundle or a frozen (py2exe) build
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
def args_to_str(args):
    """Return a short, shell-quoted string form of a subprocess command."""
    quoted = [shlex_quote(a) for a in args]
    return ' '.join(quoted)
def mimetype2ext(mt):
    # Map a MIME type to a file extension, defaulting to the MIME subtype.
    # NOTE(review): the dict-literal wrapper around the mapping entry and
    # the final `.get(res, res)` lookup are missing from this extract.
    _, _, res = mt.rpartition('/')

        'x-mp4-fragmented': 'mp4',
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a urllib response handle."""
    # NOTE(review): the `try:` before the lambda and the `if cd:`/`if m:`
    # wrappers (plus the `return e` for a detected extension) are missing
    # from this extract.
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    # Prefer the filename advertised in Content-Disposition, when present
    cd = getheader('Content-Disposition')
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
            e = determine_ext(m.group('filename'), default_ext=None)

    # Fall back to mapping the Content-Type header
    return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # NOTE(review): the `return False` body of the no-age-limit branch is
    # missing from this extract.
    if age_limit is None:  # No limit set
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # NOTE(review): the `BOMS = [` opener and its closing bracket, plus the
    # `break` and `else:` of the loop, are missing from this extract.
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            # Strip the BOM and decode with its matching encoding
            s = first_bytes[len(bom):].decode(enc, 'replace')
        # no BOM found — assume UTF-8
        s = first_bytes.decode('utf-8', 'replace')

    # HTML-ish content starts with optional whitespace then '<'
    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    # Work out the download protocol for an info dict: the explicit
    # 'protocol' field wins, then the URL scheme prefix, then the file
    # extension, then the parsed URL scheme.
    # NOTE(review): the individual return statements of the rtmp/mms/rtsp
    # branches and the m3u8/f4m extension branch are missing from this
    # extract.
    protocol = info_dict.get('protocol')
    if protocol is not None:

    url = info_dict['url']
    if url.startswith('rtmp'):
    elif url.startswith('mms'):
    elif url.startswith('rtsp'):

    ext = determine_ext(url)

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # widest cell per column determines that column's field width
    max_lens = []
    for col in zip(*table):
        max_lens.append(max(len(compat_str(v)) for v in col))
    # left-justify all columns except the last, which is printed as-is
    pieces = ['%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]]
    format_str = ' '.join(pieces) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
def _match_one(filter_part, dct):
    # Evaluate a single filter expression (e.g. "duration > 600",
    # "!is_live") against the dict *dct*.
    # NOTE(review): many lines are missing from this extract — the
    # COMPARISON_OPERATORS dict body and closing brace, the key group and
    # pattern terminators of both regexes, the `if m:` wrappers, the
    # raise-ValueError openers, the try/except around int(), the
    # UNARY_OPERATORS dict opener/closer — comments only, nothing
    # reconstructed.
    COMPARISON_OPERATORS = {
    operator_rex = re.compile(r'''(?x)\s*
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
        (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    op = COMPARISON_OPERATORS[m.group('op')]
    if m.group('strval') is not None:
        # string comparisons only make sense for (in)equality
        if m.group('op') not in ('=', '!='):
                'Operator %s does not support string values!' % m.group('op'))
        comparison_value = m.group('strval')
        comparison_value = int(m.group('intval'))
        # not a plain int — try parsing as a file size ("500KiB", "1M")
        comparison_value = parse_filesize(m.group('intval'))
        if comparison_value is None:
            comparison_value = parse_filesize(m.group('intval') + 'B')
        if comparison_value is None:
                'Invalid integer value %r in filter part %r' % (
                m.group('intval'), filter_part))
    actual_value = dct.get(m.group('key'))
    if actual_value is None:
        # missing keys match only when the '?' suffix was given
        return m.group('none_inclusive')
    return op(actual_value, comparison_value)
    '': lambda v: v is not None,
    '!': lambda v: v is None,
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    op = UNARY_OPERATORS[m.group('op')]
    actual_value = dct.get(m.group('key'))
    return op(actual_value)
    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # '&'-separated parts must all match (AND semantics).
    # NOTE(review): the `return all(` wrapper line is missing from this
    # extract.
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    # Build a --match-filter callback: it returns None when the video
    # passes the filter, or a human-readable skip message otherwise.
    # NOTE(review): the `return None`, the `else:` separating the two
    # branches, and the trailing `return _match_func` are missing from
    # this extract.
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
def parse_dfxp_time_expr(time_expr):
    # Parse a TTML/DFXP time expression ("12.3s" or "HH:MM:SS.mmm") into a
    # float number of seconds.
    # NOTE(review): the empty-input guard and the `if mobj:` wrappers
    # around each return are missing from this extract.
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
def format_srt_time(seconds):
    """Format a float number of seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    (mins, secs) = divmod(seconds, 60)
    (hours, mins) = divmod(mins, 60)
    # fractional part of the seconds, expressed in milliseconds
    millisecs = (secs - int(secs)) * 1000
    # NOTE(review): one line is missing from this extract between the
    # millisecond computation and the return.
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
def dfxp2srt(dfxp_data):
    """Convert DFXP/TTML subtitle markup into SRT format."""
    # helper to qualify tag names with the TTML namespace
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})

    def parse_node(node):
        # Flatten a <p>/<span> node into plain text, turning <br> into '\n'.
        # NOTE(review): the `for child in node:` loop header, the `else:`
        # before the tostring fallback, and the `return out` are missing
        # from this extract.
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)
            if child.tag == _x('ttml:br'):
                out += '\n' + str_or_empty(child.tail)
            elif child.tag == _x('ttml:span'):
                out += str_or_empty(parse_node(child))
                out += str_or_empty(xml.etree.ElementTree.tostring(child))

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    paras = dfxp.findall(_x('.//ttml:p'))

    # NOTE(review): the output-list setup, the index argument of the
    # format below and the final joined return are missing from this
    # extract.
    for para, index in zip(paras, itertools.count(1)):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            # bind current `type` via default args so each lambda keeps its
            # own scheme (avoids the late-binding closure pitfall)
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # NOTE(review): the `proxy = req_proxy` assignment inside the if
        # branch is missing from this extract.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            # header is internal — strip it before the request goes out
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)