2 # -*- coding: utf-8 -*-
4 from __future__ import unicode_literals
32 import xml.etree.ElementTree
42 compat_socket_create_connection,
46 compat_urllib_parse_urlparse,
47 compat_urllib_request,
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))  # for isinstance() checks against compiled patterns

    # Entries of the module-wide default HTTP header dict (presumably
    # std_headers, which http_request() below merges into every request):
    # a desktop-browser User-Agent plus conservative Accept-* values.
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',

# Locale-independent English month names; index + 1 == month number.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    # locale.getpreferredencoding() can raise on misconfigured systems;
    # the elided remainder presumably falls back to UTF-8 — TODO confirm.
    pref = locale.getpreferredencoding()
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
        # Python 3 / win32 branch (preceding else elided): paths are text
        path_basename = os.path.basename
        path_dirname = os.path.dirname
        # NamedTemporaryFile kwargs: create the temp file next to fn so the
        # final os.rename stays on one filesystem (atomic replace)
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
    if sys.platform == 'win32':
        # Need to remove existing file on Windows, else os.rename raises
        # WindowsError or FileExistsError.
    os.rename(tf.name, fn)
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Only simple attribute names/values are allowed — anything else
        # would corrupt the hand-built predicate expression below.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        expr = xpath + "[@%s='%s']" % (key, val)
        return node.find(expr)
    # Fallback variant (enclosing else elided): Python 2.6's ElementTree
    # has no attribute predicates, so scan matches manually.
    def find_xpath_attr(node, xpath, key, val):
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        # Linear scan, comparing the attribute on each candidate node.
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    """Expand 'ns:tag' steps of an xpath into '{uri}tag' using ns_map."""
    components = [c.split(':') for c in path.split('/')]
            # Step without a namespace prefix: keep as-is
            replaced.append(c[0])
            # Prefixed step: substitute the mapped namespace URI
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first node matching xpath (raising when fatal)."""
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    if n is None or n.text is None:
            # fatal branch: use the friendly name if given, else the xpath
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An id lookup is simply an attribute lookup on "id".
    content = get_element_by_attribute("id", id, html)
    return content
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    # Regex-based scrape: match the first tag whose attribute list contains
    # attribute=value (bare, double- or single-quoted).
    m = re.search(r'''(?xs)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
    ''' % (re.escape(attribute), re.escape(value)), html)

    res = m.group('content')

    if res.startswith('"') or res.startswith("'"):
    # Entities in the captured content are decoded before returning
    return unescapeHTML(res)
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.

    # Literal newlines in markup are insignificant; real breaks come from
    # <br> tags and paragraph boundaries, normalized below.
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip any remaining tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
            # '-' means stdout; on Windows it must be switched to binary mode
            if sys.platform == 'win32':
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors are not fixable by renaming — re-raise (elided)
        if err.errno in (errno.EACCES,):
        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    # parsedate_tz returns None for unparseable input
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        # Control characters, DEL and '?' are never allowed
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return '' if restricted else '\''
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
        if restricted and ord(char) > 127:

    # Keep timestamps like 12:34:56 readable by turning ':' into '_'
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
        # Collapse runs of underscores produced by the substitutions above
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # No-op on non-Windows platforms (return elided)
    if sys.platform != 'win32':
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        # splitunc is the pre-2.7 way to detect UNC prefixes
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
        # Replace characters Windows forbids in path components ('.'/'..'
        # are kept verbatim); a trailing dot is also illegal on Windows
        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
        for path_part in norm_path]
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    url_parts = list(compat_urlparse.urlparse(url))
    # Index 2 of the parse tuple is the path component.
    collapsed = re.sub(r'/{2,}', '/', url_parts[2])
    url_parts[2] = collapsed
    return compat_urlparse.urlunparse(url_parts)
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # NOTE(review): implementation elided in this view; presumably keeps
    # first-seen order of the elements — confirm against full source.
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character."""
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Numeric character reference: decimal ('#160') or hex ('#xA0')
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            # '0x..' form so int() can pick the base
            numstr = '0%s' % numstr
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
    # Body of unescapeHTML (def header elided in this view): replaces every
    # '&entity;' occurrence via _htmlentity_transform.
    assert type(s) == compat_str

        r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
def get_subprocess_encoding():
    """Return the encoding used for subprocess argument round-trips."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
        # Non-Windows branch (else elided): use the filesystem encoding
        encoding = sys.getfilesystemencoding()
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess encode for passing on a subprocess command line
    """
    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:

    return s.encode(get_subprocess_encoding(), 'ignore')
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename: return the filename as text."""
    # On Python 3 filenames are already text (return elided)
    if sys.version_info >= (3, 0):

    if not isinstance(b, bytes):

    return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for handing to a subprocess."""
    if isinstance(s, compat_str):
        arg = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        arg = s.decode('ascii')
    return encodeFilename(arg, True)
def decodeArgument(b):
    """Decode a subprocess argument back to text (inverse of encodeArgument)."""
    decoded = decodeFilename(b, True)
    return decoded
def decodeOption(optval):
    """Decode a command-line option value to text."""
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    # Guarantee callers always receive text
    assert isinstance(optval, compat_str)
def formatSeconds(secs):
    """Format a duration in seconds as [H:]MM:SS (branch conditions elided)."""
    # hours branch
    return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    # minutes branch
    return '%d:%02d' % (secs // 60, secs % 60)
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler appropriate for the running Python/ssl."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disabling hostname checking is required before CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
            # (create_default_context present but HTTPSHandler has no context=)

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
        # Python 3.2/3.3: build an SSLContext by hand
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
def bug_reports_message():
    """Build the standard 'please report this issue' suffix for error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
        # Non-updateable install (else elided): point at the update page
        update_cmd = 'see https://yt-dl.org/update on how to update'
    msg = '; please report this issue on https://yt-dl.org/bug .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        # Network-style failures are always "expected" (not youtube-dl bugs)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
        if video_id is not None:
            msg = video_id + ': ' + msg
            msg += ' (caused by %r)' % cause
            # Unexpected errors get the bug-report boilerplate appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)
        self.exc_info = sys.exc_info()  # preserve original exception
        self.video_id = video_id

    def format_traceback(self):
        # Returns the formatted traceback, or nothing when none was captured
        if self.traceback is None:
        return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
    """Raised when no extractor is able to handle the given URL."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both values are byte counts
        self.downloaded = downloaded
        self.expected = expected
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection honoring the configured source_address."""
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')
    if source_address is not None:
        # Port 0 lets the OS pick an ephemeral local port
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
            # Python 2.6 fallback (else elided): patch connect() by hand
            def _hc_connect(self, *args, **kwargs):
                sock = compat_socket_create_connection(
                    (self.host, self.port), self.timeout, sa)
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
            hc.connect = functools.partial(_hc_connect, hc)
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
        # deflate helper: try a raw stream first, then a zlib-wrapped one
        return zlib.decompress(data, -zlib.MAX_WBITS)
        return zlib.decompress(data)

    def addinfourl_wrapper(stream, headers, url, code):
        # Old addinfourl has no getcode(); emulate it when missing
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)

    def http_request(self, req):
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

    def http_response(self, req, resp):
        # Transparently decompress gzip/deflate response bodies
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg

    # HTTPS requests/responses go through the same processing
    https_request = http_request
    https_response = http_response
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that threads the SSL context / connection class through."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward context/check_hostname only where the base class has them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname
        return self.do_open(functools.partial(
            _create_http_connection, self, self._https_conn_class, True),
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

            # Optional fraction, then 'Z' or a +HH:MM / -HHMM style offset
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            # No offset matched: treat as UTC
            timezone = datetime.timedelta()
            date_str = date_str[:-len(m.group(0))]
            if not m.group('sign'):
                timezone = datetime.timedelta()
                sign = 1 if m.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=sign * int(m.group('hours')),
                    minutes=sign * int(m.group('minutes')))
    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    dt = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(dt.timetuple())
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    # Commas never carry meaning in any supported format
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d %H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S.%f',
        # day-first formats (condition elided)
        format_expressions.extend([
        # month-first formats (condition elided)
        format_expressions.extend([
    # First parseable expression wins
    for expression in format_expressions:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL (falling back to default_ext)."""
    # Everything after the last '.' of the path, ignoring the query string
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name: <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        unit = match.group('unit')
        # A bad approximation?
        delta = datetime.timedelta(**{unit: time})
    # Plain YYYYMMDD fallback
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
            # No start given (else elided): open-ended at the minimum date
            self.start = datetime.datetime.min.date()
            self.end = date_from_str(end)
            # No end given (else elided): open-ended at the maximum date
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

        """Returns a range that only contains the given day"""

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

        # __str__ body (def elided): 'start - end' in ISO format
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
    # Body of platform_name() (def header elided in this view)
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes.wintypes

        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
    if fileno not in WIN_OUTPUT_IDS:

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is a real console only if it is char-typed (and local)
        # and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:

        # Write at most 1024 BMP chars per WriteConsoleW call
        count = min(next_nonbmp_pos(s), 1024)

            h, s, count if count else 2, ctypes.byref(written), None)
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            assert written.value > 0
            s = s[written.value:]
def write_string(s, out=None, encoding=None):
    """Write text s to stream out, handling Windows consoles and byte streams."""
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
def bytes_to_intlist(bs):
    """Convert a byte string to a list of integer byte values."""
    if isinstance(bs[0], int):  # Python 3
        # Python 2 branch (else elided): items are 1-char strings
        return [ord(c) for c in bs]


def intlist_to_bytes(xs):
    """Inverse of bytes_to_intlist: pack integer byte values into bytes."""
    return struct_pack('%dB' % len(xs), *xs)
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes

    class OVERLAPPED(ctypes.Structure):
        # OVERLAPPED layout fields for LockFileEx/UnlockFileEx
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file (max lockable range)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED alive for the matching unlock call
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

    # POSIX branch (else/import elided): delegate to fcntl.flock
    def _lock_file(f, exclusive):
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)
class locked_file(object):
    """File wrapper that holds a shared (read) or exclusive (write) lock."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)

    def __enter__(self):
        # Writers need an exclusive lock; readers a shared one
        exclusive = self.mode != 'r'
            _lock_file(self.f, exclusive)

    def __exit__(self, etype, value, traceback):
            _unlock_file(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
def shell_quote(args):
    """Return a shell-escaped command-line string for the argument list."""
    encoding = get_filesystem_encoding()
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # data is JSON-encoded and stashed in the URL fragment
    sdata = compat_urllib_parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url: extract (url, data); data falls back to default."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
def format_bytes(bytes):
    """Format a byte count as a human-readable string (e.g. '1.00MiB')."""
    if type(bytes) is str:
        bytes = float(bytes)
        # Pick the largest power-of-1024 unit that fits
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def parse_filesize(s):
    """Parse a human-readable file size ('5.6 MiB') into a byte count."""
    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)

    # Accept ',' as the decimal separator too
    num_str = m.group('num').replace(',', '.')
    mult = _UNIT_TABLE[m.group('unit')]
    return int(float(num_str) * mult)
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
        return ENGLISH_MONTH_NAMES.index(name) + 1


def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviation """
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1


def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
        # Leave already-escaped entities and character references alone
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
def setproctitle(title):
    """Set the visible process title (Linux only, via libc prctl)."""
    assert isinstance(title, compat_str)
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
        # 15 == PR_SET_NAME on Linux
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    """Strip the prefix `start` from s when present."""
    if s.startswith(start):
        return s[len(start):]


def remove_end(s, end):
    """Strip the suffix `end` from s when present."""
        return s[:-len(end)]
def url_basename(url):
    """Return the last path segment of a URL ('' when the path is empty)."""
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request subclass that issues HEAD instead of GET."""

    def get_method(self):
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce v to int (optionally via attribute get_attr), scaled; default on None."""
        v = getattr(v, get_attr, None)
    return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Coerce v to compat_str, returning default when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # Strip thousands separators and leading '+' before converting
    int_str = re.sub(r'[,\.\+]', '', int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce v to float scaled by invscale/scale; return default when v is None."""
    if v is None:
        return default
    return float(v) * invscale / scale
def parse_duration(s):
    """Parse a duration string ('1h 2m 3s', '01:02:03', ...) into seconds."""
    if not isinstance(s, compat_basestring):
        (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
        (?P<only_hours>[0-9.]+)\s*(?:hours?)|
        \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
        (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
        (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
        (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
        (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
    # Single-unit shortcuts first
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
        res += int(m.group('secs'))
    if m.group('mins_reversed'):
        res += int(m.group('mins_reversed')) * 60
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('hours_reversed'):
        res += int(m.group('hours_reversed')) * 60 * 60
        res += int(m.group('days')) * 24 * 60 * 60
        res += float(m.group('ms'))
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext before the real extension (only when it matches expected_real_ext)."""
    name, real_ext = os.path.splitext(filename)
        '{0}.{1}{2}'.format(name, ext, real_ext)
        if not expected_real_ext or real_ext[1:] == expected_real_ext
        else '{0}.{1}'.format(filename, ext))


def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE(review): mutable default args is harmless here — it is only read
        subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # Run the executable with the version flag and capture all output
        # (some tools print their version on stderr).
        out, _ = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from a tool's --version output.

    Returns the first group of `version_re` (default: the token after the
    word 'version'), or `unrecognized` when no version can be found.
    """
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    if m:
        return m.group(1)
    # Tool ran but printed nothing we can parse as a version.
    return unrecognized
class PagedList(object):
    """Abstract base for lazily-paged result lists; subclasses implement
    getslice(start, end)."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum)."""

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the items in [start, end) by querying only the pages
        that overlap that range."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of the requested range inside this page (0 when the
            # range started on an earlier page).
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the items in [start, end) by iterating only the pages
        that can contain them."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Number of leading items of the first page that fall before `start`.
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested range.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
def uppercase_escape(s):
    """Expand uppercase \\UXXXXXXXX escape sequences found literally in s
    into the characters they denote; all other text is left untouched."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() wants a byte string, so encode unicode input first.
    if isinstance(s, compat_str) and sys.version_info < (3, 0):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    # Escape every component except scheme/netloc, then reassemble.
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
try:
    # Probe whether struct accepts a text-mode format string.
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # Modern interpreters: just alias the stdlib functions.
    struct_pack = struct.pack
    struct_unpack = struct.unpack
def read_batch_urls(batch_fd):
    """Read a batch file object and return the list of URLs it contains,
    skipping blank lines and '#'/';'/']'-prefixed comment lines.
    The file object is closed afterwards."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that survived decoding as individual chars.
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    # urlencode the given data and return ASCII bytes suitable as a POST body.
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    # Element.iter is missing; findall('.//*') walks all descendants
    # (but, unlike iter, does not yield the root element itself).
    etree_iter = lambda n: n.findall('.//*')
    # Builder subclass whose only change is to accept DOCTYPE declarations
    # silently instead of failing on them.
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    # The `parser` keyword of ElementTree.XML only exists on Python >= 2.7.
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x: re-decode any byte-string text nodes
    # so callers always see unicode text.
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
def parse_age_limit(s):
    """Parse an age limit like '18' or '18+' into an int; fall back to the
    US_RATINGS table for rating labels; None for None/unknown input."""
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)
def strip_jsonp(code):
    """Strip a JSONP wrapper (`callback(...);` plus optional trailing
    `//` comments) and return the bare JSON payload."""
    return re.sub(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code):
    """Best-effort conversion of a JavaScript object literal into valid
    JSON: quotes bare keys, rewrites single-quoted strings, keeps
    true/false/null, and drops trailing commas."""
    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            # Already a JSON string literal.
            return v
        if v.startswith("'"):
            # Convert single-quoted string: unescape \' and escape ".
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    # Match string literals (double- or single-quoted) and bare identifiers.
    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Remove trailing commas before closing brackets/braces.
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
    return res
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.

    Returns a callable mapping a quality id to its index in `quality_ids`
    (higher index = better quality), or -1 for unknown ids. """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q
# Default output filename template: "<title>-<id>.<ext>".
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        # Truncate so the result, including the ellipses, fits in `length`.
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    # Split on dots and dashes so '2015.01.23-4' -> (2015, 1, 23, 4).
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
def is_outdated_version(version, limit, assume_new=True):
    """Return True when `version` compares older than `limit`.

    Empty/None or unparsable versions yield `not assume_new`, i.e. by
    default an unknown version is treated as new enough.
    """
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter
    # Updateable when running from a zip bundle or a frozen executable.
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
def mimetype2ext(mt):
    """Map a MIME type to a file extension: the subtype (after '/') is
    usually the extension itself; known exceptions are remapped."""
    _, _, res = mt.rpartition('/')

    return {
        # NOTE(review): the original table may contain additional mappings
        # not visible in this chunk — verify against the full file.
        'x-mp4-fragmented': 'mp4',
    }.get(res, res)
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a URL response handle, preferring the
    Content-Disposition filename over the Content-Type mapping."""
    try:
        url_handle.headers  # probe: raises AttributeError on Python < 3
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Known byte-order marks, longest first so UTF-32 wins over UTF-16.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        # No BOM found: assume UTF-8 with lenient decoding.
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Work out the download protocol for an info dict: an explicit
    'protocol' key wins, then the URL scheme/extension is inspected."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    # HLS/HDS manifests are recognized by their file extension.
    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Width of each column = widest cell in that column.
    widths = []
    for column in zip(*rows):
        widths.append(max(len(compat_str(cell)) for cell in column))
    # Left-align every column but the last, padded one space past the widest cell.
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(r) for r in rows)
1741 def _match_one(filter_part, dct):
1742 COMPARISON_OPERATORS = {
1750 operator_rex = re.compile(r'''(?x)\s*
1752 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1754 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
1755 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
1758 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
1759 m = operator_rex.search(filter_part)
1761 op = COMPARISON_OPERATORS[m.group('op')]
1762 if m.group('strval') is not None:
1763 if m.group('op') not in ('=', '!='):
1765 'Operator %s does not support string values!' % m.group('op'))
1766 comparison_value = m.group('strval')
1769 comparison_value = int(m.group('intval'))
1771 comparison_value = parse_filesize(m.group('intval'))
1772 if comparison_value is None:
1773 comparison_value = parse_filesize(m.group('intval') + 'B')
1774 if comparison_value is None:
1776 'Invalid integer value %r in filter part %r' % (
1777 m.group('intval'), filter_part))
1778 actual_value = dct.get(m.group('key'))
1779 if actual_value is None:
1780 return m.group('none_inclusive')
1781 return op(actual_value, comparison_value)
1784 '': lambda v: v is not None,
1785 '!': lambda v: v is None,
1787 operator_rex = re.compile(r'''(?x)\s*
1788 (?P<op>%s)\s*(?P<key>[a-z_]+)
1790 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
1791 m = operator_rex.search(filter_part)
1793 op = UNARY_OPERATORS[m.group('op')]
1794 actual_value = dct.get(m.group('key'))
1795 return op(actual_value)
1797 raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # '&'-separated parts must all hold for the filter to pass.
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    """Build a match-filter callable: it returns None when the info dict
    passes `filter_str`, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.5s' or 'HH:MM:SS.mmm')
    into seconds; None/empty or unrecognized input yields None."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
def format_srt_time(seconds):
    # Break a float seconds value into SRT's HH:MM:SS,mmm notation.
    total_mins, secs = divmod(seconds, 60)
    hours, mins = divmod(total_mins, 60)
    millisecs = (secs - int(secs)) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
def dfxp2srt(dfxp_data):
    # Convert DFXP/TTML subtitle XML (a unicode string) into SRT text.
    # NOTE(review): several lines of this function are not visible in this
    # chunk (loop headers, accumulator setup, final return) — confirm
    # against the full file.
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})

    def parse_node(node):
        # Flatten a <p>/<span> node and its children into plain text.
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)

        # <br> becomes a newline, nested <span>s recurse, anything else is
        # serialized as raw XML.
        if child.tag == _x('ttml:br'):
            out += '\n' + str_or_empty(child.tail)
        elif child.tag == _x('ttml:span'):
            out += str_or_empty(parse_node(child))
        out += str_or_empty(xml.etree.ElementTree.tostring(child))

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))

    paras = dfxp.findall(_x('.//ttml:p'))

    # Each <p> element becomes one numbered SRT cue with its begin/end
    # attributes converted to SRT timestamps.
    for para, index in zip(paras, itertools.count(1)):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header,
    overriding the handler-level proxy configuration."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            # The per-request header overrides the configured proxy and is
            # consumed (removed) so it is never sent over the wire.
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)