2 # -*- coding: utf-8 -*-
4 from __future__ import unicode_literals
32 import xml.etree.ElementTree
41 compat_socket_create_connection,
45 compat_urllib_parse_urlparse,
46 compat_urllib_request,
52 # This is not clearly defined otherwise
53 compiled_regex_type = type(re.compile(''))
56 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
57 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
58 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
59 'Accept-Encoding': 'gzip, deflate',
60 'Accept-Language': 'en-us,en;q=0.5',
64 ENGLISH_MONTH_NAMES = [
65 'January', 'February', 'March', 'April', 'May', 'June',
66 'July', 'August', 'September', 'October', 'November', 'December']
69 def preferredencoding():
70 """Get preferred encoding.
72 Returns the best encoding scheme for the system, based on
73 locale.getpreferredencoding() and some further tweaks.
76 pref = locale.getpreferredencoding()
84 def write_json_file(obj, fn):
85 """ Encode obj as JSON and write it to fn, atomically if possible """
87 fn = encodeFilename(fn)
88 if sys.version_info < (3, 0) and sys.platform != 'win32':
89 encoding = get_filesystem_encoding()
90 # os.path.basename returns a bytes object, but NamedTemporaryFile
91 # will fail if the filename contains non ascii characters unless we
92 # use a unicode object
93 path_basename = lambda f: os.path.basename(fn).decode(encoding)
94 # the same for os.path.dirname
95 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
97 path_basename = os.path.basename
98 path_dirname = os.path.dirname
102 'prefix': path_basename(fn) + '.',
103 'dir': path_dirname(fn),
107 # In Python 2.x, json.dump expects a bytestream.
108 # In Python 3.x, it writes to a character stream
109 if sys.version_info < (3, 0):
117 tf = tempfile.NamedTemporaryFile(**args)
122 if sys.platform == 'win32':
123 # Need to remove existing file on Windows, else os.rename raises
124 # WindowsError or FileExistsError.
129 os.rename(tf.name, fn)
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """Find the first node matching xpath[@key='val'].

        Returns None when nothing matches. *val* must not contain a
        single quote: ElementTree predicates offer no escaping, so a
        quote would break out of the expression built below.
        """
        # Attribute names follow XML naming rules; val only needs to be
        # safe inside the single-quoted predicate (the previous regex
        # whitelist rejected many legitimate attribute values).
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert "'" not in val
        expr = xpath + "[@%s='%s']" % (key, val)
        return node.find(expr)
146 def find_xpath_attr(node, xpath, key, val):
147 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
148 # .//node does not match if a node is a direct child of . !
149 if isinstance(xpath, compat_str):
150 xpath = xpath.encode('ascii')
152 for f in node.findall(xpath):
153 if f.attrib.get(key) == val:
157 # On python2.6 the xml.etree.ElementTree.Element methods don't support
158 # the namespace parameter
161 def xpath_with_ns(path, ns_map):
162 components = [c.split(':') for c in path.split('/')]
166 replaced.append(c[0])
169 replaced.append('{%s}%s' % (ns_map[ns], tag))
170 return '/'.join(replaced)
173 def xpath_text(node, xpath, name=None, fatal=False):
174 if sys.version_info < (2, 7): # Crazy 2.6
175 xpath = xpath.encode('ascii')
178 if n is None or n.text is None:
180 name = xpath if name is None else name
181 raise ExtractorError('Could not find XML element %s' % name)
def get_element_by_id(id, html):
    """Return the inner content of the element whose id attribute equals *id*."""
    # An id lookup is just an attribute lookup with a fixed attribute name.
    return get_element_by_attribute("id", id, html)
192 def get_element_by_attribute(attribute, value, html):
193 """Return the content of the tag with the specified attribute in the passed HTML document"""
195 m = re.search(r'''(?xs)
197 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
199 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
203 ''' % (re.escape(attribute), re.escape(value)), html)
207 res = m.group('content')
209 if res.startswith('"') or res.startswith("'"):
212 return unescapeHTML(res)
215 def clean_html(html):
216 """Clean an HTML snippet into a readable string"""
218 if html is None: # Convenience for sanitizing descriptions etc.
222 html = html.replace('\n', ' ')
223 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
224 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
226 html = re.sub('<.*?>', '', html)
227 # Replace html entities
228 html = unescapeHTML(html)
232 def sanitize_open(filename, open_mode):
233 """Try to open the given filename, and slightly tweak it if this fails.
235 Attempts to open the given filename. If this fails, it tries to change
236 the filename slightly, step by step, until it's either able to open it
237 or it fails and raises a final exception, like the standard open()
240 It returns the tuple (stream, definitive_file_name).
244 if sys.platform == 'win32':
246 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
247 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
248 stream = open(encodeFilename(filename), open_mode)
249 return (stream, filename)
250 except (IOError, OSError) as err:
251 if err.errno in (errno.EACCES,):
254 # In case of error, try to remove win32 forbidden chars
255 alt_filename = sanitize_path(filename)
256 if alt_filename == filename:
259 # An exception here should be caught in the caller
260 stream = open(encodeFilename(alt_filename), open_mode)
261 return (stream, alt_filename)
264 def timeconvert(timestr):
265 """Convert RFC 2822 defined time string into system timestamp"""
267 timetuple = email.utils.parsedate_tz(timestr)
268 if timetuple is not None:
269 timestamp = email.utils.mktime_tz(timetuple)
273 def sanitize_filename(s, restricted=False, is_id=False):
274 """Sanitizes a string so it could be used as part of a filename.
275 If restricted is set, use a stricter subset of allowed characters.
276 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
278 def replace_insane(char):
279 if char == '?' or ord(char) < 32 or ord(char) == 127:
282 return '' if restricted else '\''
284 return '_-' if restricted else ' -'
285 elif char in '\\/|*<>':
287 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
289 if restricted and ord(char) > 127:
294 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
295 result = ''.join(map(replace_insane, s))
297 while '__' in result:
298 result = result.replace('__', '_')
299 result = result.strip('_')
300 # Common case of "Foreign band name - English song title"
301 if restricted and result.startswith('-_'):
303 if result.startswith('-'):
304 result = '_' + result[len('-'):]
305 result = result.lstrip('.')
311 def sanitize_path(s):
312 """Sanitizes and normalizes path on Windows"""
313 if sys.platform != 'win32':
315 drive_or_unc, _ = os.path.splitdrive(s)
316 if sys.version_info < (2, 7) and not drive_or_unc:
317 drive_or_unc, _ = os.path.splitunc(s)
318 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
322 path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
323 for path_part in norm_path]
325 sanitized_path.insert(0, drive_or_unc + os.path.sep)
326 return os.path.join(*sanitized_path)
def sanitize_url_path_consecutive_slashes(url):
    """Collapses consecutive slashes in URLs' path"""
    parts = list(compat_urlparse.urlparse(url))
    # Index 2 of the parse result is the path component.
    parts[2] = re.sub(r'/{2,}', '/', parts[2])
    return compat_urlparse.urlunparse(parts)
336 def orderedSet(iterable):
337 """ Remove all duplicates from the input iterable """
345 def _htmlentity_transform(entity):
346 """Transforms an HTML entity to a character."""
347 # Known non-numeric HTML entity
348 if entity in compat_html_entities.name2codepoint:
349 return compat_chr(compat_html_entities.name2codepoint[entity])
351 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
353 numstr = mobj.group(1)
354 if numstr.startswith('x'):
356 numstr = '0%s' % numstr
359 return compat_chr(int(numstr, base))
361 # Unknown entity in name, return its literal representation
362 return ('&%s;' % entity)
368 assert type(s) == compat_str
371 r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
374 def get_subprocess_encoding():
375 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
376 # For subprocess calls, encode with locale encoding
377 # Refer to http://stackoverflow.com/a/9951851/35070
378 encoding = preferredencoding()
380 encoding = sys.getfilesystemencoding()
386 def encodeFilename(s, for_subprocess=False):
388 @param s The name of the file
391 assert type(s) == compat_str
393 # Python 3 has a Unicode API
394 if sys.version_info >= (3, 0):
397 # Pass '' directly to use Unicode APIs on Windows 2000 and up
398 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
399 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
400 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
403 return s.encode(get_subprocess_encoding(), 'ignore')
406 def decodeFilename(b, for_subprocess=False):
408 if sys.version_info >= (3, 0):
411 if not isinstance(b, bytes):
414 return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if not isinstance(s, compat_str):
        # Legacy callers may still hand us byte strings; upgrade them so
        # they take the same encoding path as unicode arguments.
        # TODO: turn this into an assertion once all post processors are fixed.
        s = s.decode('ascii')
    return encodeFilename(s, True)
def decodeArgument(b):
    """Decode a subprocess argument back to text (inverse of encodeArgument)."""
    return decodeFilename(b, True)
430 def decodeOption(optval):
433 if isinstance(optval, bytes):
434 optval = optval.decode(preferredencoding())
436 assert isinstance(optval, compat_str)
440 def formatSeconds(secs):
442 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
444 return '%d:%02d' % (secs // 60, secs % 60)
449 def make_HTTPS_handler(params, **kwargs):
450 opts_no_check_certificate = params.get('nocheckcertificate', False)
451 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
452 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
453 if opts_no_check_certificate:
454 context.check_hostname = False
455 context.verify_mode = ssl.CERT_NONE
457 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
460 # (create_default_context present but HTTPSHandler has no context=)
463 if sys.version_info < (3, 2):
464 return YoutubeDLHTTPSHandler(params, **kwargs)
466 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
467 context.verify_mode = (ssl.CERT_NONE
468 if opts_no_check_certificate
469 else ssl.CERT_REQUIRED)
470 context.set_default_verify_paths()
471 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
474 def bug_reports_message():
475 if ytdl_is_updateable():
476 update_cmd = 'type youtube-dl -U to update'
478 update_cmd = 'see https://yt-dl.org/update on how to update'
479 msg = '; please report this issue on https://yt-dl.org/bug .'
480 msg += ' Make sure you are using the latest version; %s.' % update_cmd
481 msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
485 class ExtractorError(Exception):
486 """Error during info extraction."""
488 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
489 """ tb, if given, is the original traceback (so that it can be printed out).
490 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
493 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
495 if video_id is not None:
496 msg = video_id + ': ' + msg
498 msg += ' (caused by %r)' % cause
500 msg += bug_reports_message()
501 super(ExtractorError, self).__init__(msg)
504 self.exc_info = sys.exc_info() # preserve original exception
506 self.video_id = video_id
508 def format_traceback(self):
509 if self.traceback is None:
511 return ''.join(traceback.format_tb(self.traceback))
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        # expected=True: this is a normal user-facing error, not a bug.
        super(UnsupportedError, self).__init__(message, expected=True)
521 class RegexNotFoundError(ExtractorError):
522 """Error when a regex didn't match"""
526 class DownloadError(Exception):
527 """Download Error exception.
529 This exception may be thrown by FileDownloader objects if they are not
530 configured to continue on errors. They will contain the appropriate
534 def __init__(self, msg, exc_info=None):
535 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
536 super(DownloadError, self).__init__(msg)
537 self.exc_info = exc_info
540 class SameFileError(Exception):
541 """Same File exception.
543 This exception will be thrown by FileDownloader objects if they detect
544 multiple files would have to be downloaded to the same file on disk.
549 class PostProcessingError(Exception):
550 """Post Processing exception.
552 This exception may be raised by PostProcessor's .run() method to
553 indicate an error in the postprocessing task.
556 def __init__(self, msg):
560 class MaxDownloadsReached(Exception):
561 """ --max-downloads limit has been reached. """
565 class UnavailableVideoError(Exception):
566 """Unavailable Format exception.
568 This exception will be thrown when a video is requested
569 in a format that is not available for that video.
574 class ContentTooShortError(Exception):
575 """Content Too Short exception.
577 This exception may be raised by FileDownloader objects when a file they
578 download is too small for what the server announced first, indicating
579 the connection was probably interrupted.
    def __init__(self, downloaded, expected):
        # downloaded: number of bytes actually received
        # expected: number of bytes announced by the server
        self.downloaded = downloaded
        self.expected = expected
590 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
591 hc = http_class(*args, **kwargs)
592 source_address = ydl_handler._params.get('source_address')
593 if source_address is not None:
594 sa = (source_address, 0)
595 if hasattr(hc, 'source_address'): # Python 2.7+
596 hc.source_address = sa
598 def _hc_connect(self, *args, **kwargs):
599 sock = compat_socket_create_connection(
600 (self.host, self.port), self.timeout, sa)
602 self.sock = ssl.wrap_socket(
603 sock, self.key_file, self.cert_file,
604 ssl_version=ssl.PROTOCOL_TLSv1)
607 hc.connect = functools.partial(_hc_connect, hc)
612 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
613 """Handler for HTTP requests and responses.
615 This class, when installed with an OpenerDirector, automatically adds
616 the standard headers to every HTTP request and handles gzipped and
617 deflated responses from web servers. If compression is to be avoided in
618 a particular request, the original request in the program code only has
619 to include the HTTP header "Youtubedl-No-Compression", which will be
620 removed before making the real request.
622 Part of this code was copied from:
624 http://techknack.net/python-urllib2-handlers/
626 Andrew Rowls, the author of that code, agreed to release it to the
630 def __init__(self, params, *args, **kwargs):
631 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
632 self._params = params
634 def http_open(self, req):
635 return self.do_open(functools.partial(
636 _create_http_connection, self, compat_http_client.HTTPConnection, False),
642 return zlib.decompress(data, -zlib.MAX_WBITS)
644 return zlib.decompress(data)
647 def addinfourl_wrapper(stream, headers, url, code):
648 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
649 return compat_urllib_request.addinfourl(stream, headers, url, code)
650 ret = compat_urllib_request.addinfourl(stream, headers, url)
654 def http_request(self, req):
655 for h, v in std_headers.items():
656 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
657 # The dict keys are capitalized because of this bug by urllib
658 if h.capitalize() not in req.headers:
660 if 'Youtubedl-no-compression' in req.headers:
661 if 'Accept-encoding' in req.headers:
662 del req.headers['Accept-encoding']
663 del req.headers['Youtubedl-no-compression']
665 if sys.version_info < (2, 7) and '#' in req.get_full_url():
666 # Python 2.6 is brain-dead when it comes to fragments
667 req._Request__original = req._Request__original.partition('#')[0]
668 req._Request__r_type = req._Request__r_type.partition('#')[0]
672 def http_response(self, req, resp):
675 if resp.headers.get('Content-encoding', '') == 'gzip':
676 content = resp.read()
677 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
679 uncompressed = io.BytesIO(gz.read())
680 except IOError as original_ioerror:
681 # There may be junk add the end of the file
682 # See http://stackoverflow.com/q/4928560/35070 for details
683 for i in range(1, 1024):
685 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
686 uncompressed = io.BytesIO(gz.read())
691 raise original_ioerror
692 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
693 resp.msg = old_resp.msg
695 if resp.headers.get('Content-encoding', '') == 'deflate':
696 gz = io.BytesIO(self.deflate(resp.read()))
697 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
698 resp.msg = old_resp.msg
701 https_request = http_request
702 https_response = http_response
705 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
706 def __init__(self, params, https_conn_class=None, *args, **kwargs):
707 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
708 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
709 self._params = params
711 def https_open(self, req):
713 if hasattr(self, '_context'): # python > 2.6
714 kwargs['context'] = self._context
715 if hasattr(self, '_check_hostname'): # python 3.x
716 kwargs['check_hostname'] = self._check_hostname
717 return self.do_open(functools.partial(
718 _create_http_connection, self, self._https_conn_class, True),
722 def parse_iso8601(date_str, delimiter='T', timezone=None):
723 """ Return a UNIX timestamp from the given date """
730 r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
733 timezone = datetime.timedelta()
735 date_str = date_str[:-len(m.group(0))]
736 if not m.group('sign'):
737 timezone = datetime.timedelta()
739 sign = 1 if m.group('sign') == '+' else -1
740 timezone = datetime.timedelta(
741 hours=sign * int(m.group('hours')),
742 minutes=sign * int(m.group('minutes')))
743 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
744 dt = datetime.datetime.strptime(date_str, date_format) - timezone
745 return calendar.timegm(dt.timetuple())
748 def unified_strdate(date_str, day_first=True):
749 """Return a string with the date in the format YYYYMMDD"""
755 date_str = date_str.replace(',', ' ')
756 # %z (UTC offset) is only supported in python>=3.2
757 if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
758 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
759 # Remove AM/PM + timezone
760 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
762 format_expressions = [
767 '%b %dst %Y %I:%M%p',
768 '%b %dnd %Y %I:%M%p',
769 '%b %dth %Y %I:%M%p',
775 '%Y-%m-%d %H:%M:%S.%f',
778 '%Y-%m-%dT%H:%M:%SZ',
779 '%Y-%m-%dT%H:%M:%S.%fZ',
780 '%Y-%m-%dT%H:%M:%S.%f0Z',
782 '%Y-%m-%dT%H:%M:%S.%f',
786 format_expressions.extend([
794 format_expressions.extend([
801 for expression in format_expressions:
803 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
806 if upload_date is None:
807 timetuple = email.utils.parsedate_tz(date_str)
809 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
813 def determine_ext(url, default_ext='unknown_video'):
816 guess = url.partition('?')[0].rpartition('.')[2]
817 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build a subtitle file name of the form <base>.<lang>.<format>."""
    base = filename.rsplit('.', 1)[0]
    return '.'.join((base, sub_lang, sub_format))
827 def date_from_str(date_str):
829 Return a datetime object from a string in the format YYYYMMDD or
830 (now|today)[+-][0-9](day|week|month|year)(s)?"""
831 today = datetime.date.today()
832 if date_str in ('now', 'today'):
834 if date_str == 'yesterday':
835 return today - datetime.timedelta(days=1)
836 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
837 if match is not None:
838 sign = match.group('sign')
839 time = int(match.group('time'))
842 unit = match.group('unit')
843 # A bad aproximation?
851 delta = datetime.timedelta(**{unit: time})
853 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
856 def hyphenate_date(date_str):
858 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
859 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
860 if match is not None:
861 return '-'.join(match.groups())
866 class DateRange(object):
867 """Represents a time interval between two dates"""
869 def __init__(self, start=None, end=None):
870 """start and end must be strings in the format accepted by date"""
871 if start is not None:
872 self.start = date_from_str(start)
874 self.start = datetime.datetime.min.date()
876 self.end = date_from_str(end)
878 self.end = datetime.datetime.max.date()
879 if self.start > self.end:
880 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
884 """Returns a range that only contains the given day"""
887 def __contains__(self, date):
888 """Check if the date is in the range"""
889 if not isinstance(date, datetime.date):
890 date = date_from_str(date)
891 return self.start <= date <= self.end
894 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
898 """ Returns the platform name as a compat_str """
899 res = platform.platform()
900 if isinstance(res, bytes):
901 res = res.decode(preferredencoding())
903 assert isinstance(res, compat_str)
907 def _windows_write_string(s, out):
908 """ Returns True if the string was written using special methods,
909 False if it has yet to be written out."""
910 # Adapted from http://stackoverflow.com/a/3259271/35070
913 import ctypes.wintypes
921 fileno = out.fileno()
922 except AttributeError:
923 # If the output stream doesn't have a fileno, it's virtual
925 except io.UnsupportedOperation:
926 # Some strange Windows pseudo files?
928 if fileno not in WIN_OUTPUT_IDS:
931 GetStdHandle = ctypes.WINFUNCTYPE(
932 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
933 (b"GetStdHandle", ctypes.windll.kernel32))
934 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
936 WriteConsoleW = ctypes.WINFUNCTYPE(
937 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
938 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
939 ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
940 written = ctypes.wintypes.DWORD(0)
942 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
943 FILE_TYPE_CHAR = 0x0002
944 FILE_TYPE_REMOTE = 0x8000
945 GetConsoleMode = ctypes.WINFUNCTYPE(
946 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
947 ctypes.POINTER(ctypes.wintypes.DWORD))(
948 (b"GetConsoleMode", ctypes.windll.kernel32))
949 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
951 def not_a_console(handle):
952 if handle == INVALID_HANDLE_VALUE or handle is None:
954 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
955 GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
960 def next_nonbmp_pos(s):
962 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
963 except StopIteration:
967 count = min(next_nonbmp_pos(s), 1024)
970 h, s, count if count else 2, ctypes.byref(written), None)
972 raise OSError('Failed to write string')
973 if not count: # We just wrote a non-BMP character
974 assert written.value == 2
977 assert written.value > 0
978 s = s[written.value:]
982 def write_string(s, out=None, encoding=None):
985 assert type(s) == compat_str
987 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
988 if _windows_write_string(s, out):
991 if ('b' in getattr(out, 'mode', '') or
992 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
993 byt = s.encode(encoding or preferredencoding(), 'ignore')
995 elif hasattr(out, 'buffer'):
996 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
997 byt = s.encode(enc, 'ignore')
998 out.buffer.write(byt)
1004 def bytes_to_intlist(bs):
1007 if isinstance(bs[0], int): # Python 3
1010 return [ord(c) for c in bs]
1013 def intlist_to_bytes(xs):
1016 return struct_pack('%dB' % len(xs), *xs)
1019 # Cross-platform file locking
1020 if sys.platform == 'win32':
1021 import ctypes.wintypes
1024 class OVERLAPPED(ctypes.Structure):
1026 ('Internal', ctypes.wintypes.LPVOID),
1027 ('InternalHigh', ctypes.wintypes.LPVOID),
1028 ('Offset', ctypes.wintypes.DWORD),
1029 ('OffsetHigh', ctypes.wintypes.DWORD),
1030 ('hEvent', ctypes.wintypes.HANDLE),
1033 kernel32 = ctypes.windll.kernel32
1034 LockFileEx = kernel32.LockFileEx
1035 LockFileEx.argtypes = [
1036 ctypes.wintypes.HANDLE, # hFile
1037 ctypes.wintypes.DWORD, # dwFlags
1038 ctypes.wintypes.DWORD, # dwReserved
1039 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1040 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1041 ctypes.POINTER(OVERLAPPED) # Overlapped
1043 LockFileEx.restype = ctypes.wintypes.BOOL
1044 UnlockFileEx = kernel32.UnlockFileEx
1045 UnlockFileEx.argtypes = [
1046 ctypes.wintypes.HANDLE, # hFile
1047 ctypes.wintypes.DWORD, # dwReserved
1048 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1049 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1050 ctypes.POINTER(OVERLAPPED) # Overlapped
1052 UnlockFileEx.restype = ctypes.wintypes.BOOL
1053 whole_low = 0xffffffff
1054 whole_high = 0x7fffffff
    def _lock_file(f, exclusive):
        # LockFileEx locks a byte range rather than a file object, so an
        # OVERLAPPED struct at offset 0 (with the whole_low/whole_high
        # length) selects the entire file.
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stash the pointer on the file object: it must stay alive until
        # _unlock_file passes it back to UnlockFileEx.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())
    def _unlock_file(f):
        # The OVERLAPPED pointer stored by _lock_file identifies the
        # locked region (the whole file) to release.
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
    def _lock_file(f, exclusive):
        # LOCK_EX = exclusive (writer) lock, LOCK_SH = shared (reader) lock.
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
    def _unlock_file(f):
        # Release whichever flock (shared or exclusive) is held on f.
        fcntl.flock(f, fcntl.LOCK_UN)
1084 class locked_file(object):
1085 def __init__(self, filename, mode, encoding=None):
1086 assert mode in ['r', 'a', 'w']
1087 self.f = io.open(filename, mode, encoding=encoding)
1090 def __enter__(self):
1091 exclusive = self.mode != 'r'
1093 _lock_file(self.f, exclusive)
1099 def __exit__(self, etype, value, traceback):
1101 _unlock_file(self.f)
    def write(self, *args):
        # Delegate to the underlying file; the lock is held for the
        # lifetime of the with-block, not per call.
        return self.f.write(*args)

    def read(self, *args):
        # Delegate to the underlying file.
        return self.f.read(*args)
def get_filesystem_encoding():
    """Return the file system encoding, defaulting to 'utf-8' when unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        # getfilesystemencoding() may return None on some Python 2 setups.
        return 'utf-8'
    return fs_encoding
1120 def shell_quote(args):
1122 encoding = get_filesystem_encoding()
1124 if isinstance(a, bytes):
1125 # We may get a filename encoded with 'encodeFilename'
1126 a = a.decode(encoding)
1127 quoted_args.append(pipes.quote(a))
1128 return ' '.join(quoted_args)
1131 def takewhile_inclusive(pred, seq):
1132 """ Like itertools.takewhile, but include the latest evaluated element
1133 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # The payload rides in the fragment, which servers never see.
    payload = compat_urllib_parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, payload)
1148 def unsmuggle_url(smug_url, default=None):
1149 if '#__youtubedl_smuggle' not in smug_url:
1150 return smug_url, default
1151 url, _, sdata = smug_url.rpartition('#')
1152 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1153 data = json.loads(jsond)
1157 def format_bytes(bytes):
1160 if type(bytes) is str:
1161 bytes = float(bytes)
1165 exponent = int(math.log(bytes, 1024.0))
1166 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
1167 converted = float(bytes) / float(1024 ** exponent)
1168 return '%.2f%s' % (converted, suffix)
1171 def parse_filesize(s):
1175 # The lower-case forms are of course incorrect and inofficial,
1176 # but we support those too
1214 units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
1216 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
1220 num_str = m.group('num').replace(',', '.')
1221 mult = _UNIT_TABLE[m.group('unit')]
1222 return int(float(num_str) * mult)
1225 def month_by_name(name):
1226 """ Return the number of a month by (locale-independently) English name """
1229 return ENGLISH_MONTH_NAMES.index(name) + 1
1234 def month_by_abbreviation(abbrev):
1235 """ Return the number of a month by (locale-independently) English
1239 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
1244 def fix_xml_ampersands(xml_str):
1245 """Replace all the '&' by '&' in XML"""
1247 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1252 def setproctitle(title):
1253 assert isinstance(title, compat_str)
1255 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1258 title_bytes = title.encode('utf-8')
1259 buf = ctypes.create_string_buffer(len(title_bytes))
1260 buf.value = title_bytes
1262 libc.prctl(15, buf, 0, 0, 0)
1263 except AttributeError:
1264 return # Strange libc, just skip this
1267 def remove_start(s, start):
1268 if s.startswith(start):
1269 return s[len(start):]
1273 def remove_end(s, end):
1275 return s[:-len(end)]
def url_basename(url):
    """Return the last path segment of *url* (without query or fragment)."""
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
1284 class HEADRequest(compat_urllib_request.Request):
1285 def get_method(self):
1289 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1292 v = getattr(v, get_attr, None)
1295 return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Convert *v* to compat_str, mapping None to *default*."""
    if v is None:
        return default
    return compat_str(v)
1302 def str_to_int(int_str):
1303 """ A more relaxed version of int_or_none """
1306 int_str = re.sub(r'[,\.\+]', '', int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert *v* to float scaled by invscale/scale; None maps to *default*."""
    if v is None:
        return default
    return float(v) * invscale / scale
1314 def parse_duration(s):
1315 if not isinstance(s, compat_basestring):
1323 (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
1324 (?P<only_hours>[0-9.]+)\s*(?:hours?)|
1326 \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
1329 (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
1330 (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
1332 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1334 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
1339 if m.group('only_mins'):
1340 return float_or_none(m.group('only_mins'), invscale=60)
1341 if m.group('only_hours'):
1342 return float_or_none(m.group('only_hours'), invscale=60 * 60)
1344 res += int(m.group('secs'))
1345 if m.group('mins_reversed'):
1346 res += int(m.group('mins_reversed')) * 60
1348 res += int(m.group('mins')) * 60
1349 if m.group('hours'):
1350 res += int(m.group('hours')) * 60 * 60
1351 if m.group('hours_reversed'):
1352 res += int(m.group('hours_reversed')) * 60 * 60
1354 res += int(m.group('days')) * 24 * 60 * 60
1356 res += float(m.group('ms'))
def prepend_extension(filename, ext):
    """Insert *ext* before the real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    base, real_ext = os.path.splitext(filename)
    return base + '.' + ext + real_ext
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE(review): mutable default `args=[]` is shared across calls; harmless
    # as long as it is never mutated, but a None sentinel would be safer.
    # The try/except wrapping this spawn (and the return lines) are elided in
    # this excerpt.
    subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    # Spawn the executable and capture combined stdout/stderr.
    # NOTE(review): the Popen argument-list line and the surrounding
    # try/except are elided in this excerpt.
    out, _ = subprocess.Popen(
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    # communicate() yields bytes on Python 2; normalize to text for the regex.
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from a tool's --version output.

    Falls back to a generic "version <x>" pattern when no regex is given.
    NOTE(review): the return-on-match and `unrecognized` fallback lines are
    elided in this excerpt.
    """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
class PagedList(object):
    """Base class for lazily-paged result lists; subclasses provide getslice()."""
    # NOTE(review): the `def __len__(self):` header is elided in this excerpt.
        # This is only useful for tests
        return len(self.getslice())
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages one at a time, on demand, as getslice walks them."""

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc  # callable: pagenum -> iterable of entries
        self._pagesize = pagesize  # entries per page

    def getslice(self, start=0, end=None):
        # Collect entries with indices in [start, end) by pulling pages lazily.
        # NOTE(review): `res = []`, the startv/endv assignment heads, the
        # `continue`/`break` statements and the final `return res` are elided
        # in this excerpt.
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
            page_results = list(self._pagefunc(pagenum))
                start % self._pagesize
                if firstid <= start < nextfirstid
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc    # callable: pagenum -> iterable of entries
        self._pagecount = pagecount  # total number of pages
        self._pagesize = pagesize    # entries per page

    def getslice(self, start=0, end=None):
        # Iterate only over the pages that can contain [start, end).
        # NOTE(review): `res = []`, the end_page assignment head, the
        # `if skip_elems:` guard, break statements and the final return are
        # elided in this excerpt.
        start_page = start // self._pagesize
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
                page = page[skip_elems:]
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                    page = page[:only_more]
def uppercase_escape(s):
    # Decode literal '\UXXXXXXXX' escape sequences in s into the
    # corresponding characters.
    # NOTE(review): the `return re.sub(` head and the closing `s)` argument
    # line are elided in this excerpt.
    unicode_escape = codecs.getdecoder('unicode_escape')
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
def escape_rfc3986(s):
    """Percent-encode non-ASCII characters as suggested by RFC 3986."""
    # Reserved / already-escaped characters that must survive quoting.
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    # Python 2's quote() chokes on unicode input; encode it first.
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, safe_chars)
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    # Escape each URL component separately so the separators stay intact.
    # NOTE(review): the trailing `).geturl()` is elided in this excerpt.
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    # Probe whether struct.pack accepts a text format string; on Pythons
    # where it raises TypeError, wrap pack/unpack to encode the spec first.
    # NOTE(review): the enclosing try/except TypeError/else headers are
    # elided in this excerpt.
    struct.pack('!I', 0)

    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)

    # Modern Pythons: use the stdlib functions directly.
    struct_pack = struct.pack
    struct_unpack = struct.unpack
def read_batch_urls(batch_fd):
    # Read URLs from a batch file object, normalizing encoding and a UTF-8
    # BOM and dropping comment lines; closes the file when done.
    # NOTE(review): the `def fixup(url):` header, the strip call and the
    # early returns for comment/ordinary lines are elided in this excerpt.
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        if url.startswith(('#', ';', ']')):

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode a query and return it as ASCII bytes suitable for a POST body."""
    query = compat_urllib_parse.urlencode(*args, **kargs)
    return query.encode('ascii')
    # Use Element.iter where available, falling back to findall on old
    # Pythons.  NOTE(review): the enclosing `try:` header is elided here.
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
    # NOTE(review): this is the interior of a parser function (presumably
    # parse_xml(s)); its def line and trailing return are elided in this
    # excerpt.
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    # Passing a custom parser requires Python >= 2.7.
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None:
                if not isinstance(n.text, compat_str):
                    n.text = n.text.decode('utf-8')
def parse_age_limit(s):
    # Parse an age string such as '18' or '18+' into an int; unmatched
    # strings fall back to the US_RATINGS lookup table.
    # NOTE(review): the None-input guard is elided in this excerpt.
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    return int(m.group('age')) if m else US_RATINGS.get(s, None)
def strip_jsonp(code):
    # Strip a JSONP wrapper `callback(...);` (and trailing // comments),
    # leaving only the payload.
    # NOTE(review): the `return re.sub(` head is elided in this excerpt.
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code):
    # Convert JavaScript object notation into parseable JSON: normalize
    # quoting of string literals / bare identifiers and drop trailing commas.
    # NOTE(review): the inner fix_kv(m) helper header, its return statements
    # and the escape-mapping dict body are elided in this excerpt.
        if v in ('true', 'false', 'null'):
        if v.startswith('"'):
        if v.startswith("'"):
        v = re.sub(r"\\\\|\\'|\"", lambda m: {

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
    # Remove trailing commas before closing ] or }.
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    # NOTE(review): the inner q(qid) helper's def/try lines, its fallback
    # except branch and the final `return q` are elided in this excerpt.
            return quality_ids.index(qid)
# Default output filename template: "<title>-<id>.<ext>".
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    # NOTE(review): the None guard, the ELLIPSES definition and the
    # short-string early return are elided in this excerpt.
        return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Turn a dotted (or dash-separated) version string into a tuple of ints.

    Raises ValueError when any component is not numeric.
    """
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    # Compare dotted version strings; missing or unparsable input falls back
    # to `not assume_new`.
    # NOTE(review): the empty-version guard and the try/except ValueError
    # headers are elided in this excerpt.
        return not assume_new
    return version_tuple(version) < version_tuple(limit)
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when running from a zip bundle (__loader__ is a zipimporter) or a
    # frozen executable (sys.frozen set).
    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
def args_to_str(args):
    """Build a short, shell-quoted one-line representation of a command."""
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
def mimetype2ext(mt):
    # Map a MIME type to a file extension using its subtype.
    # NOTE(review): the mapping-dict head and its `.get(res, res)` tail are
    # elided in this excerpt.
    _, _, res = mt.rpartition('/')
        'x-mp4-fragmented': 'mp4',
def urlhandle_detect_ext(url_handle):
    # Guess a file extension from a urllib response: prefer the filename in
    # the Content-Disposition header, fall back to the Content-Type mimetype.
    # NOTE(review): the try header and the guards/returns around the
    # Content-Disposition branch are elided in this excerpt.
        getheader = lambda h: url_handle.headers[h]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    cd = getheader('Content-Disposition')
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
            e = determine_ext(m.group('filename'), default_ext=None)

    return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # NOTE(review): the `return False` for the no-age-limit case is elided
    # in this excerpt.
    if age_limit is None:  # No limit set
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # BOM -> encoding table (list head/close elided in this excerpt); the
    # input is decoded with the matching codec, defaulting to UTF-8.
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    # NOTE(review): the break/else structure of this loop is elided here.
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
        s = first_bytes.decode('utf-8', 'replace')

    # HTML-ish iff the decoded text starts with '<' (after whitespace).
    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    # Work out the download protocol: explicit 'protocol' field first, then
    # URL-scheme heuristics, then the extension, then the parsed scheme.
    # NOTE(review): the per-branch return statements and the extension
    # dispatch body are elided in this excerpt.
    protocol = info_dict.get('protocol')
    if protocol is not None:

    url = info_dict['url']
    if url.startswith('rtmp'):
    elif url.startswith('mms'):
    elif url.startswith('rtsp'):

    ext = determine_ext(url)

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data):
    """Render the header row plus data rows as aligned, space-separated text."""
    rows = [header_row] + data
    # Width of each column is the length of its longest cell.
    widths = []
    for column in zip(*rows):
        widths.append(max(len(compat_str(cell)) for cell in column))
    # Left-justify every column but the last to width + 1; columns are
    # additionally separated by a single space.
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    return '\n'.join(fmt % tuple(row) for row in rows)
def _match_one(filter_part, dct):
    # Evaluate a single filter expression (comparison like `duration > 60`
    # or unary like `!is_live`) against the dict `dct`.
    # NOTE(review): the operator tables' bodies, the key group of the regex,
    # the `if m:` guards and the `raise ValueError(` heads are elided in
    # this excerpt.
    COMPARISON_OPERATORS = {
    operator_rex = re.compile(r'''(?x)\s*
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
        (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    op = COMPARISON_OPERATORS[m.group('op')]
    if m.group('strval') is not None:
        # String comparisons only make sense for equality operators.
        if m.group('op') not in ('=', '!='):
            'Operator %s does not support string values!' % m.group('op'))
        comparison_value = m.group('strval')
        # Numeric path: plain int first, then a filesize suffix (50k, 2MiB).
        comparison_value = int(m.group('intval'))
        comparison_value = parse_filesize(m.group('intval'))
        if comparison_value is None:
            comparison_value = parse_filesize(m.group('intval') + 'B')
        if comparison_value is None:
            'Invalid integer value %r in filter part %r' % (
                m.group('intval'), filter_part))
    actual_value = dct.get(m.group('key'))
    if actual_value is None:
        # Missing keys match only when the '?' suffix was given.
        return m.group('none_inclusive')
    return op(actual_value, comparison_value)

    # Unary operators: '' = key must be present, '!' = key must be absent.
    # NOTE(review): the UNARY_OPERATORS dict header is elided here.
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    op = UNARY_OPERATORS[m.group('op')]
    actual_value = dct.get(m.group('key'))
    return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # All '&'-separated parts must match.
    # NOTE(review): the `return all(` head is elided in this excerpt.
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
def match_filter_func(filter_str):
    # Build a match-filter callback: the inner function returns None when the
    # video passes, or a human-readable skip message otherwise.
    # NOTE(review): the `return None` on match and the trailing
    # `return _match_func` are elided in this excerpt.
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
def parse_dfxp_time_expr(time_expr):
    # Parse a TTML/DFXP time expression ('12.3s' or 'HH:MM:SS[.mmm]') into a
    # number of seconds.
    # NOTE(review): the empty-input guard and the `if mobj:` lines are
    # elided in this excerpt.
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
def format_srt_time(seconds):
    """Format a seconds offset as an SRT timestamp 'HH:MM:SS,mmm'."""
    (mins, secs) = divmod(seconds, 60)
    (hours, mins) = divmod(mins, 60)
    # Fractional part of the seconds, expressed in milliseconds.
    millisecs = (secs - int(secs)) * 1000

    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
def dfxp2srt(dfxp_data):
    """Convert DFXP/TTML subtitle XML text into SRT-formatted text."""
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})

    def parse_node(node):
        # Flatten a subtitle node's subtree into plain text, turning
        # <ttml:br> into newlines and recursing into <ttml:span>.
        # NOTE(review): the child-iteration header, the else branch head and
        # the helper's return are elided in this excerpt.
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)
            if child.tag == _x('ttml:br'):
                out += '\n' + str_or_empty(child.tail)
            elif child.tag == _x('ttml:span'):
                out += str_or_empty(parse_node(child))
                out += str_or_empty(xml.etree.ElementTree.tostring(child))

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))

    paras = dfxp.findall(_x('.//ttml:p'))

    # NOTE(review): `out = []`, the index argument and the final join/return
    # are elided in this excerpt.
    for para, index in zip(paras, itertools.count(1)):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        # Install http/https open handlers that funnel through proxy_open
        # with a '__noproxy__' sentinel default.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # Allow individual requests to override the proxy via the synthetic
        # Ytdl-request-proxy header, which is stripped before sending.
        # NOTE(review): the line assigning the override to `proxy` is elided
        # in this excerpt.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)