2 # -*- coding: utf-8 -*-
4 from __future__ import unicode_literals
30 import xml.etree.ElementTree
41 compat_urllib_parse_urlparse,
42 compat_urllib_request,
48 # This is not clearly defined otherwise
49 compiled_regex_type = type(re.compile(''))
52 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
53 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
54 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
55 'Accept-Encoding': 'gzip, deflate',
56 'Accept-Language': 'en-us,en;q=0.5',
59 def preferredencoding():
60 """Get preferred encoding.
62 Returns the best encoding scheme for the system, based on
63 locale.getpreferredencoding() and some further tweaks.
66 pref = locale.getpreferredencoding()
74 def write_json_file(obj, fn):
75 """ Encode obj as JSON and write it to fn, atomically if possible """
77 fn = encodeFilename(fn)
78 if sys.version_info < (3, 0) and sys.platform != 'win32':
79 encoding = get_filesystem_encoding()
80 # os.path.basename returns a bytes object, but NamedTemporaryFile
81 # will fail if the filename contains non ascii characters unless we
82 # use a unicode object
83 path_basename = lambda f: os.path.basename(fn).decode(encoding)
84 # the same for os.path.dirname
85 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
87 path_basename = os.path.basename
88 path_dirname = os.path.dirname
92 'prefix': path_basename(fn) + '.',
93 'dir': path_dirname(fn),
97 # In Python 2.x, json.dump expects a bytestream.
98 # In Python 3.x, it writes to a character stream
99 if sys.version_info < (3, 0):
107 tf = tempfile.NamedTemporaryFile(**args)
112 if sys.platform == 'win32':
113 # Need to remove existing file on Windows, else os.rename raises
114 # WindowsError or FileExistsError.
119 os.rename(tf.name, fn)
128 if sys.version_info >= (2, 7):
129 def find_xpath_attr(node, xpath, key, val):
130 """ Find the xpath xpath[@key=val] """
131 assert re.match(r'^[a-zA-Z-]+$', key)
132 assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
133 expr = xpath + u"[@%s='%s']" % (key, val)
134 return node.find(expr)
136 def find_xpath_attr(node, xpath, key, val):
137 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
138 # .//node does not match if a node is a direct child of . !
139 if isinstance(xpath, unicode):
140 xpath = xpath.encode('ascii')
142 for f in node.findall(xpath):
143 if f.attrib.get(key) == val:
147 # On python2.6 the xml.etree.ElementTree.Element methods don't support
148 # the namespace parameter
149 def xpath_with_ns(path, ns_map):
150 components = [c.split(':') for c in path.split('/')]
154 replaced.append(c[0])
157 replaced.append('{%s}%s' % (ns_map[ns], tag))
158 return '/'.join(replaced)
161 def xpath_text(node, xpath, name=None, fatal=False):
162 if sys.version_info < (2, 7): # Crazy 2.6
163 xpath = xpath.encode('ascii')
168 name = xpath if name is None else name
169 raise ExtractorError('Could not find XML element %s' % name)
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals the given ID.

    This is a thin convenience wrapper: the lookup itself is delegated to
    get_element_by_attribute() with the attribute name fixed to "id".
    """
    return get_element_by_attribute(attribute='id', value=id, html=html)
180 def get_element_by_attribute(attribute, value, html):
181 """Return the content of the tag with the specified attribute in the passed HTML document"""
183 m = re.search(r'''(?xs)
185 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
187 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
191 ''' % (re.escape(attribute), re.escape(value)), html)
195 res = m.group('content')
197 if res.startswith('"') or res.startswith("'"):
200 return unescapeHTML(res)
203 def clean_html(html):
204 """Clean an HTML snippet into a readable string"""
206 html = html.replace('\n', ' ')
207 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
208 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
210 html = re.sub('<.*?>', '', html)
211 # Replace html entities
212 html = unescapeHTML(html)
216 def sanitize_open(filename, open_mode):
217 """Try to open the given filename, and slightly tweak it if this fails.
219 Attempts to open the given filename. If this fails, it tries to change
220 the filename slightly, step by step, until it's either able to open it
221 or it fails and raises a final exception, like the standard open()
224 It returns the tuple (stream, definitive_file_name).
228 if sys.platform == 'win32':
230 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
231 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
232 stream = open(encodeFilename(filename), open_mode)
233 return (stream, filename)
234 except (IOError, OSError) as err:
235 if err.errno in (errno.EACCES,):
238 # In case of error, try to remove win32 forbidden chars
239 alt_filename = os.path.join(
240 re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
241 for path_part in os.path.split(filename)
243 if alt_filename == filename:
246 # An exception here should be caught in the caller
247 stream = open(encodeFilename(filename), open_mode)
248 return (stream, alt_filename)
251 def timeconvert(timestr):
252 """Convert RFC 2822 defined time string into system timestamp"""
254 timetuple = email.utils.parsedate_tz(timestr)
255 if timetuple is not None:
256 timestamp = email.utils.mktime_tz(timetuple)
259 def sanitize_filename(s, restricted=False, is_id=False):
260 """Sanitizes a string so it could be used as part of a filename.
261 If restricted is set, use a stricter subset of allowed characters.
262 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
264 def replace_insane(char):
265 if char == '?' or ord(char) < 32 or ord(char) == 127:
268 return '' if restricted else '\''
270 return '_-' if restricted else ' -'
271 elif char in '\\/|*<>':
273 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
275 if restricted and ord(char) > 127:
279 result = ''.join(map(replace_insane, s))
281 while '__' in result:
282 result = result.replace('__', '_')
283 result = result.strip('_')
284 # Common case of "Foreign band name - English song title"
285 if restricted and result.startswith('-_'):
291 def orderedSet(iterable):
292 """ Remove all duplicates from the input iterable """
300 def _htmlentity_transform(entity):
301 """Transforms an HTML entity to a character."""
302 # Known non-numeric HTML entity
303 if entity in compat_html_entities.name2codepoint:
304 return compat_chr(compat_html_entities.name2codepoint[entity])
306 mobj = re.match(r'#(x?[0-9]+)', entity)
308 numstr = mobj.group(1)
309 if numstr.startswith('x'):
311 numstr = '0%s' % numstr
314 return compat_chr(int(numstr, base))
316 # Unknown entity in name, return its literal representation
317 return ('&%s;' % entity)
323 assert type(s) == compat_str
326 r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
329 def encodeFilename(s, for_subprocess=False):
331 @param s The name of the file
334 assert type(s) == compat_str
336 # Python 3 has a Unicode API
337 if sys.version_info >= (3, 0):
340 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
341 # Pass '' directly to use Unicode APIs on Windows 2000 and up
342 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
343 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
344 if not for_subprocess:
347 # For subprocess calls, encode with locale encoding
348 # Refer to http://stackoverflow.com/a/9951851/35070
349 encoding = preferredencoding()
351 encoding = sys.getfilesystemencoding()
354 return s.encode(encoding, 'ignore')
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename(for_subprocess=True).

    s should be a compat_str. Anything else is treated as a legacy byte
    string and decoded as ASCII before being encoded.
    """
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code (post processors) still hands in byte strings.
        # Once all of them are fixed this branch should become an
        # internal error instead of a silent ASCII decode.
        text = s.decode('ascii')
    return encodeFilename(text, for_subprocess=True)
366 def decodeOption(optval):
369 if isinstance(optval, bytes):
370 optval = optval.decode(preferredencoding())
372 assert isinstance(optval, compat_str)
375 def formatSeconds(secs):
377 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
379 return '%d:%02d' % (secs // 60, secs % 60)
384 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
385 if sys.version_info < (3, 2):
388 class HTTPSConnectionV3(httplib.HTTPSConnection):
389 def __init__(self, *args, **kwargs):
390 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
393 sock = socket.create_connection((self.host, self.port), self.timeout)
394 if getattr(self, '_tunnel_host', False):
398 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
400 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
402 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
403 def https_open(self, req):
404 return self.do_open(HTTPSConnectionV3, req)
405 return HTTPSHandlerV3(**kwargs)
406 elif hasattr(ssl, 'create_default_context'): # Python >= 3.4
407 context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
408 context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3
409 if opts_no_check_certificate:
410 context.verify_mode = ssl.CERT_NONE
411 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
413 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
414 context.verify_mode = (ssl.CERT_NONE
415 if opts_no_check_certificate
416 else ssl.CERT_REQUIRED)
417 context.set_default_verify_paths()
419 context.load_default_certs()
420 except AttributeError:
422 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
425 class ExtractorError(Exception):
426 """Error during info extraction."""
427 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
428 """ tb, if given, is the original traceback (so that it can be printed out).
429 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
432 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
434 if video_id is not None:
435 msg = video_id + ': ' + msg
437 msg += ' (caused by %r)' % cause
439 if ytdl_is_updateable():
440 update_cmd = 'type youtube-dl -U to update'
442 update_cmd = 'see https://yt-dl.org/update on how to update'
443 msg += '; please report this issue on https://yt-dl.org/bug .'
444 msg += ' Make sure you are using the latest version; %s.' % update_cmd
445 msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
446 super(ExtractorError, self).__init__(msg)
449 self.exc_info = sys.exc_info() # preserve original exception
451 self.video_id = video_id
453 def format_traceback(self):
454 if self.traceback is None:
456 return ''.join(traceback.format_tb(self.traceback))
459 class RegexNotFoundError(ExtractorError):
460 """Error when a regex didn't match"""
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. It carries the error message and,
    optionally, the exception information that triggered it.
    """

    def __init__(self, msg, exc_info=None):
        """Create the error from a message and optional exception info.

        exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info()).
        """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
477 class SameFileError(Exception):
478 """Same File exception.
480 This exception will be thrown by FileDownloader objects if they detect
481 multiple files would have to be downloaded to the same file on disk.
486 class PostProcessingError(Exception):
487 """Post Processing exception.
489 This exception may be raised by PostProcessor's .run() method to
490 indicate an error in the postprocessing task.
492 def __init__(self, msg):
495 class MaxDownloadsReached(Exception):
496 """ --max-downloads limit has been reached. """
500 class UnavailableVideoError(Exception):
501 """Unavailable Format exception.
503 This exception will be thrown when a video is requested
504 in a format that is not available for that video.
509 class ContentTooShortError(Exception):
510 """Content Too Short exception.
512 This exception may be raised by FileDownloader objects when a file they
513 download is too small for what the server announced first, indicating
514 the connection was probably interrupted.
520 def __init__(self, downloaded, expected):
521 self.downloaded = downloaded
522 self.expected = expected
524 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
525 """Handler for HTTP requests and responses.
527 This class, when installed with an OpenerDirector, automatically adds
528 the standard headers to every HTTP request and handles gzipped and
529 deflated responses from web servers. If compression is to be avoided in
530 a particular request, the original request in the program code only has
531 to include the HTTP header "Youtubedl-No-Compression", which will be
532 removed before making the real request.
534 Part of this code was copied from:
536 http://techknack.net/python-urllib2-handlers/
538 Andrew Rowls, the author of that code, agreed to release it to the
545 return zlib.decompress(data, -zlib.MAX_WBITS)
547 return zlib.decompress(data)
550 def addinfourl_wrapper(stream, headers, url, code):
551 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
552 return compat_urllib_request.addinfourl(stream, headers, url, code)
553 ret = compat_urllib_request.addinfourl(stream, headers, url)
557 def http_request(self, req):
558 for h, v in std_headers.items():
559 if h not in req.headers:
561 if 'Youtubedl-no-compression' in req.headers:
562 if 'Accept-encoding' in req.headers:
563 del req.headers['Accept-encoding']
564 del req.headers['Youtubedl-no-compression']
565 if 'Youtubedl-user-agent' in req.headers:
566 if 'User-agent' in req.headers:
567 del req.headers['User-agent']
568 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
569 del req.headers['Youtubedl-user-agent']
571 if sys.version_info < (2, 7) and '#' in req.get_full_url():
572 # Python 2.6 is brain-dead when it comes to fragments
573 req._Request__original = req._Request__original.partition('#')[0]
574 req._Request__r_type = req._Request__r_type.partition('#')[0]
578 def http_response(self, req, resp):
581 if resp.headers.get('Content-encoding', '') == 'gzip':
582 content = resp.read()
583 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
585 uncompressed = io.BytesIO(gz.read())
586 except IOError as original_ioerror:
587 # There may be junk at the end of the file
588 # See http://stackoverflow.com/q/4928560/35070 for details
589 for i in range(1, 1024):
591 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
592 uncompressed = io.BytesIO(gz.read())
597 raise original_ioerror
598 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
599 resp.msg = old_resp.msg
601 if resp.headers.get('Content-encoding', '') == 'deflate':
602 gz = io.BytesIO(self.deflate(resp.read()))
603 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
604 resp.msg = old_resp.msg
607 https_request = http_request
608 https_response = http_response
611 def parse_iso8601(date_str, delimiter='T'):
612 """ Return a UNIX timestamp from the given date """
618 r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
621 timezone = datetime.timedelta()
623 date_str = date_str[:-len(m.group(0))]
624 if not m.group('sign'):
625 timezone = datetime.timedelta()
627 sign = 1 if m.group('sign') == '+' else -1
628 timezone = datetime.timedelta(
629 hours=sign * int(m.group('hours')),
630 minutes=sign * int(m.group('minutes')))
631 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
632 dt = datetime.datetime.strptime(date_str, date_format) - timezone
633 return calendar.timegm(dt.timetuple())
636 def unified_strdate(date_str):
637 """Return a string with the date in the format YYYYMMDD"""
644 date_str = date_str.replace(',', ' ')
645 # %z (UTC offset) is only supported in python>=3.2
646 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
647 format_expressions = [
652 '%b %dst %Y %I:%M%p',
653 '%b %dnd %Y %I:%M%p',
654 '%b %dth %Y %I:%M%p',
663 '%Y-%m-%d %H:%M:%S.%f',
666 '%Y-%m-%dT%H:%M:%SZ',
667 '%Y-%m-%dT%H:%M:%S.%fZ',
668 '%Y-%m-%dT%H:%M:%S.%f0Z',
670 '%Y-%m-%dT%H:%M:%S.%f',
673 for expression in format_expressions:
675 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
678 if upload_date is None:
679 timetuple = email.utils.parsedate_tz(date_str)
681 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
684 def determine_ext(url, default_ext='unknown_video'):
687 guess = url.partition('?')[0].rpartition('.')[2]
688 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Return the subtitles file name that belongs to a media file name.

    The extension of filename is replaced by '.<sub_lang>.<sub_format>',
    e.g. ('video.mp4', 'en', 'srt') gives 'video.en.srt'.

    The extension is determined with os.path.splitext, so only a real
    extension on the last path component is removed. A dot inside a
    directory name ('dir.v1/video') or a leading dot ('.hidden') is kept
    intact instead of truncating the name at that dot.
    """
    base = os.path.splitext(filename)[0]
    return base + '.' + sub_lang + '.' + sub_format
696 def date_from_str(date_str):
698 Return a datetime object from a string in the format YYYYMMDD or
699 (now|today)[+-][0-9](day|week|month|year)(s)?"""
700 today = datetime.date.today()
701 if date_str == 'now'or date_str == 'today':
703 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
704 if match is not None:
705 sign = match.group('sign')
706 time = int(match.group('time'))
709 unit = match.group('unit')
718 delta = datetime.timedelta(**{unit: time})
720 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
722 def hyphenate_date(date_str):
724 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
725 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
726 if match is not None:
727 return '-'.join(match.groups())
731 class DateRange(object):
732 """Represents a time interval between two dates"""
733 def __init__(self, start=None, end=None):
734 """start and end must be strings in the format accepted by date"""
735 if start is not None:
736 self.start = date_from_str(start)
738 self.start = datetime.datetime.min.date()
740 self.end = date_from_str(end)
742 self.end = datetime.datetime.max.date()
743 if self.start > self.end:
744 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
747 """Returns a range that only contains the given day"""
749 def __contains__(self, date):
750 """Check if the date is in the range"""
751 if not isinstance(date, datetime.date):
752 date = date_from_str(date)
753 return self.start <= date <= self.end
755 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
759 """ Returns the platform name as a compat_str """
760 res = platform.platform()
761 if isinstance(res, bytes):
762 res = res.decode(preferredencoding())
764 assert isinstance(res, compat_str)
768 def _windows_write_string(s, out):
769 """ Returns True if the string was written using special methods,
770 False if it has yet to be written out."""
771 # Adapted from http://stackoverflow.com/a/3259271/35070
774 import ctypes.wintypes
782 fileno = out.fileno()
783 except AttributeError:
784 # If the output stream doesn't have a fileno, it's virtual
786 if fileno not in WIN_OUTPUT_IDS:
789 GetStdHandle = ctypes.WINFUNCTYPE(
790 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
791 ("GetStdHandle", ctypes.windll.kernel32))
792 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
794 WriteConsoleW = ctypes.WINFUNCTYPE(
795 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
796 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
797 ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
798 written = ctypes.wintypes.DWORD(0)
800 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
801 FILE_TYPE_CHAR = 0x0002
802 FILE_TYPE_REMOTE = 0x8000
803 GetConsoleMode = ctypes.WINFUNCTYPE(
804 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
805 ctypes.POINTER(ctypes.wintypes.DWORD))(
806 ("GetConsoleMode", ctypes.windll.kernel32))
807 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
809 def not_a_console(handle):
810 if handle == INVALID_HANDLE_VALUE or handle is None:
812 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
813 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
818 def next_nonbmp_pos(s):
820 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
821 except StopIteration:
825 count = min(next_nonbmp_pos(s), 1024)
828 h, s, count if count else 2, ctypes.byref(written), None)
830 raise OSError('Failed to write string')
831 if not count: # We just wrote a non-BMP character
832 assert written.value == 2
835 assert written.value > 0
836 s = s[written.value:]
840 def write_string(s, out=None, encoding=None):
843 assert type(s) == compat_str
845 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
846 if _windows_write_string(s, out):
849 if ('b' in getattr(out, 'mode', '') or
850 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
851 byt = s.encode(encoding or preferredencoding(), 'ignore')
853 elif hasattr(out, 'buffer'):
854 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
855 byt = s.encode(enc, 'ignore')
856 out.buffer.write(byt)
862 def bytes_to_intlist(bs):
865 if isinstance(bs[0], int): # Python 3
868 return [ord(c) for c in bs]
871 def intlist_to_bytes(xs):
874 return struct_pack('%dB' % len(xs), *xs)
877 # Cross-platform file locking
878 if sys.platform == 'win32':
879 import ctypes.wintypes
882 class OVERLAPPED(ctypes.Structure):
884 ('Internal', ctypes.wintypes.LPVOID),
885 ('InternalHigh', ctypes.wintypes.LPVOID),
886 ('Offset', ctypes.wintypes.DWORD),
887 ('OffsetHigh', ctypes.wintypes.DWORD),
888 ('hEvent', ctypes.wintypes.HANDLE),
891 kernel32 = ctypes.windll.kernel32
892 LockFileEx = kernel32.LockFileEx
893 LockFileEx.argtypes = [
894 ctypes.wintypes.HANDLE, # hFile
895 ctypes.wintypes.DWORD, # dwFlags
896 ctypes.wintypes.DWORD, # dwReserved
897 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
898 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
899 ctypes.POINTER(OVERLAPPED) # Overlapped
901 LockFileEx.restype = ctypes.wintypes.BOOL
902 UnlockFileEx = kernel32.UnlockFileEx
903 UnlockFileEx.argtypes = [
904 ctypes.wintypes.HANDLE, # hFile
905 ctypes.wintypes.DWORD, # dwReserved
906 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
907 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
908 ctypes.POINTER(OVERLAPPED) # Overlapped
910 UnlockFileEx.restype = ctypes.wintypes.BOOL
911 whole_low = 0xffffffff
912 whole_high = 0x7fffffff
914 def _lock_file(f, exclusive):
915 overlapped = OVERLAPPED()
916 overlapped.Offset = 0
917 overlapped.OffsetHigh = 0
918 overlapped.hEvent = 0
919 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
920 handle = msvcrt.get_osfhandle(f.fileno())
921 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
922 whole_low, whole_high, f._lock_file_overlapped_p):
923 raise OSError('Locking file failed: %r' % ctypes.FormatError())
926 assert f._lock_file_overlapped_p
927 handle = msvcrt.get_osfhandle(f.fileno())
928 if not UnlockFileEx(handle, 0,
929 whole_low, whole_high, f._lock_file_overlapped_p):
930 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
935 def _lock_file(f, exclusive):
936 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
939 fcntl.flock(f, fcntl.LOCK_UN)
942 class locked_file(object):
943 def __init__(self, filename, mode, encoding=None):
944 assert mode in ['r', 'a', 'w']
945 self.f = io.open(filename, mode, encoding=encoding)
949 exclusive = self.mode != 'r'
951 _lock_file(self.f, exclusive)
957 def __exit__(self, etype, value, traceback):
966 def write(self, *args):
967 return self.f.write(*args)
969 def read(self, *args):
970 return self.f.read(*args)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
978 def shell_quote(args):
980 encoding = get_filesystem_encoding()
982 if isinstance(a, bytes):
983 # We may get a filename encoded with 'encodeFilename'
984 a = a.decode(encoding)
985 quoted_args.append(pipes.quote(a))
986 return ' '.join(quoted_args)
989 def takewhile_inclusive(pred, seq):
990 """ Like itertools.takewhile, but include the latest evaluated element
991 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """Pass additional data in a URL for internal use.

    data is serialized to JSON, URL-encoded under the key
    '__youtubedl_smuggle' and appended to url as a '#' fragment.
    """
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    fragment = compat_urllib_parse.urlencode(payload)
    return url + '#' + fragment
1006 def unsmuggle_url(smug_url, default=None):
1007 if not '#__youtubedl_smuggle' in smug_url:
1008 return smug_url, default
1009 url, _, sdata = smug_url.rpartition('#')
1010 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1011 data = json.loads(jsond)
1015 def format_bytes(bytes):
1018 if type(bytes) is str:
1019 bytes = float(bytes)
1023 exponent = int(math.log(bytes, 1024.0))
1024 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
1025 converted = float(bytes) / float(1024 ** exponent)
1026 return '%.2f%s' % (converted, suffix)
1029 def get_term_width():
1030 columns = compat_getenv('COLUMNS', None)
1035 sp = subprocess.Popen(
1037 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1038 out, err = sp.communicate()
1039 return int(out.split()[1])
1045 def month_by_name(name):
1046 """ Return the number of a month by (locale-independently) English name """
1049 'January', 'February', 'March', 'April', 'May', 'June',
1050 'July', 'August', 'September', 'October', 'November', 'December']
1052 return ENGLISH_NAMES.index(name) + 1
1057 def fix_xml_ampersands(xml_str):
1058 """Replace all the '&' by '&amp;' in XML"""
1060 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1065 def setproctitle(title):
1066 assert isinstance(title, compat_str)
1068 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1071 title_bytes = title.encode('utf-8')
1072 buf = ctypes.create_string_buffer(len(title_bytes))
1073 buf.value = title_bytes
1075 libc.prctl(15, buf, 0, 0, 0)
1076 except AttributeError:
1077 return # Strange libc, just skip this
1080 def remove_start(s, start):
1081 if s.startswith(start):
1082 return s[len(start):]
1086 def remove_end(s, end):
1088 return s[:-len(end)]
def url_basename(url):
    """Return the last component of the path of url.

    Leading and trailing slashes of the path are ignored; query string
    and fragment are not part of the path and therefore not included.
    """
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    # Everything after the last slash (or the whole path if there is none)
    return trimmed_path.rpartition('/')[2]
1097 class HEADRequest(compat_urllib_request.Request):
1098 def get_method(self):
1102 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1105 v = getattr(v, get_attr, None)
1108 return default if v is None else (int(v) * invscale // scale)
def str_or_none(v, default=None):
    """Return v converted to compat_str, or default if v is None."""
    if v is None:
        return default
    return compat_str(v)
1115 def str_to_int(int_str):
1116 """ A more relaxed version of int_or_none """
1119 int_str = re.sub(r'[,\.\+]', '', int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return v as a float, multiplied by invscale and divided by scale.

    If v is None, default is returned unchanged instead.
    """
    if v is None:
        return default
    return float(v) * invscale / scale
1127 def parse_duration(s):
1136 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
1137 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1139 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
1142 res = int(m.group('secs'))
1144 res += int(m.group('mins')) * 60
1145 if m.group('hours'):
1146 res += int(m.group('hours')) * 60 * 60
1148 res += float(m.group('ms'))
def prepend_extension(filename, ext):
    """Insert ext in front of the existing extension of filename.

    'abc.ext' with ext 'temp' becomes 'abc.temp.ext'; a file name without
    an extension simply gets '.temp' appended.
    """
    stem, suffix = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(stem, ext, suffix)
1157 def check_executable(exe, args=[]):
1158 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1159 args can be a list of arguments for a short output (like -version) """
1161 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1167 def get_exe_version(exe, args=['--version'],
1168 version_re=r'version\s+([0-9._-a-zA-Z]+)',
1169 unrecognized='present'):
1170 """ Returns the version of the specified executable,
1171 or False if the executable is not present """
1173 out, err = subprocess.Popen(
1175 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
1178 firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
1179 m = re.search(version_re, firstline)
1186 class PagedList(object):
1188 # This is only useful for tests
1189 return len(self.getslice())
1192 class OnDemandPagedList(PagedList):
1193 def __init__(self, pagefunc, pagesize):
1194 self._pagefunc = pagefunc
1195 self._pagesize = pagesize
1197 def getslice(self, start=0, end=None):
1199 for pagenum in itertools.count(start // self._pagesize):
1200 firstid = pagenum * self._pagesize
1201 nextfirstid = pagenum * self._pagesize + self._pagesize
1202 if start >= nextfirstid:
1205 page_results = list(self._pagefunc(pagenum))
1208 start % self._pagesize
1209 if firstid <= start < nextfirstid
1213 ((end - 1) % self._pagesize) + 1
1214 if (end is not None and firstid <= end <= nextfirstid)
1217 if startv != 0 or endv is not None:
1218 page_results = page_results[startv:endv]
1219 res.extend(page_results)
1221 # A little optimization - if current page is not "full", ie. does
1222 # not contain page_size videos then we can assume that this page
1223 # is the last one - there are no more ids on further pages -
1224 # i.e. no need to query again.
1225 if len(page_results) + startv < self._pagesize:
1228 # If we got the whole page, but the next page is not interesting,
1229 # break out early as well
1230 if end == nextfirstid:
1235 class InAdvancePagedList(PagedList):
1236 def __init__(self, pagefunc, pagecount, pagesize):
1237 self._pagefunc = pagefunc
1238 self._pagecount = pagecount
1239 self._pagesize = pagesize
1241 def getslice(self, start=0, end=None):
1243 start_page = start // self._pagesize
1245 self._pagecount if end is None else (end // self._pagesize + 1))
1246 skip_elems = start - start_page * self._pagesize
1247 only_more = None if end is None else end - start
1248 for pagenum in range(start_page, end_page):
1249 page = list(self._pagefunc(pagenum))
1251 page = page[skip_elems:]
1253 if only_more is not None:
1254 if len(page) < only_more:
1255 only_more -= len(page)
1257 page = page[:only_more]
1264 def uppercase_escape(s):
1265 unicode_escape = codecs.getdecoder('unicode_escape')
1267 r'\\U[0-9a-fA-F]{8}',
1268 lambda m: unicode_escape(m.group(0))[0],
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a unicode string is UTF-8 encoded before it is handed
    # to quote().
    is_py2_unicode = sys.version_info < (3, 0) and isinstance(s, unicode)
    if is_py2_unicode:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
1279 def escape_url(url):
1280 """Escape URL as suggested by RFC 3986"""
1281 url_parsed = compat_urllib_parse_urlparse(url)
1282 return url_parsed._replace(
1283 path=escape_rfc3986(url_parsed.path),
1284 params=escape_rfc3986(url_parsed.params),
1285 query=escape_rfc3986(url_parsed.query),
1286 fragment=escape_rfc3986(url_parsed.fragment)
1290 struct.pack('!I', 0)
1292 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1293 def struct_pack(spec, *args):
1294 if isinstance(spec, compat_str):
1295 spec = spec.encode('ascii')
1296 return struct.pack(spec, *args)
1298 def struct_unpack(spec, *args):
1299 if isinstance(spec, compat_str):
1300 spec = spec.encode('ascii')
1301 return struct.unpack(spec, *args)
1303 struct_pack = struct.pack
1304 struct_unpack = struct.unpack
1307 def read_batch_urls(batch_fd):
1309 if not isinstance(url, compat_str):
1310 url = url.decode('utf-8', 'replace')
1311 BOM_UTF8 = '\xef\xbb\xbf'
1312 if url.startswith(BOM_UTF8):
1313 url = url[len(BOM_UTF8):]
1315 if url.startswith(('#', ';', ']')):
1319 with contextlib.closing(batch_fd) as fd:
1320 return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes.

    All arguments are passed through to compat_urllib_parse.urlencode().
    """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1328 etree_iter = xml.etree.ElementTree.Element.iter
1329 except AttributeError: # Python <=2.6
1330 etree_iter = lambda n: n.findall('.//*')
1334 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1335 def doctype(self, name, pubid, system):
1336 pass # Ignore doctypes
1338 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1339 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1340 tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1341 # Fix up XML parser in Python 2.x
1342 if sys.version_info < (3, 0):
1343 for n in etree_iter(tree):
1344 if n.text is not None:
1345 if not isinstance(n.text, compat_str):
1346 n.text = n.text.decode('utf-8')
1359 def parse_age_limit(s):
1362 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
1363 return int(m.group('age')) if m else US_RATINGS.get(s, None)
1366 def strip_jsonp(code):
1368 r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
1371 def js_to_json(code):
1374 if v in ('true', 'false', 'null'):
1376 if v.startswith('"'):
1378 if v.startswith("'"):
1380 v = re.sub(r"\\\\|\\'|\"", lambda m: {
1387 res = re.sub(r'''(?x)
1388 "(?:[^"\\]*(?:\\\\|\\")?)*"|
1389 '(?:[^'\\]*(?:\\\\|\\')?)*'|
1390 [a-zA-Z_][a-zA-Z_0-9]*
1392 res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
1396 def qualities(quality_ids):
1397 """ Get a numeric quality value out of a list of possible values """
1400 return quality_ids.index(qid)
1406 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1409 def limit_length(s, length):
1410 """ Add ellipses to overly long strings """
1415 return s[:length - len(ELLIPSES)] + ELLIPSES
def version_tuple(v):
    """Split a dotted version string into a list of its integer parts.

    Despite the name the result is a list, e.g. '2014.11.27' gives
    [2014, 11, 27]. A non-numeric component raises ValueError.
    """
    return list(map(int, v.split('.')))
1423 def is_outdated_version(version, limit, assume_new=True):
1425 return not assume_new
1427 return version_tuple(version) < version_tuple(limit)
1429 return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when this module was loaded through a zipimporter ...
    module_loader = globals().get('__loader__')
    if isinstance(module_loader, zipimporter):
        return True
    # ... or when the interpreter reports itself as frozen.
    return hasattr(sys, 'frozen')
def args_to_str(args):
    """Get a short string representation for a subprocess command.

    Every argument is quoted with shlex_quote and the results are joined
    with single spaces.
    """
    return ' '.join(map(shlex_quote, args))