# -*- coding: utf-8 -*-

import ctypes
import datetime
import email.utils
import errno
import gzip
import io
import json
import locale
import math
import os
import pipes
import platform
import re
import socket
import ssl
import subprocess
import sys
import traceback
import zlib
try:
    import urllib.request as compat_urllib_request
except ImportError: # Python 2
    import urllib2 as compat_urllib_request

try:
    import urllib.error as compat_urllib_error
except ImportError: # Python 2
    import urllib2 as compat_urllib_error

try:
    import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
    import urllib as compat_urllib_parse

try:
    from urllib.parse import urlparse as compat_urllib_parse_urlparse
except ImportError: # Python 2
    from urlparse import urlparse as compat_urllib_parse_urlparse

try:
    import urllib.parse as compat_urlparse
except ImportError: # Python 2
    import urlparse as compat_urlparse

try:
    import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
    import cookielib as compat_cookiejar

try:
    import html.entities as compat_html_entities
except ImportError: # Python 2
    import htmlentitydefs as compat_html_entities

try:
    import html.parser as compat_html_parser
except ImportError: # Python 2
    import HTMLParser as compat_html_parser

try:
    import http.client as compat_http_client
except ImportError: # Python 2
    import httplib as compat_http_client

try:
    from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2
    from urllib2 import HTTPError as compat_HTTPError

try:
    from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2
    from urllib import urlretrieve as compat_urlretrieve
try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
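# Illustrative behaviour of the parse_qs interface above (example query strings
# are assumptions, not taken from this module):
#   compat_parse_qs(u'a=1&a=2&b=')  -> {u'a': [u'1', u'2']}   (blank values dropped by default)
#   compat_parse_qs(u'a=%C3%A9')    -> {u'a': [u'\xe9']}      (percent sequences decoded as UTF-8)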
try:
    compat_str = unicode # Python 2
except NameError:
    compat_str = str

try:
    compat_chr = unichr # Python 2
except NameError:
    compat_chr = chr

def compat_ord(c):
    if type(c) is int: return c
    else: return ord(c)

# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        u'TEST'.encode(pref)
    except:
        pref = 'UTF-8'

    return pref

if sys.version_info < (3,0):
    def compat_print(s):
        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
else:
    def compat_print(s):
        assert type(s) == type(u'')
        print(s)
# In Python 2.x, json.dump expects a bytestream.
# In Python 3.x, it writes to a character stream
if sys.version_info < (3,0):
    def write_json_file(obj, fn):
        with open(fn, 'wb') as f:
            json.dump(obj, f)
else:
    def write_json_file(obj, fn):
        with open(fn, 'w', encoding='utf-8') as f:
            json.dump(obj, f)
if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val):
        for f in node.findall(xpath):
            if f.attrib.get(key) == val:
                return f
        return None
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
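# Illustrative use (the namespace URI below is an assumption, not from this file):
#   xpath_with_ns('media:group/media:title', {'media': 'http://search.yahoo.com/mrss/'})
#   == '{http://search.yahoo.com/mrss/}group/{http://search.yahoo.com/mrss/}title'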
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(u'(?u)#(x?\\d+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class BaseHTMLParser(compat_html_parser.HTMLParser):
    def __init__(self):
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        self.html = html
        self.feed(html)
        self.close()

class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        self.result = None
        self.started = False
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()

# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute("id", id, html)

def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    parser = AttrParser(attribute, value)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        pass
    return parser.get_result()
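# Rough illustration (the HTML snippet is an assumption, not from this file):
#   get_element_by_id('main', u'<html><div id="main">hello</div></html>')  -> roughly u'hello'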
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """
    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag != 'meta':
            return
        attrs = dict(attrs)
        if attrs.get('name') == self.name:
            self.result = attrs.get('content')

    def get_result(self):
        return self.result

def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    parser = MetaParser(name)
    try:
        parser.loads(html)
    except compat_html_parser.HTMLParseError:
        pass
    return parser.get_result()
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = os.path.join(*(
            re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
            for path_part in os.path.split(filename)))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    result = u''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if not result:
            result = '_'
    return result
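# Rough illustration (example titles and outputs are assumptions, not taken from this file):
#   sanitize_filename(u'AC/DC: Back In Black')                  -> u'AC_DC - Back In Black'
#   sanitize_filename(u'AC/DC: Back In Black', restricted=True) -> u'AC_DC_-_Back_In_Black'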
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

def unescapeHTML(s):
    """
    @param s a string
    """
    assert type(s) == type(u'')

    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
    return result
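# For instance (illustrative input, not from this file):
#   unescapeHTML(u'a &amp; b &eacute;')  -> u'a & b \xe9'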
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        if not for_subprocess:
            return s
        else:
            # For subprocess calls, encode with locale encoding
            # Refer to http://stackoverflow.com/a/9951851/35070
            encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval

def formatSeconds(secs):
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
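# e.g. (illustrative values):
#   formatSeconds(3723) -> '1:02:03', formatSeconds(90) -> '1:30', formatSeconds(7) -> '7'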
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    else:
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
class ExtractorError(Exception):
    """Error during info extraction."""
    def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause

    def format_traceback(self):
        if self.traceback is None:
            return None
        return u''.join(traceback.format_tb(self.traceback))
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    def __init__(self, msg):
        self.msg = msg
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded = downloaded
        self.expected = expected
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

      http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        for h,v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',',' ')
    # %z (UTC offset) is only supported in python>=3.2
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = [
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
    guess = url.partition(u'?')[0].rpartition(u'.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    else:
        return default_ext

def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
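# Illustrative (the URL and filenames below are assumptions):
#   determine_ext(u'http://example.com/video.mp4?token=abc') -> u'mp4'
#   subtitles_filename(u'clip.mp4', u'en', u'srt')           -> u'clip.en.srt'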
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A rough approximation for months and years
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
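# e.g. (illustrative):
#   date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)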
class DateRange(object):
    """Represents a time interval between two dates"""
    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)
    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end
    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
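# Illustrative use (dates made up):
#   '20130615' in DateRange('20130101', '20131231') -> True
#   '20140101' in DateRange('20130101', '20131231') -> False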
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
def write_string(s, out=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    try:
        out.write(s)
    except UnicodeEncodeError:
        # In Windows shells, this can fail even when the codec is just charmap!?
        # See https://wiki.python.org/moin/PrintFails#Issue
        if sys.platform == 'win32' and hasattr(out, 'encoding'):
            s = s.encode(out.encoding, 'ignore').decode(out.encoding)
            out.write(s)
        else:
            raise

    out.flush()
def bytes_to_intlist(bs):
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]

def intlist_to_bytes(xs):
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2
        return ''.join([chr(x) for x in xs])
    else:
        return bytes(xs)
def get_cachedir(params={}):
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.lockf(f, fcntl.LOCK_UN)
class locked_file(object):
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def shell_quote(args):
    quoted_args = []
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return u' '.join(quoted_args)
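# e.g. (illustrative arguments):
#   shell_quote([u'ffmpeg', u'-i', u'my file.mp4']) -> u"ffmpeg -i 'my file.mp4'"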
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but also include the latest evaluated element,
        i.e. the first element for which pred(e) is false """
    for e in seq:
        yield e
        if not pred(e):
            return
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata

def unsmuggle_url(smug_url):
    if not '#__youtubedl_smuggle' in smug_url:
        return smug_url, None
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
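# Round-trip sketch (the URL and payload are assumptions):
#   u = smuggle_url(u'http://example.com/v/123', {u'source': u'rss'})
#   unsmuggle_url(u) -> (u'http://example.com/v/123', {u'source': u'rss'})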
def format_bytes(bytes):
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
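# e.g. (illustrative values):
#   format_bytes(1536) -> u'1.50KiB', format_bytes(10 ** 9) -> u'953.67MiB'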
def str_to_int(int_str):
    int_str = re.sub(r'[,\.]', u'', int_str)
    return int(int_str)
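# e.g. (illustrative): str_to_int(u'1,000,000') -> 1000000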
def get_term_width():
    columns = os.environ.get('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except:
        pass
    return None
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """
    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    try:
        return ENGLISH_NAMES.index(name) + 1
    except ValueError:
        return None
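# e.g. (illustrative): month_by_name(u'March') -> 3, month_by_name(u'Marzo') -> None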
def fix_xml_all_ampersand(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    return xml_str.replace(u'&', u'&amp;')
def setproctitle(title):
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    buf = ctypes.create_string_buffer(len(title) + 1)
    buf.value = title.encode('utf-8')
    try:
        libc.prctl(15, ctypes.byref(buf), 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
def remove_start(s, start):
    if s.startswith(start):
        return s[len(start):]
    return s
def url_basename(url):
    path = compat_urlparse.urlparse(url).path
    return path.strip(u'/').split(u'/')[-1]
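# e.g. (illustrative URL): url_basename(u'http://example.com/a/b/c.mp4?x=1') -> u'c.mp4'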
class HEADRequest(compat_urllib_request.Request):
    def get_method(self):
        return "HEAD"

def int_or_none(v):
    return v if v is None else int(v)
def parse_duration(s):
    if s is None:
        return None

    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s)
    if not m:
        return None
    res = int(m.group('secs'))
    if m.group('mins'):
        res += int(m.group('mins')) * 60
        if m.group('hours'):
            res += int(m.group('hours')) * 60 * 60
    return res
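# e.g. (illustrative): parse_duration(u'1:02:03') -> 3723, parse_duration(u'45') -> 45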
def prepend_extension(filename, ext):
    name, real_ext = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(name, ext, real_ext)
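# e.g. (illustrative): prepend_extension(u'video.mp4', u'temp') -> u'video.temp.mp4'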