2 # -*- coding: utf-8 -*-
26 import xml.etree.ElementTree
30 import urllib.request as compat_urllib_request
31 except ImportError: # Python 2
32 import urllib2 as compat_urllib_request
35 import urllib.error as compat_urllib_error
36 except ImportError: # Python 2
37 import urllib2 as compat_urllib_error
40 import urllib.parse as compat_urllib_parse
41 except ImportError: # Python 2
42 import urllib as compat_urllib_parse
45 from urllib.parse import urlparse as compat_urllib_parse_urlparse
46 except ImportError: # Python 2
47 from urlparse import urlparse as compat_urllib_parse_urlparse
50 import urllib.parse as compat_urlparse
51 except ImportError: # Python 2
52 import urlparse as compat_urlparse
55 import http.cookiejar as compat_cookiejar
56 except ImportError: # Python 2
57 import cookielib as compat_cookiejar
60 import html.entities as compat_html_entities
61 except ImportError: # Python 2
62 import htmlentitydefs as compat_html_entities
65 import html.parser as compat_html_parser
66 except ImportError: # Python 2
67 import HTMLParser as compat_html_parser
70 import http.client as compat_http_client
71 except ImportError: # Python 2
72 import httplib as compat_http_client
75 from urllib.error import HTTPError as compat_HTTPError
76 except ImportError: # Python 2
77 from urllib2 import HTTPError as compat_HTTPError
80 from urllib.request import urlretrieve as compat_urlretrieve
81 except ImportError: # Python 2
82 from urllib import urlretrieve as compat_urlretrieve
86 from subprocess import DEVNULL
87 compat_subprocess_get_DEVNULL = lambda: DEVNULL
89 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
92 from urllib.parse import parse_qs as compat_parse_qs
93 except ImportError: # Python 2
94 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
95 # Python 2's version is apparently totally broken
# NOTE(review): fragment — the embedded line numbers jump (97-98, 100-105, ...),
# so intermediate statements of this parse_qs backport are missing from this dump.
# Backport of CPython 3's parse_qs for Python 2, whose version was broken.
96 def _unquote(string, encoding='utf-8', errors='replace'):
99 res = string.split('%')
106 # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
# Percent-escapes are decoded pairwise via the py2-only 'hex' codec.
113 pct_sequence += item[:2].decode('hex')
116 # This segment was just a single percent-encoded character.
117 # May be part of a sequence of code units, so delay decoding.
118 # (Stored in pct_sequence).
122 # Encountered non-percent-encoded characters. Flush the current
124 string += pct_sequence.decode(encoding, errors) + rest
127 # Flush the final pct_sequence
128 string += pct_sequence.decode(encoding, errors)
# Split a query string into (name, value) pairs; '&' and ';' both separate pairs.
131 def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
132 encoding='utf-8', errors='replace'):
133 qs, _coerce_result = qs, unicode
134 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
136 for name_value in pairs:
137 if not name_value and not strict_parsing:
139 nv = name_value.split('=', 1)
142 raise ValueError("bad query field: %r" % (name_value,))
143 # Handle case of a control-name with no equal sign
144 if keep_blank_values:
148 if len(nv[1]) or keep_blank_values:
149 name = nv[0].replace('+', ' ')
150 name = _unquote(name, encoding=encoding, errors=errors)
151 name = _coerce_result(name)
152 value = nv[1].replace('+', ' ')
153 value = _unquote(value, encoding=encoding, errors=errors)
154 value = _coerce_result(value)
155 r.append((name, value))
# Public entry point: returns a dict mapping each name to a list of values.
158 def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
159 encoding='utf-8', errors='replace'):
161 pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
162 encoding=encoding, errors=errors)
163 for name, value in pairs:
164 if name in parsed_result:
165 parsed_result[name].append(value)
167 parsed_result[name] = [value]
# NOTE(review): fragment — Python 2/3 compatibility aliases plus the default
# HTTP header set; several lines (the py3 branches, dict opener) are missing.
171 compat_str = unicode # Python 2
176 compat_chr = unichr # Python 2
181 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
182 except ImportError: # Python 2.6
183 from xml.parsers.expat import ExpatError as compat_xml_parse_error
# compat_ord helper body: ints pass through unchanged (the ord() call is in a gap).
186 if type(c) is int: return c
189 # This is not clearly defined otherwise
190 compiled_regex_type = type(re.compile(''))
# std_headers: default headers sent with every HTTP request.
193 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
194 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
195 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
196 'Accept-Encoding': 'gzip, deflate',
197 'Accept-Language': 'en-us,en;q=0.5',
# NOTE(review): fragments of three adjacent helpers; interior lines are missing.
200 def preferredencoding():
201 """Get preferred encoding.
203 Returns the best encoding scheme for the system, based on
204 locale.getpreferredencoding() and some further tweaks.
207 pref = locale.getpreferredencoding()
# Fragment of a py2-only print helper (encodes before printing).
214 if sys.version_info < (3,0):
216 print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
219 assert type(s) == type(u'')
222 # In Python 2.x, json.dump expects a bytestream.
223 # In Python 3.x, it writes to a character stream
224 if sys.version_info < (3,0):
225 def write_json_file(obj, fn):
226 with open(fn, 'wb') as f:
229 def write_json_file(obj, fn):
230 with open(fn, 'w', encoding='utf-8') as f:
# Guard for the native-XPath variant of find_xpath_attr defined below.
233 if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
    """Return the first element matching xpath[@key=val], or None."""
    # Only simple attribute names/values are accepted, so interpolating
    # them below cannot break out of the XPath expression.
    assert re.match(r'^[a-zA-Z]+$', key)
    assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
    query = u"%s[@%s='%s']" % (xpath, key, val)
    return node.find(query)
# NOTE(review): fragments — the pre-2.7 fallback scans all matches manually
# (its `return f` / `return None` lines are in a gap), and xpath_with_ns
# expands `ns:tag` prefixes using ns_map (its loop header is missing).
241 def find_xpath_attr(node, xpath, key, val):
242 for f in node.findall(xpath):
243 if f.attrib.get(key) == val:
247 # On python2.6 the xml.etree.ElementTree.Element methods don't support
248 # the namespace parameter
249 def xpath_with_ns(path, ns_map):
250 components = [c.split(':') for c in path.split('/')]
254 replaced.append(c[0])
257 replaced.append('{%s}%s' % (ns_map[ns], tag))
258 return '/'.join(replaced)
# NOTE(review): fragment — the `if mobj is not None:` / base-selection lines
# sit in numbering gaps (271-279).
260 def htmlentity_transform(matchobj):
261 """Transforms an HTML entity to a character.
263 This function receives a match object and is intended to be used with
264 the re.sub() function.
266 entity = matchobj.group(1)
268 # Known non-numeric HTML entity
269 if entity in compat_html_entities.name2codepoint:
270 return compat_chr(compat_html_entities.name2codepoint[entity])
# NOTE(review): `x?\d+` cannot match hex entities containing a-f digits
# (e.g. &#x2F;) — looks like a latent bug; confirm against upstream fixes.
272 mobj = re.match(u'(?u)#(x?\\d+)', entity)
274 numstr = mobj.group(1)
275 if numstr.startswith(u'x'):
277 numstr = u'0%s' % numstr
280 return compat_chr(int(numstr, base))
282 # Unknown entity in name, return its literal representation
283 return (u'&%s;' % entity)
# NOTE(review): fragment — HTML-scraping helpers built on HTMLParser; several
# method bodies are only partially present (gaps in the embedded numbering).
285 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
286 class BaseHTMLParser(compat_html_parser.HTMLParser):
288 compat_html_parser.HTMLParser.__init__(self)
291 def loads(self, html):
296 class AttrParser(BaseHTMLParser):
297 """Modified HTMLParser that isolates a tag with the specified attribute"""
298 def __init__(self, attribute, value):
299 self.attribute = attribute
304 self.watch_startpos = False
306 BaseHTMLParser.__init__(self)
# Tolerate up to 10 parse errors by skipping a line and retrying.
308 def error(self, message):
309 if self.error_count > 10 or self.started:
310 raise compat_html_parser.HTMLParseError(message, self.getpos())
311 self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
312 self.error_count += 1
315 def handle_starttag(self, tag, attrs):
318 self.find_startpos(None)
319 if self.attribute in attrs and attrs[self.attribute] == self.value:
322 self.watch_startpos = True
324 if not tag in self.depth: self.depth[tag] = 0
327 def handle_endtag(self, tag):
329 if tag in self.depth: self.depth[tag] -= 1
330 if self.depth[self.result[0]] == 0:
332 self.result.append(self.getpos())
334 def find_startpos(self, x):
335 """Needed to put the start position of the result (self.result[1])
336 after the opening tag with the requested id"""
337 if self.watch_startpos:
338 self.watch_startpos = False
339 self.result.append(self.getpos())
# All non-tag events delegate to find_startpos so the start offset is exact.
340 handle_entityref = handle_charref = handle_data = handle_comment = \
341 handle_decl = handle_pi = unknown_decl = find_startpos
343 def get_result(self):
344 if self.result is None:
346 if len(self.result) != 3:
348 lines = self.html.split('\n')
349 lines = lines[self.result[1][0]-1:self.result[2][0]]
350 lines[0] = lines[0][self.result[1][1]:]
352 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
353 lines[-1] = lines[-1][:self.result[2][1]]
354 return '\n'.join(lines).strip()
355 # Hack for https://github.com/rg3/youtube-dl/issues/662
356 if sys.version_info < (2, 7, 3):
357 AttrParser.parse_endtag = (lambda self, i:
358 i + len("</scr'+'ipt>")
359 if self.rawdata[i:].startswith("</scr'+'ipt>")
360 else compat_html_parser.HTMLParser.parse_endtag(self, i))
def get_element_by_id(id, html):
    """Return the content of the tag carrying the given ID in the HTML document."""
    # Thin convenience wrapper over the generic attribute search.
    return get_element_by_attribute("id", id, html)
# NOTE(review): fragments — the try/loads lines of both search helpers and
# parts of MetaParser are missing from this dump.
366 def get_element_by_attribute(attribute, value, html):
367 """Return the content of the tag with the specified attribute in the passed HTML document"""
368 parser = AttrParser(attribute, value)
371 except compat_html_parser.HTMLParseError:
373 return parser.get_result()
375 class MetaParser(BaseHTMLParser):
377 Modified HTMLParser that isolates a meta tag with the specified name
380 def __init__(self, name):
381 BaseHTMLParser.__init__(self)
386 def handle_starttag(self, tag, attrs):
390 if attrs.get('name') == self.name:
391 self.result = attrs.get('content')
393 def get_result(self):
396 def get_meta_content(name, html):
398 Return the content attribute from the meta tag with the given name attribute.
400 parser = MetaParser(name)
403 except compat_html_parser.HTMLParseError:
405 return parser.get_result()
408 def clean_html(html):
409 """Clean an HTML snippet into a readable string"""
# Newlines -> spaces, <br> and </p><p> -> newlines, then strip all tags
# and unescape entities.
411 html = html.replace('\n', ' ')
412 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
413 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
415 html = re.sub('<.*?>', '', html)
416 # Replace html entities
417 html = unescapeHTML(html)
# NOTE(review): fragment — the surrounding try and the '-'-means-stdout branch
# are partially missing (gaps 423, 427-432, 434, 441-442, 447, 449-450).
421 def sanitize_open(filename, open_mode):
422 """Try to open the given filename, and slightly tweak it if this fails.
424 Attempts to open the given filename. If this fails, it tries to change
425 the filename slightly, step by step, until it's either able to open it
426 or it fails and raises a final exception, like the standard open()
429 It returns the tuple (stream, definitive_file_name).
433 if sys.platform == 'win32':
435 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
436 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
437 stream = open(encodeFilename(filename), open_mode)
438 return (stream, filename)
439 except (IOError, OSError) as err:
440 if err.errno in (errno.EACCES,):
443 # In case of error, try to remove win32 forbidden chars
444 alt_filename = os.path.join(
445 re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
446 for path_part in os.path.split(filename)
448 if alt_filename == filename:
451 # An exception here should be caught in the caller
# NOTE(review): this retries open() on the original `filename`, yet returns
# alt_filename — looks like it should open alt_filename; confirm upstream.
452 stream = open(encodeFilename(filename), open_mode)
453 return (stream, alt_filename)
456 def timeconvert(timestr):
457 """Convert RFC 2822 defined time string into system timestamp"""
459 timetuple = email.utils.parsedate_tz(timestr)
460 if timetuple is not None:
461 timestamp = email.utils.mktime_tz(timetuple)
# NOTE(review): fragments — several branches of replace_insane and the bodies
# of orderedSet/unescapeHTML are in numbering gaps.
464 def sanitize_filename(s, restricted=False, is_id=False):
465 """Sanitizes a string so it could be used as part of a filename.
466 If restricted is set, use a stricter subset of allowed characters.
467 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
469 def replace_insane(char):
470 if char == '?' or ord(char) < 32 or ord(char) == 127:
473 return '' if restricted else '\''
475 return '_-' if restricted else ' -'
476 elif char in '\\/|*<>':
478 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
480 if restricted and ord(char) > 127:
484 result = u''.join(map(replace_insane, s))
# Collapse runs of '_' and trim leading/trailing ones.
486 while '__' in result:
487 result = result.replace('__', '_')
488 result = result.strip('_')
489 # Common case of "Foreign band name - English song title"
490 if restricted and result.startswith('-_'):
496 def orderedSet(iterable):
497 """ Remove all duplicates from the input iterable """
# unescapeHTML fragment: expands &entity; references via htmlentity_transform.
508 assert type(s) == type(u'')
510 result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
# NOTE(review): fragments — the py3/Windows early returns of encodeFilename and
# the return statements of decodeOption/formatSeconds are in numbering gaps.
514 def encodeFilename(s, for_subprocess=False):
516 @param s The name of the file
519 assert type(s) == compat_str
521 # Python 3 has a Unicode API
522 if sys.version_info >= (3, 0):
525 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
526 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
527 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
528 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
529 if not for_subprocess:
532 # For subprocess calls, encode with locale encoding
533 # Refer to http://stackoverflow.com/a/9951851/35070
534 encoding = preferredencoding()
536 encoding = sys.getfilesystemencoding()
539 return s.encode(encoding, 'ignore')
# Decode a CLI option value (bytes -> text) using the locale encoding.
542 def decodeOption(optval):
545 if isinstance(optval, bytes):
546 optval = optval.decode(preferredencoding())
548 assert isinstance(optval, compat_str)
# Format a duration as H:MM:SS or M:SS.
551 def formatSeconds(secs):
553 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
555 return '%d:%02d' % (secs // 60, secs % 60)
# NOTE(review): fragment — builds an HTTPS handler; several try/except lines
# are missing. SSLv3 (PROTOCOL_SSLv3) is long deprecated/insecure; this
# predates the POODLE advisories — flagging rather than changing behavior.
560 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
561 if sys.version_info < (3, 2):
564 class HTTPSConnectionV3(httplib.HTTPSConnection):
565 def __init__(self, *args, **kwargs):
566 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
569 sock = socket.create_connection((self.host, self.port), self.timeout)
570 if getattr(self, '_tunnel_host', False):
574 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
576 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
578 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
579 def https_open(self, req):
580 return self.do_open(HTTPSConnectionV3, req)
581 return HTTPSHandlerV3(**kwargs)
583 context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
584 context.verify_mode = (ssl.CERT_NONE
585 if opts_no_check_certificate
586 else ssl.CERT_REQUIRED)
587 context.set_default_verify_paths()
# load_default_certs only exists on newer Pythons; tolerated via AttributeError.
589 context.load_default_certs()
590 except AttributeError:
592 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
# NOTE(review): fragments — the project's exception hierarchy; several
# attribute assignments and `pass` bodies are in numbering gaps.
594 class ExtractorError(Exception):
595 """Error during info extraction."""
596 def __init__(self, msg, tb=None, expected=False, cause=None):
597 """ tb, if given, is the original traceback (so that it can be printed out).
598 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
# Network errors are treated as "expected" (not a bug report candidate).
601 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
604 msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
605 super(ExtractorError, self).__init__(msg)
608 self.exc_info = sys.exc_info() # preserve original exception
611 def format_traceback(self):
612 if self.traceback is None:
614 return u''.join(traceback.format_tb(self.traceback))
617 class RegexNotFoundError(ExtractorError):
618 """Error when a regex didn't match"""
622 class DownloadError(Exception):
623 """Download Error exception.
625 This exception may be thrown by FileDownloader objects if they are not
626 configured to continue on errors. They will contain the appropriate
629 def __init__(self, msg, exc_info=None):
630 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
631 super(DownloadError, self).__init__(msg)
632 self.exc_info = exc_info
635 class SameFileError(Exception):
636 """Same File exception.
638 This exception will be thrown by FileDownloader objects if they detect
639 multiple files would have to be downloaded to the same file on disk.
644 class PostProcessingError(Exception):
645 """Post Processing exception.
647 This exception may be raised by PostProcessor's .run() method to
648 indicate an error in the postprocessing task.
650 def __init__(self, msg):
653 class MaxDownloadsReached(Exception):
654 """ --max-downloads limit has been reached. """
658 class UnavailableVideoError(Exception):
659 """Unavailable Format exception.
661 This exception will be thrown when a video is requested
662 in a format that is not available for that video.
667 class ContentTooShortError(Exception):
668 """Content Too Short exception.
670 This exception may be raised by FileDownloader objects when a file they
671 download is too small for what the server announced first, indicating
672 the connection was probably interrupted.
678 def __init__(self, downloaded, expected):
679 self.downloaded = downloaded
680 self.expected = expected
# NOTE(review): fragment — custom urllib HTTPHandler adding std_headers and
# transparent gzip/deflate decoding; some try/for/return lines are missing.
682 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
683 """Handler for HTTP requests and responses.
685 This class, when installed with an OpenerDirector, automatically adds
686 the standard headers to every HTTP request and handles gzipped and
687 deflated responses from web servers. If compression is to be avoided in
688 a particular request, the original request in the program code only has
689 to include the HTTP header "Youtubedl-No-Compression", which will be
690 removed before making the real request.
692 Part of this code was copied from:
694 http://techknack.net/python-urllib2-handlers/
696 Andrew Rowls, the author of that code, agreed to release it to the
# deflate helper: try raw stream first, fall back to zlib-wrapped data.
703 return zlib.decompress(data, -zlib.MAX_WBITS)
705 return zlib.decompress(data)
708 def addinfourl_wrapper(stream, headers, url, code):
709 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
710 return compat_urllib_request.addinfourl(stream, headers, url, code)
711 ret = compat_urllib_request.addinfourl(stream, headers, url)
715 def http_request(self, req):
716 for h,v in std_headers.items():
# Internal pseudo-headers are consumed here and removed before sending.
720 if 'Youtubedl-no-compression' in req.headers:
721 if 'Accept-encoding' in req.headers:
722 del req.headers['Accept-encoding']
723 del req.headers['Youtubedl-no-compression']
724 if 'Youtubedl-user-agent' in req.headers:
725 if 'User-agent' in req.headers:
726 del req.headers['User-agent']
727 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
728 del req.headers['Youtubedl-user-agent']
731 def http_response(self, req, resp):
734 if resp.headers.get('Content-encoding', '') == 'gzip':
735 content = resp.read()
736 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
738 uncompressed = io.BytesIO(gz.read())
739 except IOError as original_ioerror:
740 # There may be junk add the end of the file
741 # See http://stackoverflow.com/q/4928560/35070 for details
742 for i in range(1, 1024):
744 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
745 uncompressed = io.BytesIO(gz.read())
750 raise original_ioerror
751 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
752 resp.msg = old_resp.msg
754 if resp.headers.get('Content-encoding', '') == 'deflate':
755 gz = io.BytesIO(self.deflate(resp.read()))
756 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
757 resp.msg = old_resp.msg
# HTTPS requests/responses get identical treatment.
760 https_request = http_request
761 https_response = http_response
# NOTE(review): fragments — most format_expressions entries and the
# try/except around strptime are in numbering gaps.
764 def unified_strdate(date_str):
765 """Return a string with the date in the format YYYYMMDD"""
772 date_str = date_str.replace(',', ' ')
773 # %z (UTC offset) is only supported in python>=3.2
774 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
775 format_expressions = [
787 '%Y-%m-%dT%H:%M:%SZ',
788 '%Y-%m-%dT%H:%M:%S.%fZ',
789 '%Y-%m-%dT%H:%M:%S.%f0Z',
791 '%Y-%m-%dT%H:%M:%S.%f',
794 for expression in format_expressions:
796 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
# Fall back to the RFC 2822 parser when no explicit format matched.
799 if upload_date is None:
800 timetuple = email.utils.parsedate_tz(date_str)
802 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
# Guess a file extension from the URL path (fragment: returns are missing).
805 def determine_ext(url, default_ext=u'unknown_video'):
806 guess = url.partition(u'?')[0].rpartition(u'.')[2]
807 if re.match(r'^[A-Za-z0-9]+$', guess):
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name: <base>.<lang>.<format> (extension replaced)."""
    base = filename.rsplit('.', 1)[0]
    return u'.'.join((base, sub_lang, sub_format))
# NOTE(review): fragment — the `return today`, sign handling, and the
# month/year-unit conversion lines are in numbering gaps (826-836, 838).
815 def date_from_str(date_str):
817 Return a datetime object from a string in the format YYYYMMDD or
818 (now|today)[+-][0-9](day|week|month|year)(s)?"""
819 today = datetime.date.today()
# NOTE(review): missing space in `'now'or` — valid Python, but unidiomatic.
820 if date_str == 'now'or date_str == 'today':
822 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
823 if match is not None:
824 sign = match.group('sign')
825 time = int(match.group('time'))
828 unit = match.group('unit')
837 delta = datetime.timedelta(**{unit: time})
839 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format."""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        # Not an 8-digit date: pass the input through unchanged.
        return date_str
    return '-'.join(mobj.groups())
# NOTE(review): fragments — DateRange's `else:` lines, the `day` classmethod
# header, and platform_name's def/return lines are in numbering gaps.
850 class DateRange(object):
851 """Represents a time interval between two dates"""
852 def __init__(self, start=None, end=None):
853 """start and end must be strings in the format accepted by date"""
854 if start is not None:
855 self.start = date_from_str(start)
857 self.start = datetime.datetime.min.date()
859 self.end = date_from_str(end)
861 self.end = datetime.datetime.max.date()
862 if self.start > self.end:
863 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
866 """Returns a range that only contains the given day"""
868 def __contains__(self, date):
869 """Check if the date is in the range"""
870 if not isinstance(date, datetime.date):
871 date = date_from_str(date)
872 return self.start <= date <= self.end
874 return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
# platform_name fragment: decodes platform.platform() to text if needed.
878 """ Returns the platform name as a compat_str """
879 res = platform.platform()
880 if isinstance(res, bytes):
881 res = res.decode(preferredencoding())
883 assert isinstance(res, compat_str)
# NOTE(review): fragments — the out.write/flush calls and several returns
# of the byte-conversion helpers are in numbering gaps.
887 def write_string(s, out=None):
890 assert type(s) == compat_str
892 if ('b' in getattr(out, 'mode', '') or
893 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
894 s = s.encode(preferredencoding(), 'ignore')
897 except UnicodeEncodeError:
898 # In Windows shells, this can fail even when the codec is just charmap!?
899 # See https://wiki.python.org/moin/PrintFails#Issue
900 if sys.platform == 'win32' and hasattr(out, 'encoding'):
901 s = s.encode(out.encoding, 'ignore').decode(out.encoding)
# Convert a bytestring to a list of ints (py2/py3-compatible).
909 def bytes_to_intlist(bs):
912 if isinstance(bs[0], int): # Python 3
915 return [ord(c) for c in bs]
# Inverse of bytes_to_intlist.
918 def intlist_to_bytes(xs):
921 if isinstance(chr(0), bytes): # Python 2
922 return ''.join([chr(x) for x in xs])
def get_cachedir(params=None):
    """Return the cache directory for youtube-dl.

    Uses params['cachedir'] when set; otherwise falls back to
    $XDG_CACHE_HOME/youtube-dl (XDG_CACHE_HOME defaulting to ~/.cache).

    @param params  optional options dict; only 'cachedir' is consulted.
    """
    # Fix: the original used a mutable default argument (params={}), a
    # classic Python pitfall.  None is the safe default; explicit callers
    # passing a dict are unaffected.
    if params is None:
        params = {}
    cache_root = os.environ.get('XDG_CACHE_HOME',
                                os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
# NOTE(review): fragments — cross-platform advisory file locking; struct
# field list brackets, some try/return lines, and locked_file's flush/close
# handling sit in numbering gaps.
933 # Cross-platform file locking
934 if sys.platform == 'win32':
935 import ctypes.wintypes
938 class OVERLAPPED(ctypes.Structure):
940 ('Internal', ctypes.wintypes.LPVOID),
941 ('InternalHigh', ctypes.wintypes.LPVOID),
942 ('Offset', ctypes.wintypes.DWORD),
943 ('OffsetHigh', ctypes.wintypes.DWORD),
944 ('hEvent', ctypes.wintypes.HANDLE),
947 kernel32 = ctypes.windll.kernel32
948 LockFileEx = kernel32.LockFileEx
949 LockFileEx.argtypes = [
950 ctypes.wintypes.HANDLE, # hFile
951 ctypes.wintypes.DWORD, # dwFlags
952 ctypes.wintypes.DWORD, # dwReserved
953 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
954 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
955 ctypes.POINTER(OVERLAPPED) # Overlapped
957 LockFileEx.restype = ctypes.wintypes.BOOL
958 UnlockFileEx = kernel32.UnlockFileEx
959 UnlockFileEx.argtypes = [
960 ctypes.wintypes.HANDLE, # hFile
961 ctypes.wintypes.DWORD, # dwReserved
962 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
963 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
964 ctypes.POINTER(OVERLAPPED) # Overlapped
966 UnlockFileEx.restype = ctypes.wintypes.BOOL
# Lock the whole file: (high, low) spans the maximum lockable range.
967 whole_low = 0xffffffff
968 whole_high = 0x7fffffff
970 def _lock_file(f, exclusive):
971 overlapped = OVERLAPPED()
972 overlapped.Offset = 0
973 overlapped.OffsetHigh = 0
974 overlapped.hEvent = 0
975 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
976 handle = msvcrt.get_osfhandle(f.fileno())
977 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
978 whole_low, whole_high, f._lock_file_overlapped_p):
979 raise OSError('Locking file failed: %r' % ctypes.FormatError())
982 assert f._lock_file_overlapped_p
983 handle = msvcrt.get_osfhandle(f.fileno())
984 if not UnlockFileEx(handle, 0,
985 whole_low, whole_high, f._lock_file_overlapped_p):
986 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
# POSIX branch: fcntl-based advisory locks.
991 def _lock_file(f, exclusive):
992 fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
995 fcntl.lockf(f, fcntl.LOCK_UN)
# Context-manager wrapper: file is locked for the duration of the with-block.
998 class locked_file(object):
999 def __init__(self, filename, mode, encoding=None):
1000 assert mode in ['r', 'a', 'w']
1001 self.f = io.open(filename, mode, encoding=encoding)
1004 def __enter__(self):
1005 exclusive = self.mode != 'r'
1007 _lock_file(self.f, exclusive)
1013 def __exit__(self, etype, value, traceback):
1015 _unlock_file(self.f)
1022 def write(self, *args):
1023 return self.f.write(*args)
1025 def read(self, *args):
1026 return self.f.read(*args)
# NOTE(review): fragments — the encoding fallback, loop header, and
# takewhile_inclusive's body are in numbering gaps.
1029 def shell_quote(args):
1031 encoding = sys.getfilesystemencoding()
1032 if encoding is None:
1035 if isinstance(a, bytes):
1036 # We may get a filename encoded with 'encodeFilename'
1037 a = a.decode(encoding)
1038 quoted_args.append(pipes.quote(a))
1039 return u' '.join(quoted_args)
1042 def takewhile_inclusive(pred, seq):
1043 """ Like itertools.takewhile, but include the latest evaluated element
1044 (the first element so that Not pred(e)) """
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The data is JSON-encoded and appended as a url-encoded fragment.
    payload = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return u'%s#%s' % (url, payload)
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): split off smuggled JSON data, if any."""
    if '#__youtubedl_smuggle' not in smug_url:
        # Nothing smuggled: hand back the URL with the caller's default.
        return smug_url, default
    url, _, sdata = smug_url.rpartition(u'#')
    payload = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    return url, json.loads(payload)
# NOTE(review): fragments — the None/zero guards of format_bytes, the return
# of str_to_int, and the stty command list are in numbering gaps.
1068 def format_bytes(bytes):
1071 if type(bytes) is str:
1072 bytes = float(bytes)
1076 exponent = int(math.log(bytes, 1024.0))
1077 suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1078 converted = float(bytes) / float(1024 ** exponent)
1079 return u'%.2f%s' % (converted, suffix)
# Strip thousands separators ('1,000' / '1.000') before int conversion.
1082 def str_to_int(int_str):
1083 int_str = re.sub(r'[,\.]', u'', int_str)
# Terminal width: $COLUMNS first, then the output of an stty-like subprocess.
1087 def get_term_width():
1088 columns = os.environ.get('COLUMNS', None)
1093 sp = subprocess.Popen(
1095 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1096 out, err = sp.communicate()
1097 return int(out.split()[1])
1103 def month_by_name(name):
1104 """ Return the number of a month by (locale-independently) English name """
1107 u'January', u'February', u'March', u'April', u'May', u'June',
1108 u'July', u'August', u'September', u'October', u'November', u'December']
1110 return ENGLISH_NAMES.index(name) + 1
1115 def fix_xml_ampersands(xml_str):
1116 """Replace all the '&' by '&amp;' in XML"""
# Negative lookahead keeps already-escaped entities and char references intact.
1118 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
# NOTE(review): fragment — sets the process title via prctl(PR_SET_NAME=15)
# on glibc systems; the surrounding try/except OSError lines are missing.
1123 def setproctitle(title):
1124 assert isinstance(title, compat_str)
1126 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1129 title_bytes = title.encode('utf-8')
# NOTE(review): create_string_buffer(len(title_bytes)) leaves no room for the
# trailing NUL (upstream later used len+1); confirm before relying on this.
1130 buf = ctypes.create_string_buffer(len(title_bytes))
1131 buf.value = title_bytes
1133 libc.prctl(15, buf, 0, 0, 0)
1134 except AttributeError:
1135 return # Strange libc, just skip this
def remove_start(s, start):
    """Strip the prefix `start` from s if present; otherwise return s unchanged."""
    return s[len(start):] if s.startswith(start) else s
def url_basename(url):
    """Return the last path component of a URL (query and fragment excluded)."""
    url_path = compat_urlparse.urlparse(url).path
    return url_path.strip(u'/').split(u'/')[-1]
# NOTE(review): fragment — Request subclass forcing the HEAD verb; the
# get_method body (presumably `return "HEAD"`) is in a numbering gap — confirm.
1149 class HEADRequest(compat_urllib_request.Request):
1150 def get_method(self):
def int_or_none(v, scale=1):
    """Convert v to int (floor-divided by `scale`); pass None through unchanged."""
    if v is None:
        return None
    return int(v) // scale
# NOTE(review): fragment — parses '1:23:45' / '2h3m4s'-style durations into
# seconds; the None guard, re.match call, and final return are missing.
1158 def parse_duration(s):
1163 r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
1166 res = int(m.group('secs'))
1168 res += int(m.group('mins')) * 60
1169 if m.group('hours'):
1170 res += int(m.group('hours')) * 60 * 60
def prepend_extension(filename, ext):
    """Insert `ext` before the real extension: 'a.mp4' -> 'a.<ext>.mp4'."""
    base, real_ext = os.path.splitext(filename)
    return u'%s.%s%s' % (base, ext, real_ext)
# NOTE(review): fragment — the try/except OSError and the return statements
# are in numbering gaps.  Note the mutable default `args=[]`; it is not
# mutated in the visible lines, but None would be the safer default.
1179 def check_executable(exe, args=[]):
1180 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1181 args can be a list of arguments for a short output (like -version) """
1183 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
# NOTE(review): fragment — lazy paged sequence; the __len__ def line, res
# initialisation, startv/endv assignment headers, and break statements are
# in numbering gaps.
1189 class PagedList(object):
1190 def __init__(self, pagefunc, pagesize):
1191 self._pagefunc = pagefunc
1192 self._pagesize = pagesize
1195 # This is only useful for tests
1196 return len(self.getslice())
1198 def getslice(self, start=0, end=None):
1200 for pagenum in itertools.count(start // self._pagesize):
1201 firstid = pagenum * self._pagesize
1202 nextfirstid = pagenum * self._pagesize + self._pagesize
1203 if start >= nextfirstid:
1206 page_results = list(self._pagefunc(pagenum))
# Clip the first/last page to the requested [start, end) window.
1209 start % self._pagesize
1210 if firstid <= start < nextfirstid
1214 ((end - 1) % self._pagesize) + 1
1215 if (end is not None and firstid <= end <= nextfirstid)
1218 if startv != 0 or endv is not None:
1219 page_results = page_results[startv:endv]
1220 res.extend(page_results)
1222 # A little optimization - if current page is not "full", ie. does
1223 # not contain page_size videos then we can assume that this page
1224 # is the last one - there are no more ids on further pages -
1225 # i.e. no need to query again.
1226 if len(page_results) + startv < self._pagesize:
1229 # If we got the whole page, but the next page is not interesting,
1230 # break out early as well
1231 if end == nextfirstid:
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escape sequences found in s."""
    def _decode(m):
        return compat_chr(int(m.group(1), base=16))
    return re.sub(r'\\U([0-9a-fA-F]{8})', _decode, s)
# NOTE(review): fragment — feature-probe: old Pythons reject unicode format
# strings in struct; the try/except around the probe is in a numbering gap.
1242 struct.pack(u'!I', 0)
1244 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1245 def struct_pack(spec, *args):
1246 if isinstance(spec, compat_str):
1247 spec = spec.encode('ascii')
1248 return struct.pack(spec, *args)
1250 def struct_unpack(spec, *args):
1251 if isinstance(spec, compat_str):
1252 spec = spec.encode('ascii')
1253 return struct.unpack(spec, *args)
# Probe succeeded: native struct handles text specs, use it directly.
1255 struct_pack = struct.pack
1256 struct_unpack = struct.unpack
# NOTE(review): fragment — the inner fixup() def line, the strip() call and
# the comment-line early return are in numbering gaps.
1259 def read_batch_urls(batch_fd):
1261 if not isinstance(url, compat_str):
1262 url = url.decode('utf-8', 'replace')
# Drop a UTF-8 BOM mis-decoded as latin-1 at the start of a line.
1263 BOM_UTF8 = u'\xef\xbb\xbf'
1264 if url.startswith(BOM_UTF8):
1265 url = url[len(BOM_UTF8):]
# Lines starting with '#', ';' or ']' are treated as comments.
1267 if url.startswith(('#', ';', ']')):
1271 with contextlib.closing(batch_fd) as fd:
1272 return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """urlencode the arguments and return ASCII bytes suitable as POST data."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
# NOTE(review): fragments — this TreeBuilder/parser trio is the body of a
# doctype-tolerant XML parse helper whose def line is in a numbering gap;
# compat_getpass's `else:` line is likewise missing.
1280 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1281 def doctype(self, name, pubid, system):
1282 pass # Ignore doctypes
1284 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
# The `parser` kwarg for ElementTree.XML needs Python >= 2.7.
1285 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1286 return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
# On py2/Windows, getpass chokes on unicode prompts; encode them first.
1289 if sys.version_info < (3, 0) and sys.platform == 'win32':
1290 def compat_getpass(prompt, *args, **kwargs):
1291 if isinstance(prompt, compat_str):
1292 prompt = prompt.encode(preferredencoding())
1293 return getpass.getpass(prompt, *args, **kwargs)
1295 compat_getpass = getpass.getpass