X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=079e8d2c3f5168b3f0af233f3a5377f15e5a46f3;hb=c24dfef63c55ef1a5424d11b485c3b76245448a4;hp=2864e51428e69591ba9592869de7f4bbc87072f9;hpb=7d11297f3f91e6ddd3f0caa5ad4dca1a40d6c820;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2864e5142..079e8d2c3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import unicode_literals + import calendar import codecs import contextlib @@ -8,7 +10,6 @@ import ctypes import datetime import email.utils import errno -import getpass import gzip import itertools import io @@ -29,254 +30,19 @@ import traceback import xml.etree.ElementTree import zlib -try: - import urllib.request as compat_urllib_request -except ImportError: # Python 2 - import urllib2 as compat_urllib_request - -try: - import urllib.error as compat_urllib_error -except ImportError: # Python 2 - import urllib2 as compat_urllib_error - -try: - import urllib.parse as compat_urllib_parse -except ImportError: # Python 2 - import urllib as compat_urllib_parse - -try: - from urllib.parse import urlparse as compat_urllib_parse_urlparse -except ImportError: # Python 2 - from urlparse import urlparse as compat_urllib_parse_urlparse - -try: - import urllib.parse as compat_urlparse -except ImportError: # Python 2 - import urlparse as compat_urlparse - -try: - import http.cookiejar as compat_cookiejar -except ImportError: # Python 2 - import cookielib as compat_cookiejar - -try: - import html.entities as compat_html_entities -except ImportError: # Python 2 - import htmlentitydefs as compat_html_entities - -try: - import html.parser as compat_html_parser -except ImportError: # Python 2 - import HTMLParser as compat_html_parser - -try: - import http.client as compat_http_client -except ImportError: # Python 2 - import httplib as compat_http_client - -try: - from urllib.error import HTTPError as compat_HTTPError -except ImportError: # Python 2 - from urllib2 import HTTPError as compat_HTTPError - -try: - from urllib.request import urlretrieve as compat_urlretrieve -except ImportError: # Python 2 - from urllib import urlretrieve as compat_urlretrieve - - -try: - from subprocess import DEVNULL - compat_subprocess_get_DEVNULL = lambda: DEVNULL -except ImportError: - compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') - -try: - from urllib.parse import unquote as compat_urllib_parse_unquote -except ImportError: - def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): - if string == '': - return string - res = string.split('%') - if len(res) == 1: - return string - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'replace' - # pct_sequence: contiguous sequence of percent-encoded bytes, decoded - pct_sequence = b'' - string = res[0] - for item in res[1:]: - try: - if not item: - raise ValueError - pct_sequence += item[:2].decode('hex') - rest = item[2:] - if not rest: - # This segment was just a single percent-encoded character. - # May be part of a sequence of code units, so delay decoding. - # (Stored in pct_sequence). - continue - except ValueError: - rest = '%' + item - # Encountered non-percent-encoded characters. Flush the current - # pct_sequence. - string += pct_sequence.decode(encoding, errors) + rest - pct_sequence = b'' - if pct_sequence: - # Flush the final pct_sequence - string += pct_sequence.decode(encoding, errors) - return string - - -try: - from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 - # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. - # Python 2's version is apparently totally broken - - def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - qs, _coerce_result = qs, unicode - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError("bad query field: %r" % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = compat_urllib_parse_unquote( - name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = compat_urllib_parse_unquote( - value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - - def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - parsed_result = {} - pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result - -try: - compat_str = unicode # Python 2 -except NameError: - compat_str = str - -try: - compat_chr = unichr # Python 2 -except NameError: - compat_chr = chr - -try: - from xml.etree.ElementTree import ParseError as compat_xml_parse_error -except ImportError: # Python 2.6 - from xml.parsers.expat import ExpatError as compat_xml_parse_error - -try: - from shlex import quote as shlex_quote -except ImportError: # Python < 3.3 - def shlex_quote(s): - return "'" + s.replace("'", "'\"'\"'") + "'" - - -def compat_ord(c): - if type(c) is int: return c - else: return ord(c) - - -if sys.version_info >= (3, 0): - compat_getenv = os.getenv - compat_expanduser = os.path.expanduser -else: - # Environment variables should be decoded with filesystem encoding. - # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) - - def compat_getenv(key, default=None): - env = os.getenv(key, default) - if env: - env = env.decode(get_filesystem_encoding()) - return env - - # HACK: The default implementations of os.path.expanduser from cpython do not decode - # environment variables with filesystem encoding. We will work around this by - # providing adjusted implementations. - # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib - # for different platforms with correct environment variables decoding. - - if os.name == 'posix': - def compat_expanduser(path): - """Expand ~ and ~user constructions. If user or $HOME is unknown, - do nothing.""" - if not path.startswith('~'): - return path - i = path.find('/', 1) - if i < 0: - i = len(path) - if i == 1: - if 'HOME' not in os.environ: - import pwd - userhome = pwd.getpwuid(os.getuid()).pw_dir - else: - userhome = compat_getenv('HOME') - else: - import pwd - try: - pwent = pwd.getpwnam(path[1:i]) - except KeyError: - return path - userhome = pwent.pw_dir - userhome = userhome.rstrip('/') - return (userhome + path[i:]) or '/' - elif os.name == 'nt' or os.name == 'ce': - def compat_expanduser(path): - """Expand ~ and ~user constructs. - - If user or $HOME is unknown, do nothing.""" - if path[:1] != '~': - return path - i, n = 1, len(path) - while i < n and path[i] not in '/\\': - i = i + 1 - - if 'HOME' in os.environ: - userhome = compat_getenv('HOME') - elif 'USERPROFILE' in os.environ: - userhome = compat_getenv('USERPROFILE') - elif not 'HOMEPATH' in os.environ: - return path - else: - try: - drive = compat_getenv('HOMEDRIVE') - except KeyError: - drive = '' - userhome = os.path.join(drive, compat_getenv('HOMEPATH')) - - if i != 1: #~user - userhome = os.path.join(os.path.dirname(userhome), path[1:i]) - - return userhome + path[i:] - else: - compat_expanduser = os.path.expanduser +from .compat import ( + compat_chr, + compat_getenv, + compat_html_entities, + compat_parse_qs, + compat_str, + compat_urllib_error, + compat_urllib_parse, + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urlparse, + shlex_quote, +) # This is not clearly defined otherwise @@ -290,6 +56,7 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } + def preferredencoding(): """Get preferred encoding. @@ -298,28 +65,33 @@ def preferredencoding(): """ try: pref = locale.getpreferredencoding() - u'TEST'.encode(pref) + 'TEST'.encode(pref) except: pref = 'UTF-8' return pref -if sys.version_info < (3,0): - def compat_print(s): - print(s.encode(preferredencoding(), 'xmlcharrefreplace')) -else: - def compat_print(s): - assert type(s) == type(u'') - print(s) - def write_json_file(obj, fn): - """ Encode obj as JSON and write it to fn, atomically """ + """ Encode obj as JSON and write it to fn, atomically if possible """ + + fn = encodeFilename(fn) + if sys.version_info < (3, 0) and sys.platform != 'win32': + encoding = get_filesystem_encoding() + # os.path.basename returns a bytes object, but NamedTemporaryFile + # will fail if the filename contains non ascii characters unless we + # use a unicode object + path_basename = lambda f: os.path.basename(fn).decode(encoding) + # the same for os.path.dirname + path_dirname = lambda f: os.path.dirname(fn).decode(encoding) + else: + path_basename = os.path.basename + path_dirname = os.path.dirname args = { 'suffix': '.tmp', - 'prefix': os.path.basename(fn) + '.', - 'dir': os.path.dirname(fn), + 'prefix': path_basename(fn) + '.', + 'dir': path_dirname(fn), 'delete': False, } @@ -338,6 +110,13 @@ def write_json_file(obj, fn): try: with tf: json.dump(obj, tf) + if sys.platform == 'win32': + # Need to remove existing file on Windows, else os.rename raises + # WindowsError or FileExistsError. + try: + os.unlink(fn) + except OSError: + pass os.rename(tf.name, fn) except: try: @@ -352,7 +131,7 @@ if sys.version_info >= (2, 7): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z-]+$', key) assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val) - expr = xpath + u"[@%s='%s']" % (key, val) + expr = xpath + "[@%s='%s']" % (key, val) return node.find(expr) else: def find_xpath_attr(node, xpath, key, val): @@ -368,6 +147,8 @@ else: # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter + + def xpath_with_ns(path, ns_map): components = [c.split(':') for c in path.split('/')] replaced = [] @@ -385,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False): xpath = xpath.encode('ascii') n = node.find(xpath) - if n is None: + if n is None or n.text is None: if fatal: name = xpath if name is None else name raise ExtractorError('Could not find XML element %s' % name) @@ -394,131 +175,40 @@ def xpath_text(node, xpath, name=None, fatal=False): return n.text -compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix -class BaseHTMLParser(compat_html_parser.HTMLParser): - def __init(self): - compat_html_parser.HTMLParser.__init__(self) - self.html = None - - def loads(self, html): - self.html = html - self.feed(html) - self.close() - -class AttrParser(BaseHTMLParser): - """Modified HTMLParser that isolates a tag with the specified attribute""" - def __init__(self, attribute, value): - self.attribute = attribute - self.value = value - self.result = None - self.started = False - self.depth = {} - self.watch_startpos = False - self.error_count = 0 - BaseHTMLParser.__init__(self) - - def error(self, message): - if self.error_count > 10 or self.started: - raise compat_html_parser.HTMLParseError(message, self.getpos()) - self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line - self.error_count += 1 - self.goahead(1) - - def handle_starttag(self, tag, attrs): - attrs = dict(attrs) - if self.started: - self.find_startpos(None) - if self.attribute in attrs and attrs[self.attribute] == self.value: - self.result = [tag] - self.started = True - self.watch_startpos = True - if self.started: - if not tag in self.depth: self.depth[tag] = 0 - self.depth[tag] += 1 - - def handle_endtag(self, tag): - if self.started: - if tag in self.depth: self.depth[tag] -= 1 - if self.depth[self.result[0]] == 0: - self.started = False - self.result.append(self.getpos()) - - def find_startpos(self, x): - """Needed to put the start position of the result (self.result[1]) - after the opening tag with the requested id""" - if self.watch_startpos: - self.watch_startpos = False - self.result.append(self.getpos()) - handle_entityref = handle_charref = handle_data = handle_comment = \ - handle_decl = handle_pi = unknown_decl = find_startpos - - def get_result(self): - if self.result is None: - return None - if len(self.result) != 3: - return None - lines = self.html.split('\n') - lines = lines[self.result[1][0]-1:self.result[2][0]] - lines[0] = lines[0][self.result[1][1]:] - if len(lines) == 1: - lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] - lines[-1] = lines[-1][:self.result[2][1]] - return '\n'.join(lines).strip() -# Hack for https://github.com/rg3/youtube-dl/issues/662 -if sys.version_info < (2, 7, 3): - AttrParser.parse_endtag = (lambda self, i: - i + len("") - if self.rawdata[i:].startswith("") - else compat_html_parser.HTMLParser.parse_endtag(self, i)) - def get_element_by_id(id, html): """Return the content of the tag with the specified ID in the passed HTML document""" return get_element_by_attribute("id", id, html) + def get_element_by_attribute(attribute, value, html): """Return the content of the tag with the specified attribute in the passed HTML document""" - parser = AttrParser(attribute, value) - try: - parser.loads(html) - except compat_html_parser.HTMLParseError: - pass - return parser.get_result() -class MetaParser(BaseHTMLParser): - """ - Modified HTMLParser that isolates a meta tag with the specified name - attribute. - """ - def __init__(self, name): - BaseHTMLParser.__init__(self) - self.name = name - self.content = None - self.result = None - - def handle_starttag(self, tag, attrs): - if tag != 'meta': - return - attrs = dict(attrs) - if attrs.get('name') == self.name: - self.result = attrs.get('content') + m = re.search(r'''(?xs) + <([a-zA-Z0-9:._-]+) + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? + \s+%s=['"]?%s['"]? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? + \s*> + (?P.*?) + + ''' % (re.escape(attribute), re.escape(value)), html) - def get_result(self): - return self.result + if not m: + return None + res = m.group('content') -def get_meta_content(name, html): - """ - Return the content attribute from the meta tag with the given name attribute. - """ - parser = MetaParser(name) - try: - parser.loads(html) - except compat_html_parser.HTMLParseError: - pass - return parser.get_result() + if res.startswith('"') or res.startswith("'"): + res = res[1:-1] + + return unescapeHTML(res) def clean_html(html): """Clean an HTML snippet into a readable string""" + + if html is None: # Convenience for sanitizing descriptions etc. + return html + # Newline vs
html = html.replace('\n', ' ') html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html) @@ -541,7 +231,7 @@ def sanitize_open(filename, open_mode): It returns the tuple (stream, definitive_file_name). """ try: - if filename == u'-': + if filename == '-': if sys.platform == 'win32': import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) @@ -554,9 +244,9 @@ def sanitize_open(filename, open_mode): # In case of error, try to remove win32 forbidden chars alt_filename = os.path.join( - re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part) - for path_part in os.path.split(filename) - ) + re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + for path_part in os.path.split(filename) + ) if alt_filename == filename: raise else: @@ -573,6 +263,7 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp + def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. @@ -593,7 +284,7 @@ def sanitize_filename(s, restricted=False, is_id=False): return '_' return char - result = u''.join(map(replace_insane, s)) + result = ''.join(map(replace_insane, s)) if not is_id: while '__' in result: result = result.replace('__', '_') @@ -605,6 +296,7 @@ def sanitize_filename(s, restricted=False, is_id=False): result = '_' return result + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -623,15 +315,15 @@ def _htmlentity_transform(entity): mobj = re.match(r'#(x?[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) - if numstr.startswith(u'x'): + if numstr.startswith('x'): base = 16 - numstr = u'0%s' % numstr + numstr = '0%s' % numstr else: base = 10 return compat_chr(int(numstr, base)) # Unknown entity in name, return its literal representation - return (u'&%s;' % entity) + return ('&%s;' % entity) def unescapeHTML(s): @@ -655,7 +347,7 @@ def encodeFilename(s, for_subprocess=False): return s if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - # Pass u'' directly to use Unicode APIs on Windows 2000 and up + # Pass '' directly to use Unicode APIs on Windows 2000 and up # (Detecting Windows NT 4 is tricky because 'major >= 4' would # match Windows 9x series as well. Besides, NT 4 is obsolete.) if not for_subprocess: @@ -675,7 +367,7 @@ def encodeArgument(s): if not isinstance(s, compat_str): # Legacy code that uses byte strings # Uncomment the following line after fixing all post processors - #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) + # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) s = s.decode('ascii') return encodeFilename(s, True) @@ -689,6 +381,7 @@ def decodeOption(optval): assert isinstance(optval, compat_str) return optval + def formatSeconds(secs): if secs > 3600: return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) @@ -699,6 +392,17 @@ def formatSeconds(secs): def make_HTTPS_handler(opts_no_check_certificate, **kwargs): + if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 + context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) + if opts_no_check_certificate: + context.verify_mode = ssl.CERT_NONE + try: + return compat_urllib_request.HTTPSHandler(context=context, **kwargs) + except TypeError: + # Python 2.7.8 + # (create_default_context present but HTTPSHandler has no context=) + pass + if sys.version_info < (3, 2): import httplib @@ -720,26 +424,18 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): def https_open(self, req): return self.do_open(HTTPSConnectionV3, req) return HTTPSHandlerV3(**kwargs) - elif hasattr(ssl, 'create_default_context'): # Python >= 3.4 - context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) - context.options &= ~ssl.OP_NO_SSLv3 # Allow older, not-as-secure SSLv3 - if opts_no_check_certificate: - context.verify_mode = ssl.CERT_NONE - return compat_urllib_request.HTTPSHandler(context=context, **kwargs) else: # Python < 3.4 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.verify_mode = (ssl.CERT_NONE if opts_no_check_certificate else ssl.CERT_REQUIRED) context.set_default_verify_paths() - try: - context.load_default_certs() - except AttributeError: - pass # Python < 3.4 return compat_urllib_request.HTTPSHandler(context=context, **kwargs) + class ExtractorError(Exception): """Error during info extraction.""" + def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): """ tb, if given, is the original traceback (so that it can be printed out). If expected is set, this is a normal error message and most likely not a bug in youtube-dl. @@ -750,9 +446,15 @@ class ExtractorError(Exception): if video_id is not None: msg = video_id + ': ' + msg if cause: - msg += u' (caused by %r)' % cause + msg += ' (caused by %r)' % cause if not expected: - msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.' + if ytdl_is_updateable(): + update_cmd = 'type youtube-dl -U to update' + else: + update_cmd = 'see https://yt-dl.org/update on how to update' + msg += '; please report this issue on https://yt-dl.org/bug .' + msg += ' Make sure you are using the latest version; %s.' % update_cmd + msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' super(ExtractorError, self).__init__(msg) self.traceback = tb @@ -763,7 +465,14 @@ class ExtractorError(Exception): def format_traceback(self): if self.traceback is None: return None - return u''.join(traceback.format_tb(self.traceback)) + return ''.join(traceback.format_tb(self.traceback)) + + +class UnsupportedError(ExtractorError): + def __init__(self, url): + super(UnsupportedError, self).__init__( + 'Unsupported URL: %s' % url, expected=True) + self.url = url class RegexNotFoundError(ExtractorError): @@ -778,6 +487,7 @@ class DownloadError(Exception): configured to continue on errors. They will contain the appropriate error message. """ + def __init__(self, msg, exc_info=None): """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ super(DownloadError, self).__init__(msg) @@ -799,9 +509,11 @@ class PostProcessingError(Exception): This exception may be raised by PostProcessor's .run() method to indicate an error in the postprocessing task. """ + def __init__(self, msg): self.msg = msg + class MaxDownloadsReached(Exception): """ --max-downloads limit has been reached. """ pass @@ -831,6 +543,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(compat_urllib_request.HTTPHandler): """Handler for HTTP requests and responses. @@ -943,17 +656,19 @@ def parse_iso8601(date_str, delimiter='T'): return calendar.timegm(dt.timetuple()) -def unified_strdate(date_str): +def unified_strdate(date_str, day_first=True): """Return a string with the date in the format YYYYMMDD""" if date_str is None: return None - upload_date = None - #Replace commas + # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + # Remove AM/PM + timezone + date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) + format_expressions = [ '%d %B %Y', '%d %b %Y', @@ -968,7 +683,6 @@ def unified_strdate(date_str): '%d/%m/%Y', '%d/%m/%y', '%Y/%m/%d %H:%M:%S', - '%d/%m/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', @@ -980,6 +694,14 @@ def unified_strdate(date_str): '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', ] + if day_first: + format_expressions.extend([ + '%d/%m/%Y %H:%M:%S', + ]) + else: + format_expressions.extend([ + '%m/%d/%Y %H:%M:%S', + ]) for expression in format_expressions: try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -991,25 +713,30 @@ def unified_strdate(date_str): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') return upload_date -def determine_ext(url, default_ext=u'unknown_video'): + +def determine_ext(url, default_ext='unknown_video'): if url is None: return default_ext - guess = url.partition(u'?')[0].rpartition(u'.')[2] + guess = url.partition('?')[0].rpartition('.')[2] if re.match(r'^[A-Za-z0-9]+$', guess): return guess else: return default_ext + def subtitles_filename(filename, sub_lang, sub_format): - return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format + return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format + def date_from_str(date_str): """ Return a datetime object from a string in the format YYYYMMDD or (now|today)[+-][0-9](day|week|month|year)(s)?""" today = datetime.date.today() - if date_str == 'now'or date_str == 'today': + if date_str in ('now', 'today'): return today + if date_str == 'yesterday': + return today - datetime.timedelta(days=1) match = re.match('(now|today)(?P[+-])(?P