X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=8ccf254895133f38d1c8e095718a0d53d53bd061;hb=43ebf77df3bbd93dbbd0336b0243d8d50895ab72;hp=0d30075aa1dd982a6b0a95e717575ce217453533;hpb=9d30c2132acf2d12bfa8e559987c341c76d9cd24;p=youtube-dl

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 0d30075aa..8ccf25489 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -46,6 +46,7 @@ from .compat import (
     compat_html_entities,
     compat_html_entities_html5,
     compat_http_client,
+    compat_integer_types,
     compat_kwargs,
     compat_os_name,
     compat_parse_qs,
@@ -2794,6 +2795,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     https_response = http_response
 
 
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+    if sys.version_info[0] < 3:
+        def redirect_request(self, req, fp, code, msg, headers, newurl):
+            # On python 2 urlh.geturl() may sometimes return redirect URL
+            # as byte string instead of unicode. This workaround allows
+            # to force it always return unicode.
+            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+
+
 def extract_timezone(date_str):
     m = re.search(
         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
@@ -3519,10 +3529,11 @@ def str_or_none(v, default=None):
 
 def str_to_int(int_str):
     """ A more relaxed version of int_or_none """
-    if int_str is None:
-        return None
-    int_str = re.sub(r'[,\.\+]', '', int_str)
-    return int(int_str)
+    if isinstance(int_str, compat_integer_types):
+        return int_str
+    elif isinstance(int_str, compat_str):
+        int_str = re.sub(r'[,\.\+]', '', int_str)
+        return int_or_none(int_str)
 
 
 def float_or_none(v, scale=1, invscale=1, default=None):
@@ -5383,6 +5394,19 @@ def decode_packed_codes(code):
         obfucasted_code)
 
 
+def caesar(s, alphabet, shift):
+    if shift == 0:
+        return s
+    l = len(alphabet)
+    return ''.join(
+        alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
+        for c in s)
+
+
+def rot47(s):
+    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
+
+
 def parse_m3u8_attributes(attrib):
     info = {}
     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):