X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=03566d22357a086bb1019bb1187db9b5425b83f0;hb=024c53694d674f046fdad593e03bfaeec17cc559;hp=4ade0554e33597ce8aa965e147d2d73cce6a9fab;hpb=6789defea9b1fc7ff631e9da8a281504167ced10;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4ade0554e..03566d223 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -17,6 +17,7 @@ import io import json import locale import math +import operator import os import pipes import platform @@ -701,7 +702,7 @@ def unified_strdate(date_str, day_first=True): # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) # Remove AM/PM + timezone - date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) + date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) format_expressions = [ '%d %B %Y', @@ -1275,7 +1276,10 @@ def parse_duration(s): (?P[0-9.]+)\s*(?:hours?)| (?: - (?:(?P[0-9]+)\s*(?:[:h]|hours?)\s*)? + (?: + (?:(?P[0-9]+)\s*(?:[:d]|days?)\s*)? + (?P[0-9]+)\s*(?:[:h]|hours?)\s* + )? (?P[0-9]+)\s*(?:[:m]|mins?|minutes?)\s* )? (?P[0-9]+)(?P\.[0-9]+)?\s*(?:s|secs?|seconds?)? @@ -1293,6 +1297,8 @@ def parse_duration(s): res += int(m.group('mins')) * 60 if m.group('hours'): res += int(m.group('hours')) * 60 * 60 + if m.group('days'): + res += int(m.group('days')) * 24 * 60 * 60 if m.group('ms'): res += float(m.group('ms')) return res @@ -1543,7 +1549,7 @@ def js_to_json(code): res = re.sub(r'''(?x) "(?:[^"\\]*(?:\\\\|\\")?)*"| '(?:[^'\\]*(?:\\\\|\\')?)*'| - [a-zA-Z_][a-zA-Z_0-9]* + [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) return res @@ -1673,3 +1679,79 @@ def render_table(header_row, data): max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' return '\n'.join(format_str % tuple(row) for row in table) + + +def _match_one(filter_part, dct): + COMPARISON_OPERATORS = { + '<': operator.lt, + '<=': operator.le, + '>': operator.gt, + '>=': operator.ge, + '=': operator.eq, + '!=': operator.ne, + } + operator_rex = re.compile(r'''(?x)\s* + (?P[a-z_]+) + \s*(?P%s)(?P\s*\?)?\s* + (?: + (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| + (?P(?![0-9.])[a-z0-9A-Z]*) + ) + \s*$ + ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) + m = operator_rex.search(filter_part) + if m: + op = COMPARISON_OPERATORS[m.group('op')] + if m.group('strval') is not None: + if m.group('op') not in ('=', '!='): + raise ValueError( + 'Operator %s does not support string values!' % m.group('op')) + comparison_value = m.group('strval') + else: + try: + comparison_value = int(m.group('intval')) + except ValueError: + comparison_value = parse_filesize(m.group('intval')) + if comparison_value is None: + comparison_value = parse_filesize(m.group('intval') + 'B') + if comparison_value is None: + raise ValueError( + 'Invalid integer value %r in filter part %r' % ( + m.group('intval'), filter_part)) + actual_value = dct.get(m.group('key')) + if actual_value is None: + return m.group('none_inclusive') + return op(actual_value, comparison_value) + + UNARY_OPERATORS = { + '': lambda v: v is not None, + '!': lambda v: v is None, + } + operator_rex = re.compile(r'''(?x)\s* + (?P%s)\s*(?P[a-z_]+) + \s*$ + ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) + m = operator_rex.search(filter_part) + if m: + op = UNARY_OPERATORS[m.group('op')] + actual_value = dct.get(m.group('key')) + return op(actual_value) + + raise ValueError('Invalid filter part %r' % filter_part) + + +def match_str(filter_str, dct): + """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ + + return all( + _match_one(filter_part, dct) for filter_part in filter_str.split('&')) + + +def match_filter_func(filter_str): + def _match_func(info_dict): + if match_str(filter_str, info_dict): + return None + else: + video_title = info_dict.get('title', info_dict.get('id', 'video')) + return '%s does not pass filter %s, skipping ..' % (video_title, filter_str) + return _match_func