X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=f3a2a32b43faf6799358994409437140ff950fff;hb=795f28f871074aca2a74dfe67e1e75252b525c4c;hp=5c0ea2e43931c7cacf961a808f8b5b62b9d25889;hpb=2f2ffea9cad7d30165a0171bf6e662bef2182ab4;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5c0ea2e43..f3a2a32b4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,33 +1,39 @@ # coding: utf-8 import collections -import itertools +import errno import io +import itertools import json -import netrc +import os.path import re import socket import string import struct import traceback +import xml.etree.ElementTree import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor from ..utils import ( + compat_chr, compat_http_client, compat_parse_qs, compat_urllib_error, compat_urllib_parse, compat_urllib_request, + compat_urlparse, compat_str, clean_html, + get_cachedir, get_element_by_id, ExtractorError, unescapeHTML, unified_strdate, orderedSet, + write_json_file, ) class YoutubeBaseInfoExtractor(InfoExtractor): @@ -68,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) return False - galx = None - dsh = None - match = re.search(re.compile(r'[a-zA-Z0-9]+)\.(?P[a-z]+)$', + def _extract_signature_function(self, video_id, player_url, slen): + id_m = re.match(r'.*-(?P[a-zA-Z0-9_-]+)\.(?P[a-z]+)$', player_url) player_type = id_m.group('ext') player_id = id_m.group('id') - # TODO read from filesystem cache + # Read from filesystem cache + func_id = '%s_%s_%d' % (player_type, player_id, slen) + assert os.path.basename(func_id) == func_id + cache_dir = get_cachedir(self._downloader.params) + + cache_enabled = cache_dir is not None + if cache_enabled: + cache_fn = os.path.join(os.path.expanduser(cache_dir), + u'youtube-sigfuncs', + func_id + '.json') + try: + with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + cache_spec = json.load(cachef) + return lambda s: u''.join(s[i] for i in cache_spec) + except IOError: + pass # No cache available if player_type == 'js': code = self._download_webpage( @@ -436,7 +437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): note=u'Downloading %s player %s' % (player_type, player_id), errnote=u'Download of %s failed' % player_url) res = self._parse_sig_js(code) - elif player_tpye == 'swf': + elif player_type == 'swf': urlh = self._request_webpage( player_url, video_id, note=u'Downloading %s player %s' % (player_type, player_id), @@ -446,10 +447,60 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: assert False, 'Invalid player type %r' % player_type - # TODO write cache + if cache_enabled: + try: + test_string = u''.join(map(compat_chr, range(slen))) + cache_res = res(test_string) + cache_spec = [ord(c) for c in cache_res] + try: + os.makedirs(os.path.dirname(cache_fn)) + except OSError as ose: + if ose.errno != errno.EEXIST: + raise + write_json_file(cache_spec, cache_fn) + except Exception: + tb = traceback.format_exc() + self._downloader.report_warning( + u'Writing cache to %r failed: %s' % (cache_fn, tb)) return res + def _print_sig_code(self, func, slen): + def gen_sig_code(idxs): + def _genslice(start, end, step): + starts = u'' if start == 0 else str(start) + ends = (u':%d' % (end+step)) if end + step >= 0 else u':' + steps = u'' if step == 1 else (u':%d' % step) + return u's[%s%s%s]' % (starts, ends, steps) + + step = None + start = '(Never used)' # Quelch pyflakes warnings - start will be + # set as soon as step is set + for i, prev in zip(idxs[1:], idxs[:-1]): + if step is not None: + if i - prev == step: + continue + yield _genslice(start, prev, step) + step = None + continue + if i - prev in [-1, 1]: + step = i - prev + start = prev + continue + else: + yield u's[%d]' % prev + if step is None: + yield u's[%d]' % i + else: + yield _genslice(start, i, step) + + test_string = u''.join(map(compat_chr, range(slen))) + cache_res = func(test_string) + cache_spec = [ord(c) for c in cache_res] + expr_code = u' + '.join(gen_sig_code(cache_spec)) + code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code) + self.to_screen(u'Extracted signature function:\n' + code) + def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([a-zA-Z]+)', jscode, @@ -462,7 +513,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def interpret_statement(stmt, local_vars, allow_recursion=20): if allow_recursion < 0: - raise ExctractorError(u'Recursion limit reached') + raise ExtractorError(u'Recursion limit reached') if stmt.startswith(u'var '): stmt = stmt[len(u'var '):] @@ -620,7 +671,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): v = - ((v ^ 0xffffffff) + 1) return v - def string(reader=None): + def read_string(reader=None): if reader is None: reader = code_reader slen = u30(reader) @@ -641,31 +692,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return res # minor_version + major_version - _ = read_bytes(2 + 2) + read_bytes(2 + 2) # Constant pool int_count = u30() for _c in range(1, int_count): - _ = s32() + s32() uint_count = u30() for _c in range(1, uint_count): - _ = u32() + u32() double_count = u30() - _ = read_bytes((double_count-1) * 8) + read_bytes((double_count-1) * 8) string_count = u30() constant_strings = [u''] for _c in range(1, string_count): - s = string() + s = read_string() constant_strings.append(s) namespace_count = u30() for _c in range(1, namespace_count): - _ = read_bytes(1) # kind - _ = u30() # name + read_bytes(1) # kind + u30() # name ns_set_count = u30() for _c in range(1, ns_set_count): count = u30() for _c2 in range(count): - _ = u30() + u30() multiname_count = u30() MULTINAME_SIZES = { 0x07: 2, # QName @@ -684,13 +735,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): kind = u30() assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind if kind == 0x07: - namespace_idx = u30() + u30() # namespace_idx name_idx = u30() multinames.append(constant_strings[name_idx]) else: multinames.append('[MULTINAME kind: %d]' % kind) for _c2 in range(MULTINAME_SIZES[kind]): - _ = u30() + u30() # Methods method_count = u30() @@ -700,32 +751,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): method_infos = [] for method_id in range(method_count): param_count = u30() - _ = u30() # return type + u30() # return type for _ in range(param_count): - _ = u30() # param type - _ = u30() # name index (always 0 for youtube) + u30() # param type + u30() # name index (always 0 for youtube) flags = read_byte() if flags & 0x08 != 0: # Options present option_count = u30() for c in range(option_count): - _ = u30() # val - _ = read_bytes(1) # kind + u30() # val + read_bytes(1) # kind if flags & 0x80 != 0: # Param names present for _ in range(param_count): - _ = u30() # param name + u30() # param name mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) method_infos.append(mi) # Metadata metadata_count = u30() for _c in range(metadata_count): - _ = u30() # name + u30() # name item_count = u30() for _c2 in range(item_count): - _ = u30() # key - _ = u30() # value + u30() # key + u30() # value def parse_traits_info(): trait_name_idx = u30() @@ -734,20 +785,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): attrs = kind_full >> 4 methods = {} if kind in [0x00, 0x06]: # Slot or Const - _ = u30() # Slot id - type_name_idx = u30() + u30() # Slot id + u30() # type_name_idx vindex = u30() if vindex != 0: - _ = read_byte() # vkind + read_byte() # vkind elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter - _ = u30() # disp_id + u30() # disp_id method_idx = u30() methods[multinames[trait_name_idx]] = method_idx elif kind == 0x04: # Class - _ = u30() # slot_id - _ = u30() # classi + u30() # slot_id + u30() # classi elif kind == 0x05: # Function - _ = u30() # slot_id + u30() # slot_id function_idx = u30() methods[function_idx] = multinames[trait_name_idx] else: @@ -756,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if attrs & 0x4 != 0: # Metadata present metadata_count = u30() for _c3 in range(metadata_count): - _ = u30() + u30() # metadata index return methods @@ -770,17 +821,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if name_idx == searched_idx: # We found the class we're looking for! searched_class_id = class_id - _ = u30() # super_name idx + u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present - protected_ns_idx = u30() + u30() # protected_ns_idx intrf_count = u30() for _c2 in range(intrf_count): - _ = u30() - _ = u30() # iinit + u30() + u30() # iinit trait_count = u30() for _c2 in range(trait_count): - _ = parse_traits_info() + parse_traits_info() if searched_class_id is None: raise ExtractorError(u'Target class %r not found' % @@ -789,7 +840,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): method_names = {} method_idxs = {} for class_id in range(class_count): - _ = u30() # cinit + u30() # cinit trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() @@ -802,10 +853,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Scripts script_count = u30() for _c in range(script_count): - _ = u30() # init + u30() # init trait_count = u30() for _c2 in range(trait_count): - _ = parse_traits_info() + parse_traits_info() # Method bodies method_body_count = u30() @@ -813,10 +864,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): methods = {} for _c in range(method_body_count): method_idx = u30() - max_stack = u30() + u30() # max_stack local_count = u30() - init_scope_depth = u30() - max_scope_depth = u30() + u30() # init_scope_depth + u30() # max_scope_depth code_length = u30() code = read_bytes(code_length) if method_idx in method_idxs: @@ -824,14 +875,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): methods[method_idxs[method_idx]] = m exception_count = u30() for _c2 in range(exception_count): - _ = u30() # from - _ = u30() # to - _ = u30() # target - _ = u30() # exc_type - _ = u30() # var_name + u30() # from + u30() # to + u30() # target + u30() # exc_type + u30() # var_name trait_count = u30() for _c2 in range(trait_count): - _ = parse_traits_info() + parse_traits_info() assert p + code_reader.tell() == len(code_tag) assert len(methods) == len(method_idxs) @@ -946,7 +997,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): assert isinstance(obj, list) stack.append(obj[idx]) elif opcode == 128: # coerce - _ = u30(coder) + u30(coder) elif opcode == 133: # coerce_s assert isinstance(stack[-1], (type(None), compat_str)) elif opcode == 164: # modulo @@ -981,19 +1032,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if player_url is not None: try: - if player_url not in self._player_cache: + player_id = (player_url, len(s)) + if player_id not in self._player_cache: func = self._extract_signature_function( - video_id, player_url + video_id, player_url, len(s) ) - self._player_cache[player_url] = func - return self._player_cache[player_url](s) - except Exception as e: + self._player_cache[player_id] = func + func = self._player_cache[player_id] + if self._downloader.params.get('youtube_print_sig_code'): + self._print_sig_code(func, len(s)) + return func(s) + except Exception: tb = traceback.format_exc() self._downloader.report_warning( u'Automatic signature extraction failed: ' + tb) - self._downloader.report_warning( - u'Warning: Falling back to static signature algorithm') + self._downloader.report_warning( + u'Warning: Falling back to static signature algorithm') + return self._static_decrypt_signature( s, video_id, player_url, age_gate) @@ -1004,8 +1060,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if len(s) == 86: return s[2:63] + s[82] + s[64:82] + s[63] - if len(s) == 92: + if len(s) == 93: + return s[86:29:-1] + s[88] + s[28:5:-1] + elif len(s) == 92: return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] + elif len(s) == 91: + return s[84:27:-1] + s[86] + s[26:5:-1] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] elif len(s) == 89: @@ -1015,15 +1075,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 87: return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: - return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] + return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] elif len(s) == 85: return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] elif len(s) == 84: - return s[81:36:-1] + s[0] + s[35:2:-1] + return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] elif len(s) == 83: - return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] + return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] elif len(s) == 82: - return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] + return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 80: @@ -1034,15 +1094,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _decrypt_signature_age_gate(self, s): - # The videos with age protection use another player, so the algorithms - # can be different. - if len(s) == 86: - return s[2:63] + s[82] + s[64:82] + s[63] - else: - # Fallback to the other algortihms - return self._decrypt_signature(s) - def _get_available_subtitles(self, video_id): try: sub_list = self._download_webpage( @@ -1060,6 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'lang': lang, 'v': video_id, 'fmt': self._downloader.params.get('subtitlesformat'), + 'name': l[0], }) url = u'http://www.youtube.com/api/timedtext?' + params sub_lang_list[lang] = url @@ -1093,7 +1145,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): list_page = self._download_webpage(list_url, video_id) caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8')) original_lang_node = caption_list.find('track') - if original_lang_node.attrib.get('kind') != 'asr' : + if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : self._downloader.report_warning(u'Video doesn\'t have automatic captions') return {} original_lang = original_lang_node.attrib['lang_code'] @@ -1193,10 +1245,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url_map[itag] = format_url return url_map - def _real_extract(self, url): - if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url): - self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).') + def _extract_annotations(self, video_id): + url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id + return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') + def _real_extract(self, url): # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: @@ -1279,9 +1332,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._downloader.report_warning(u'unable to extract uploader nickname') # title - if 'title' not in video_info: - raise ExtractorError(u'Unable to extract video title') - video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) + if 'title' in video_info: + video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) + else: + self._downloader.report_warning(u'Unable to extract video title') + video_title = u'_' # thumbnail image # We try first to get a high quality image: @@ -1291,7 +1346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_thumbnail = m_thumb.group(1) elif 'thumbnail_url' not in video_info: self._downloader.report_warning(u'unable to extract video thumbnail') - video_thumbnail = '' + video_thumbnail = None else: # don't panic if we can't find it video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) @@ -1326,6 +1381,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) + # annotations + video_annotations = None + if self._downloader.params.get('writeannotations', False): + video_annotations = self._extract_annotations(video_id) + # Decide which formats to download try: @@ -1336,32 +1396,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): args = info['args'] # Easy way to know if the 's' value is in url_encoded_fmt_stream_map # this signatures are encrypted - m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) + if 'url_encoded_fmt_stream_map' not in args: + raise ValueError(u'No stream_map present') # caught below + re_signature = re.compile(r'[&,]s=') + m_s = re_signature.search(args['url_encoded_fmt_stream_map']) if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' % video_id) video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']] - m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u'')) + m_s = re_signature.search(args.get('adaptive_fmts', u'')) if m_s is not None: - if 'url_encoded_fmt_stream_map' in video_info: - video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts'] + if 'adaptive_fmts' in video_info: + video_info['adaptive_fmts'][0] += ',' + args['adaptive_fmts'] else: - video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']] - elif 'adaptive_fmts' in video_info: - if 'url_encoded_fmt_stream_map' in video_info: - video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0] - else: - video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts'] + video_info['adaptive_fmts'] = [args['adaptive_fmts']] except ValueError: pass if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] - elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: - if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]: + elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: + encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] + if 'rtmpe%3Dyes' in encoded_url_map: raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) url_map = {} - for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): + for url_data_str in encoded_url_map.split(','): url_data = compat_parse_qs(url_data_str) if 'itag' in url_data and 'url' in url_data: url = url_data['url'][0] @@ -1371,10 +1430,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): encrypted_sig = url_data['s'][0] if self._downloader.params.get('verbose'): if age_gate: - player_version = self._search_regex( - r'-(.+)\.swf$', - player_url if player_url else None, - 'flash player', fatal=False) + if player_url is None: + player_version = 'unknown' + else: + player_version = self._search_regex( + r'-(.+)\.swf$', player_url, + u'flash player', fatal=False) player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( @@ -1409,16 +1470,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return else: - raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info') + raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') results = [] - for format_param, video_real_url in video_url_list: + for itag, video_real_url in video_url_list: # Extension - video_extension = self._video_extensions.get(format_param, 'flv') + video_extension = self._video_extensions.get(itag, 'flv') - video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, - self._video_dimensions.get(format_param, '???'), - ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '') + video_format = '{0} - {1}{2}'.format(itag if itag else video_extension, + self._video_dimensions.get(itag, '???'), + ' ('+self._special_itags[itag]+')' if itag in self._special_itags else '') results.append({ 'id': video_id, @@ -1429,11 +1490,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'title': video_title, 'ext': video_extension, 'format': video_format, + 'format_id': itag, 'thumbnail': video_thumbnail, 'description': video_description, 'player_url': player_url, 'subtitles': video_subtitles, - 'duration': video_duration + 'duration': video_duration, + 'age_limit': 18 if age_gate else 0, + 'annotations': video_annotations }) return results @@ -1467,9 +1531,19 @@ class YoutubePlaylistIE(InfoExtractor): mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group(1) or mobj.group(2) + + # Check if it's a video-specific URL + query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + if 'v' in query_dict: + video_id = query_dict['v'][0] + if self._downloader.params.get('noplaylist'): + self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) + return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') + else: + self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) # Download playlist videos from API - playlist_id = mobj.group(1) or mobj.group(2) videos = [] for page_num in itertools.count(1): @@ -1564,7 +1638,7 @@ class YoutubeChannelIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' @@ -1757,3 +1831,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos') playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id') return self.url_result(playlist_id, 'YoutubePlaylist') + + +class YoutubeTruncatedURLIE(InfoExtractor): + IE_NAME = 'youtube:truncated_url' + IE_DESC = False # Do not list + _VALID_URL = r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$' + + def _real_extract(self, url): + raise ExtractorError( + u'Did you forget to quote the URL? Remember that & is a meta ' + u'character in most shells, so you want to put the URL in quotes, ' + u'like youtube-dl ' + u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' + u' (or simply youtube-dl BaW_jenozKc ).', + expected=True)