15 from .common import InfoExtractor, SearchInfoExtractor
16 from .subtitles import SubtitlesInfoExtractor
22 compat_urllib_request,
# NOTE(review): this region is a fragmented listing -- the leading integer on
# each line is an extraction artifact (the original file's line number) and
# several intermediate lines are missing. Code lines are kept byte-identical;
# only comments are added.
# Base class sharing login / language / age-verification helpers between the
# YouTube extractors.
33 class YoutubeBaseInfoExtractor(InfoExtractor):
34 """Provide base functions for Youtube extractors"""
# Endpoints used by the helpers below.
35 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
36 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
37 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
38 _NETRC_MACHINE = 'youtube'
39 # If True it will raise an error if no login info is provided
40 _LOGIN_REQUIRED = False
# Log that the language cookie is about to be set.
42 def report_lang(self):
43 """Report attempt to set language."""
44 self.to_screen(u'Setting language')
# Request _LANG_URL so the site serves English pages; failures only warn
# (best-effort), they do not abort extraction.
46 def _set_language(self):
47 request = compat_urllib_request.Request(self._LANG_URL)
50 compat_urllib_request.urlopen(request).read()
51 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
52 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
# Log in with credentials from options/.netrc. When none are given, this is
# only an error if _LOGIN_REQUIRED is set; otherwise login is skipped.
57 (username, password) = self._get_login_info()
58 # No authentication to be performed
60 if self._LOGIN_REQUIRED:
61 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
64 request = compat_urllib_request.Request(self._LOGIN_URL)
66 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
67 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
68 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
# Scrape the hidden GALX / dsh tokens the Google login form requires.
73 match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
76 match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
# Fields of the ServiceLogin POST body (listing elides several entries).
82 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
86 u'PersistentCookie': u'yes',
88 u'bgresponse': u'js_disabled',
89 u'checkConnection': u'',
90 u'checkedDomains': u'youtube',
96 u'signIn': u'Sign in',
98 u'service': u'youtube',
102 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
104 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
105 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
106 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
# If the response still contains the login form, the credentials were bad.
109 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
110 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
111 self._downloader.report_warning(u'unable to log in: bad username or password')
113 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
114 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
# POST the age-verification confirmation; unlike login, a network failure
# here is fatal (raises ExtractorError).
118 def _confirm_age(self):
121 'action_confirm': 'Confirm',
123 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
125 self.report_age_confirmation()
126 compat_urllib_request.urlopen(request).read().decode('utf-8')
127 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
128 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
# Runs once before extraction: set language, then log in (elided lines
# presumably bail out early when either step fails -- TODO confirm).
131 def _real_initialize(self):
132 if self._downloader is None:
134 if not self._set_language():
136 if not self._login():
# NOTE(review): fragmented listing -- leading integers are original line
# numbers from extraction, indentation is stripped, and many lines are
# elided. Code lines kept byte-identical; only comments added.
# Main YouTube video extractor. Inherits login helpers from
# YoutubeBaseInfoExtractor and subtitle plumbing from SubtitlesInfoExtractor.
141 class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
142 IE_DESC = u'YouTube.com'
# _VALID_URL (verbose regex, opening lines elided): matches the many URL
# shapes that contain an 11-character video ID.
145 (?:https?://)? # http(s):// (optional)
146 (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
147 tube\.majestyc\.net/|
148 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
149 (?:.*?\#/)? # handle anchor (#/) redirect urls
150 (?: # the various things that can precede the ID:
151 (?:(?:v|embed|e)/) # v/ or embed/ or e/
152 |(?: # or the v= param in all its forms
153 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
154 (?:\?|\#!?) # the params delimiter ? or # or #!
155 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
159 |youtu\.be/ # just youtu.be/xxxx
161 )? # all until now is optional -> you can pass the naked ID
162 ([0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
163 (?(1).+)? # if we found the ID, everything can follow
# Regex pulling the next_url redirect parameter out of verification URLs.
165 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
166 # Listed in order of quality
167 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
168 # Apple HTTP Live Streaming
169 '96', '95', '94', '93', '92', '132', '151',
171 '85', '84', '102', '83', '101', '82', '100',
173 '138', '137', '248', '136', '247', '135', '246',
174 '245', '244', '134', '243', '133', '242', '160',
176 '141', '172', '140', '171', '139',
# Same itags, reordered so free (webm/ogg) formats rank above proprietary
# ones at equal quality.
178 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
179 # Apple HTTP Live Streaming
180 '96', '95', '94', '93', '92', '132', '151',
182 '85', '102', '84', '101', '83', '100', '82',
184 '138', '248', '137', '247', '136', '246', '245',
185 '244', '135', '243', '134', '242', '133', '160',
187 '172', '141', '171', '140', '139',
# Container name -> itags, best first; used to resolve "-f mp4"-style
# format requests in _get_video_url_list.
189 _video_formats_map = {
190 'flv': ['35', '34', '6', '5'],
191 '3gp': ['36', '17', '13'],
192 'mp4': ['38', '37', '22', '18'],
193 'webm': ['46', '45', '44', '43'],
# itag -> file extension / display dimensions (bodies elided in listing).
195 _video_extensions = {
217 # Apple HTTP Live Streaming
249 _video_dimensions = {
# _TESTS entries (dict delimiters elided by the listing).
331 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
332 u"file": u"BaW_jenozKc.mp4",
334 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
335 u"uploader": u"Philipp Hagemeister",
336 u"uploader_id": u"phihag",
337 u"upload_date": u"20121002",
338 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
342 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
343 u"file": u"1ltcDfZMA3U.flv",
344 u"note": u"Test VEVO video (#897)",
346 u"upload_date": u"20070518",
347 u"title": u"Maps - It Will Find You",
348 u"description": u"Music video by Maps performing It Will Find You.",
349 u"uploader": u"MuteUSA",
350 u"uploader_id": u"MuteUSA"
354 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
355 u"file": u"UxxajLWwzqY.mp4",
356 u"note": u"Test generic use_cipher_signature video (#897)",
358 u"upload_date": u"20120506",
359 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
360 u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
361 u"uploader": u"Icona Pop",
362 u"uploader_id": u"IconaPop"
366 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
367 u"file": u"07FYdnEawAQ.mp4",
368 u"note": u"Test VEVO video with age protection (#956)",
370 u"upload_date": u"20130703",
371 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
372 u"description": u"md5:64249768eec3bc4276236606ea996373",
373 u"uploader": u"justintimberlakeVEVO",
374 u"uploader_id": u"justintimberlakeVEVO"
378 u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
379 u'file': u'TGi3HqYrWHE.mp4',
380 u'note': u'm3u8 video',
382 u'title': u'Triathlon - Men - London 2012 Olympic Games',
383 u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
384 u'uploader': u'olympic',
385 u'upload_date': u'20120807',
386 u'uploader_id': u'olympic',
389 u'skip_download': True,
def suitable(cls, url):
    """Return True when this extractor should handle *url*.

    Playlist URLs are deliberately rejected so that YoutubePlaylistIE
    gets a chance to claim them first.
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance JS player cache.

    The cache maps a JS player URL to the signature-deciphering callable
    extracted from it, so each player is only downloaded and parsed once.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._jsplayer_cache = {}
def report_video_webpage_download(self, video_id):
    """Announce that the watch page for *video_id* is being downloaded."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
def report_video_info_webpage_download(self, video_id):
    """Announce that the get_video_info page for *video_id* is being fetched."""
    message = u'%s: Downloading video info webpage' % video_id
    self.to_screen(message)
def report_information_extraction(self, video_id):
    """Announce that metadata extraction for *video_id* has started."""
    message = u'%s: Extracting video information' % video_id
    self.to_screen(message)
def report_unavailable_format(self, video_id, format):
    """Announce that the requested *format* is not offered for *video_id*."""
    message = u'%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
def report_rtmp_download(self):
    """Announce that the download will go over the RTMP protocol."""
    self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url):
    """Download the player at *player_url* and build a signature decipherer.

    Returns a callable mapping an encrypted signature string to the
    decrypted signature. The player type is taken from the URL's
    extension: ``.js`` players are parsed by _parse_sig_js, ``.swf``
    players by _parse_sig_swf. Any other extension is a programming
    error (assert).
    """
    # Player URLs end in "-<id>.<ext>".
    id_m = re.match(r'.*-(?P<id>[^.]+)\.(?P<ext>[^.]+)$', player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    if player_type == 'js':
        # BUG FIX: the original interpolated the undefined name
        # `jsplayer_id` into the note string, raising NameError; use the
        # `player_id` captured above.
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        return self._parse_sig_js(code)
    elif player_type == 'swf':
        # BUG FIX: the original tested the misspelled name `player_tpye`,
        # so this branch always raised NameError instead of parsing SWF
        # players.
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        return self._parse_sig_swf(code)
    else:
        assert False, 'Invalid player type %r' % player_type
# NOTE(review): fragmented listing -- leading integers are original line
# numbers, indentation is stripped, many lines elided. Code lines kept
# byte-identical; only comments added.
# Build a Python callable from the JS player's signature function by
# locating it via regex and interpreting its tiny JS subset directly.
446 def _parse_sig_js(self, jscode):
447 funcname = self._search_regex(
448 r'signature=([a-zA-Z]+)', jscode,
449 u'Initial JS player signature function name')
# NOTE(review): string.lowercase exists only on Python 2; on Python 3 this
# would need string.ascii_lowercase. The file's u'' literals suggest a
# Py2-era codebase -- confirm before porting.
454 return string.lowercase.index(varname)
# Mini JS interpreter: execute one statement (assignment or return),
# recursing into expressions with a depth limit.
456 def interpret_statement(stmt, local_vars, allow_recursion=20):
457 if allow_recursion < 0:
# NOTE(review): `ExctractorError` is a typo for `ExtractorError` -- hitting
# the recursion limit would raise NameError instead of the intended error.
458 raise ExctractorError(u'Recursion limit reached')
460 if stmt.startswith(u'var '):
461 stmt = stmt[len(u'var '):]
462 ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
463 r'=(?P<expr>.*)$', stmt)
465 if ass_m.groupdict().get('index'):
467 lvar = local_vars[ass_m.group('out')]
468 idx = interpret_expression(ass_m.group('index'),
469 local_vars, allow_recursion)
470 assert isinstance(idx, int)
473 expr = ass_m.group('expr')
476 local_vars[ass_m.group('out')] = val
478 expr = ass_m.group('expr')
479 elif stmt.startswith(u'return '):
481 expr = stmt[len(u'return '):]
483 raise ExtractorError(
484 u'Cannot determine left side of statement in %r' % stmt)
486 v = interpret_expression(expr, local_vars, allow_recursion)
# Evaluate an expression: variable lookup, member access (split/join/
# length/reverse/slice), indexing, modulo, or a call to another function.
489 def interpret_expression(expr, local_vars, allow_recursion):
494 return local_vars[expr]
496 m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
498 member = m.group('member')
499 val = local_vars[m.group('in')]
500 if member == 'split("")':
502 if member == 'join("")':
504 if member == 'length':
506 if member == 'reverse()':
508 slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
510 idx = interpret_expression(
511 slice_m.group('idx'), local_vars, allow_recursion-1)
515 r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
517 val = local_vars[m.group('in')]
518 idx = interpret_expression(m.group('idx'), local_vars,
522 m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
524 a = interpret_expression(m.group('a'),
525 local_vars, allow_recursion)
526 b = interpret_expression(m.group('b'),
527 local_vars, allow_recursion)
# Function calls are resolved lazily and memoised in `functions`.
531 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
533 fname = m.group('func')
534 if fname not in functions:
535 functions[fname] = extract_function(fname)
536 argvals = [int(v) if v.isdigit() else local_vars[v]
537 for v in m.group('args').split(',')]
538 return functions[fname](argvals)
539 raise ExtractorError(u'Unsupported JS expression %r' % expr)
# Locate a JS function body by name and wrap it as a Python callable that
# runs its statements through interpret_statement.
541 def extract_function(funcname):
543 r'function ' + re.escape(funcname) +
544 r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
546 argnames = func_m.group('args').split(',')
549 local_vars = dict(zip(argnames, args))
550 for stmt in func_m.group('code').split(';'):
551 res = interpret_statement(stmt, local_vars)
# Entry point: the decipherer takes the raw signature string as its only
# argument (wrapped in a one-element arg list).
555 initial_function = extract_function(funcname)
556 return lambda s: initial_function([s])
# NOTE(review): fragmented listing -- leading integers are original line
# numbers, indentation is stripped, many lines elided. Code lines kept
# byte-identical; only comments added.
# Parse a Flash (SWF) player, locate the SignatureDecipher class inside its
# ABC (AVM2) bytecode, and interpret its `decipher` method in Python.
558 def _parse_sig_swf(self, file_contents):
# SWF header sanity check: bytes 1-2 must be "WS"; a leading "C" marks a
# zlib-compressed body starting at offset 8.
559 if file_contents[1:3] != b'WS':
560 raise ExtractorError(
561 u'Not an SWF file; header is %r' % file_contents[:3])
562 if file_contents[:1] == b'C':
563 content = zlib.decompress(file_contents[8:])
565 raise NotImplementedError(u'Unsupported compression format %r' %
# Iterate SWF tags: 16-bit header packs code (high 10 bits) and length
# (low 6 bits); length 0x3f means a 32-bit extended length follows.
568 def extract_tags(content):
570 while pos < len(content):
571 header16 = struct.unpack('<H', content[pos:pos+2])[0]
573 tag_code = header16 >> 6
574 tag_len = header16 & 0x3f
576 tag_len = struct.unpack('<I', content[pos:pos+4])[0]
578 assert pos+tag_len <= len(content)
579 yield (tag_code, content[pos:pos+tag_len])
# Find the DoABC tag; its payload after the NUL-terminated name is the
# ABC bytecode block parsed below.
583 for tag_code, tag in extract_tags(content)
585 p = code_tag.index(b'\0', 4) + 1
587 # Parse ABC (AVM2 ByteCode)
# Variable-length integer reader (LEB128-style, 7 bits per byte); works on
# either a file-like object or (data, pos) -- the two elided branches.
588 def read_int(data=None, pos=None):
589 if hasattr(data, 'read'):
597 b = struct.unpack('<B', buf)[0]
598 res = res | ((b & 0x7f) << shift)
611 b = struct.unpack('<B', data[pos:pos+1])[0]
613 res = res | ((b & 0x7f) << shift)
# Self-tests for the reader, executed at parse time.
618 assert read_int(b'\x00', 0) == (0, 1)
619 assert read_int(b'\x10', 0) == (16, 1)
620 assert read_int(b'\x34', 0) == (0x34, 1)
621 assert read_int(b'\xb4\x12', 0) == (0x12 * 0x80 + 0x34, 2)
622 assert read_int(b'\xff\xff\xff\x00', 0) == (0x1fffff, 4)
# u30: unsigned 30-bit int -- read_int plus a range check on the top bits.
624 def u30(*args, **kwargs):
625 res = read_int(*args, **kwargs)
626 if isinstance(res, tuple):
627 assert res[0] & 0xf0000000 == 0
629 assert res & 0xf0000000 == 0
# s32: sign-extend the variable-length value from two's complement.
633 def s32(data=None, pos=None):
634 v, pos = read_int(data, pos)
635 if v & 0x80000000 != 0:
636 v = - ((v ^ 0xffffffff) + 1)
638 assert s32(b'\xff\xff\xff\xff\x0f', 0) == (-1, 5)
# String reader: length-prefixed UTF-8 slice of the ABC block.
642 return (code_tag[p:p+slen].decode('utf-8'), p + slen)
644 def read_byte(data=None, pos=None):
649 res = struct.unpack('<B', data[pos:pos+1])[0]
650 return (res, pos + 1)
# --- Constant pool: ints, uints, doubles, strings, namespaces, ns sets ---
# (each pool's count includes an implicit entry 0, hence range(1, count)).
652 # minor_version + major_version
657 for _c in range(1, int_count):
659 uint_count, p = u30()
660 for _c in range(1, uint_count):
662 double_count, p = u30()
663 p += (double_count-1) * 8
664 string_count, p = u30()
665 constant_strings = [u'']
666 for _c in range(1, string_count):
668 constant_strings.append(s)
669 namespace_count, p = u30()
670 for _c in range(1, namespace_count):
673 ns_set_count, p = u30()
674 for _c in range(1, ns_set_count):
676 for _c2 in range(count):
# --- Multinames: only QName-style kinds resolve to a real string; other
# kinds are recorded as placeholders and their operands skipped. ---
678 multiname_count, p = u30()
687 0x0e: 2, # MultinameA
688 0x1b: 1, # MultinameL
689 0x1c: 1, # MultinameLA
692 for _c in range(1, multiname_count):
694 assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
696 namespace_idx, p = u30()
698 multinames.append(constant_strings[name_idx])
700 multinames.append('[MULTINAME kind: %d]' % kind)
701 for _c2 in range(MULTINAME_SIZES[kind]):
# --- method_info entries: record only the two flags the interpreter needs.
705 method_count, p = u30()
706 MethodInfo = collections.namedtuple(
708 ['NEED_ARGUMENTS', 'NEED_REST'])
710 for method_id in range(method_count):
711 param_count, p = u30()
712 _, p = u30() # return type
713 for _ in range(param_count):
714 _, p = u30() # param type
715 _, p = u30() # name index (always 0 for youtube)
716 flags, p = read_byte()
717 if flags & 0x08 != 0:
719 option_count, p = u30()
720 for c in range(option_count):
723 if flags & 0x80 != 0:
724 # Param names present
725 for _ in range(param_count):
726 _, p = u30() # param name
727 mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
728 method_infos.append(mi)
# --- Metadata: skipped entirely, only consumed to advance the cursor. ---
731 metadata_count, p = u30()
732 for _c in range(metadata_count):
734 item_count, p = u30()
735 for _c2 in range(item_count):
# Parse one traits_info record; collects method traits into `methods`
# while skipping slots/classes/metadata.
739 def parse_traits_info(pos=None):
742 trait_name_idx, pos = u30(pos=pos)
743 kind_full, pos = read_byte(pos=pos)
744 kind = kind_full & 0x0f
745 attrs = kind_full >> 4
747 if kind in [0x00, 0x06]: # Slot or Const
748 _, pos = u30(pos=pos) # Slot id
749 type_name_idx, pos = u30(pos=pos)
750 vindex, pos = u30(pos=pos)
752 _, pos = read_byte(pos=pos) # vkind
753 elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
754 _, pos = u30(pos=pos) # disp_id
755 method_idx, pos = u30(pos=pos)
756 methods[multinames[trait_name_idx]] = method_idx
757 elif kind == 0x04: # Class
758 _, pos = u30(pos=pos) # slot_id
759 _, pos = u30(pos=pos) # classi
760 elif kind == 0x05: # Function
761 _, pos = u30(pos=pos) # slot_id
762 function_idx, pos = u30(pos=pos)
# NOTE(review): mapping direction here (idx -> name) is inverted relative
# to the Method branch above (name -> idx) -- looks suspicious; confirm
# against the full source.
763 methods[function_idx] = multinames[trait_name_idx]
765 raise ExtractorError(u'Unsupported trait kind %d' % kind)
767 if attrs & 0x4 != 0: # Metadata present
768 metadata_count, pos = u30(pos=pos)
769 for _c3 in range(metadata_count):
770 _, pos = u30(pos=pos)
772 return (methods, pos)
# --- Classes: scan instance_info records for SignatureDecipher. ---
775 TARGET_CLASSNAME = u'SignatureDecipher'
776 searched_idx = multinames.index(TARGET_CLASSNAME)
777 searched_class_id = None
778 class_count, p = u30()
779 for class_id in range(class_count):
781 if name_idx == searched_idx:
782 # We found the class we're looking for!
783 searched_class_id = class_id
784 _, p = u30() # super_name idx
785 flags, p = read_byte()
786 if flags & 0x08 != 0: # Protected namespace is present
787 protected_ns_idx, p = u30()
788 intrf_count, p = u30()
789 for _c2 in range(intrf_count):
792 trait_count, p = u30()
793 for _c2 in range(trait_count):
794 _, p = parse_traits_info()
796 if searched_class_id is None:
797 raise ExtractorError(u'Target class %r not found' %
# --- class_info records: keep only traits of the class we found. ---
802 for class_id in range(class_count):
804 trait_count, p = u30()
805 for _c2 in range(trait_count):
806 trait_methods, p = parse_traits_info()
807 if class_id == searched_class_id:
808 method_names.update(trait_methods.items())
809 method_idxs.update(dict(
811 for name, idx in trait_methods.items()))
# --- Scripts: parsed only to advance the cursor. ---
814 script_count, p = u30()
815 for _c in range(script_count):
817 trait_count, p = u30()
818 for _c2 in range(trait_count):
819 _, p = parse_traits_info()
# --- Method bodies: keep bytecode + local count for wanted methods. ---
822 method_body_count, p = u30()
823 Method = collections.namedtuple('Method', ['code', 'local_count'])
825 for _c in range(method_body_count):
826 method_idx, p = u30()
828 local_count, p = u30()
829 init_scope_depth, p = u30()
830 max_scope_depth, p = u30()
831 code_length, p = u30()
832 if method_idx in method_idxs:
833 m = Method(code_tag[p:p+code_length], local_count)
834 methods[method_idxs[method_idx]] = m
836 exception_count, p = u30()
837 for _c2 in range(exception_count):
840 _, p = u30() # target
841 _, p = u30() # exc_type
842 _, p = u30() # var_name
843 trait_count, p = u30()
844 for _c2 in range(trait_count):
845 _, p = parse_traits_info()
# The whole DoABC payload must have been consumed, and every wanted
# method must have a body.
847 assert p == len(code_tag)
848 assert len(methods) == len(method_idxs)
850 method_pyfunctions = {}
# Compile one AVM2 method into a Python closure by interpreting the subset
# of opcodes the decipher routines actually use; results are memoised.
852 def extract_function(func_name):
853 if func_name in method_pyfunctions:
854 return method_pyfunctions[func_name]
855 if func_name not in methods:
856 raise ExtractorError(u'Cannot find function %r' % func_name)
857 m = methods[func_name]
860 print('Entering function %s(%r)' % (func_name, args))
# Register 0 is `this`; then the arguments, then uninitialised locals.
861 registers = ['(this)'] + list(args) + [None] * m.local_count
863 coder = io.BytesIO(m.code)
865 opcode = struct.unpack('!B', coder.read(1))[0]
866 if opcode == 36: # pushbyte
867 v = struct.unpack('!B', coder.read(1))[0]
869 elif opcode == 44: # pushstring
871 stack.append(constant_strings[idx])
872 elif opcode == 48: # pushscope
873 # We don't implement the scope register, so we'll just
874 # ignore the popped value
876 elif opcode == 70: # callproperty
878 mname = multinames[index]
879 arg_count = u30(coder)
# AVM2 pushes args left-to-right, so pop in reverse to restore order.
880 args = list(reversed(
881 [stack.pop() for _ in range(arg_count)]))
883 if mname == u'split':
884 assert len(args) == 1
885 assert isinstance(args[0], compat_str)
886 assert isinstance(obj, compat_str)
890 res = obj.split(args[0])
892 elif mname == u'slice':
893 assert len(args) == 1
894 assert isinstance(args[0], int)
895 assert isinstance(obj, list)
898 elif mname == u'join':
899 assert len(args) == 1
900 assert isinstance(args[0], compat_str)
901 assert isinstance(obj, list)
902 res = args[0].join(obj)
904 elif mname in method_pyfunctions:
905 stack.append(method_pyfunctions[mname](args))
907 raise NotImplementedError(
908 u'Unsupported property %r on %r'
910 elif opcode == 72: # returnvalue
913 elif opcode == 79: # callpropvoid
915 mname = multinames[index]
916 arg_count = u30(coder)
917 args = list(reversed(
918 [stack.pop() for _ in range(arg_count)]))
920 if mname == u'reverse':
921 assert isinstance(obj, list)
924 raise NotImplementedError(
925 u'Unsupported (void) property %r on %r'
927 elif opcode == 93: # findpropstrict
929 mname = multinames[index]
930 res = extract_function(mname)
932 elif opcode == 97: # setproperty
937 assert isinstance(obj, list)
938 assert isinstance(idx, int)
940 elif opcode == 98: # getlocal
942 stack.append(registers[index])
943 elif opcode == 99: # setlocal
946 registers[index] = value
947 elif opcode == 102: # getproperty
949 pname = multinames[index]
950 if pname == u'length':
952 assert isinstance(obj, list)
953 stack.append(len(obj))
954 else: # Assume attribute access
956 assert isinstance(idx, int)
958 assert isinstance(obj, list)
959 stack.append(obj[idx])
960 elif opcode == 128: # coerce
962 elif opcode == 133: # coerce_s
963 assert isinstance(stack[-1], (type(None), compat_str))
964 elif opcode == 164: # modulo
967 res = value1 % value2
969 elif opcode == 208: # getlocal_0
970 stack.append(registers[0])
971 elif opcode == 209: # getlocal_1
972 stack.append(registers[1])
973 elif opcode == 210: # getlocal_2
974 stack.append(registers[2])
975 elif opcode == 211: # getlocal_3
976 stack.append(registers[3])
977 elif opcode == 214: # setlocal_2
978 registers[2] = stack.pop()
979 elif opcode == 215: # setlocal_3
980 registers[3] = stack.pop()
982 raise NotImplementedError(
983 u'Unsupported opcode %d' % opcode)
985 method_pyfunctions[func_name] = resfunc
# Entry point: the SWF's `decipher` method, wrapped to take the raw
# signature string.
988 initial_function = extract_function(u'decipher')
989 return lambda s: initial_function([s])
# NOTE(review): fragmented listing -- leading integers are original line
# numbers, indentation is stripped, many lines elided (in particular the
# `if len(s) == N:` guards that select among the return statements below).
# Code lines kept byte-identical; only comments added.
991 def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False):
992 """Turn the encrypted s field into a working signature"""
# Preferred path: extract the deciphering function from the actual player
# (cached per player URL); any failure falls through to the static tables.
994 if jsplayer_url is not None:
996 if jsplayer_url not in self._jsplayer_cache:
997 self._jsplayer_cache[jsplayer_url] = self._extract_signature_function(
998 video_id, jsplayer_url
1000 return self._jsplayer_cache[jsplayer_url]([s])
1001 except Exception as e:
1002 tb = traceback.format_exc()
1003 self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb)
1005 self._downloader.report_warning(u'Warning: Falling back to static signature algorithm')
# Static fallback: one hard-coded permutation per signature length
# (the dispatching length checks are elided in this listing).
1008 # The videos with age protection use another player, so the
1009 # algorithms can be different.
1011 return s[2:63] + s[82] + s[64:82] + s[63]
1014 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
1016 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
1018 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
1020 return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
1022 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
1024 return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
1026 return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
1028 return s[81:36:-1] + s[0] + s[35:2:-1]
1030 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
1032 return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
1034 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
1036 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
1038 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
# Unknown signature length: give up with a retryable error.
1041 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
# NOTE(review): fragmented listing -- leading integers are original line
# numbers; the length guard between the two returns is elided. Code lines
# kept byte-identical; only comments added.
# Decrypt signatures for age-gated videos, which are served by a different
# player and therefore may use a different scrambling.
1043 def _decrypt_signature_age_gate(self, s):
1044 # The videos with age protection use another player, so the algorithms
1047 return s[2:63] + s[82] + s[64:82] + s[63]
1049 # Fallback to the other algorithms
# NOTE(review): _decrypt_signature's visible signature requires
# (s, video_id, jsplayer_url, ...); calling it with only `s` would raise
# TypeError -- confirm against the full source and fix the call site.
1050 return self._decrypt_signature(s)
# NOTE(review): fragmented listing -- leading integers are original line
# numbers; several lines elided. Code lines kept byte-identical; only
# comments added.
# Return a dict {language code -> timedtext URL} of the video's manually
# created subtitle tracks; warns (instead of failing) when none exist or
# the listing download fails.
1052 def _get_available_subtitles(self, video_id):
1054 sub_list = self._download_webpage(
1055 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1056 video_id, note=False)
1057 except ExtractorError as err:
1058 self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
# Scrape (name, lang_code) pairs out of the XML track list.
1060 lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
# Build one timedtext URL per language, in the user's requested format.
1065 params = compat_urllib_parse.urlencode({
1068 'fmt': self._downloader.params.get('subtitlesformat'),
1070 url = u'http://www.youtube.com/api/timedtext?' + params
1071 sub_lang_list[lang] = url
1072 if not sub_lang_list:
1073 self._downloader.report_warning(u'video doesn\'t have subtitles')
1075 return sub_lang_list
# NOTE(review): fragmented listing -- leading integers are original line
# numbers; several lines elided. Code lines kept byte-identical; only
# comments added.
# Return a dict {language code -> caption URL} of auto-generated (ASR)
# captions, discovered via the ttsurl embedded in the watch page's
# ytplayer.config JSON.
1077 def _get_available_automatic_caption(self, video_id, webpage):
1078 """We need the webpage for getting the captions url, pass it as an
1079 argument to speed up the process."""
1080 sub_format = self._downloader.params.get('subtitlesformat')
1081 self.to_screen(u'%s: Looking for automatic captions' % video_id)
1082 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
1083 err_msg = u'Couldn\'t find automatic captions for %s' % video_id
1085 self._downloader.report_warning(err_msg)
1087 player_config = json.loads(mobj.group(1))
1089 args = player_config[u'args']
1090 caption_url = args[u'ttsurl']
1091 timestamp = args[u'timestamp']
1092 # We get the available subtitles
1093 list_params = compat_urllib_parse.urlencode({
1098 list_url = caption_url + '&' + list_params
1099 list_page = self._download_webpage(list_url, video_id)
1100 caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
# The <track> node must be marked kind="asr" (automatic speech
# recognition); otherwise the video has no automatic captions.
1101 original_lang_node = caption_list.find('track')
1102 if original_lang_node.attrib.get('kind') != 'asr' :
1103 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
1105 original_lang = original_lang_node.attrib['lang_code']
# Build one translated-caption URL per <target> language.
1108 for lang_node in caption_list.findall('target'):
1109 sub_lang = lang_node.attrib['lang_code']
1110 params = compat_urllib_parse.urlencode({
1111 'lang': original_lang,
1117 sub_lang_list[sub_lang] = caption_url + '&' + params
1118 return sub_lang_list
1119 # An extractor error can be raised by the download process if there are
1120 # no automatic captions but there are subtitles
1121 except (KeyError, ExtractorError):
1122 self._downloader.report_warning(err_msg)
# NOTE(review): fragmented listing -- the loop line introducing `x` is
# elided. Code lines kept byte-identical; only comments added.
# Print each available itag with its extension, dimensions and any
# special annotation, for the --list-formats option.
1125 def _print_formats(self, formats):
1126 print('Available formats:')
1128 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
1129 self._video_dimensions.get(x, '???'),
1130 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
# NOTE(review): fragmented listing -- the `if mobj is None:` guard and the
# trailing `return video_id` are elided. Code lines kept byte-identical;
# only comments added.
# Pull the 11-character video ID out of *url* using _VALID_URL; raises
# ExtractorError for URLs that do not match.
1132 def _extract_id(self, url):
1133 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
1135 raise ExtractorError(u'Invalid URL: %s' % url)
# Group 2 of _VALID_URL is the ([0-9A-Za-z_-]{11}) video-ID capture.
1136 video_id = mobj.group(2)
# NOTE(review): fragmented listing -- leading integers are original line
# numbers; several lines elided. Code lines kept byte-identical; only
# comments added.
1139 def _get_video_url_list(self, url_map):
1141 Transform a dictionary in the format {itag:url} to a list of (itag, url)
1142 with the requested formats.
# Honour the user's format preferences: explicit --format, an upper
# quality bound (--format-limit), and free-format preference.
1144 req_format = self._downloader.params.get('format', None)
1145 format_limit = self._downloader.params.get('format_limit', None)
1146 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1147 if format_limit is not None and format_limit in available_formats:
# The quality-ordered list is truncated at the limit itag.
1148 format_list = available_formats[available_formats.index(format_limit):]
1150 format_list = available_formats
1151 existing_formats = [x for x in format_list if x in url_map]
1152 if len(existing_formats) == 0:
1153 raise ExtractorError(u'no known formats available for video')
1154 if self._downloader.params.get('listformats', None):
1155 self._print_formats(existing_formats)
# 'best'/'worst'/'all' map to the first/last/every entry of the
# quality-ordered existing formats.
1157 if req_format is None or req_format == 'best':
1158 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1159 elif req_format == 'worst':
1160 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
1161 elif req_format in ('-1', 'all'):
1162 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1164 # Specific formats. We pick the first in a slash-delimited sequence.
1165 # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
1166 # available in the specified format. For example,
1167 # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1168 # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
1169 # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
1170 req_formats = req_format.split('/')
1171 video_url_list = None
1172 for rf in req_formats:
1174 video_url_list = [(rf, url_map[rf])]
# Container names ('mp4', 'flv', ...) resolve through the itag table.
1176 if rf in self._video_formats_map:
1177 for srf in self._video_formats_map[rf]:
1179 video_url_list = [(srf, url_map[srf])]
1184 if video_url_list is None:
1185 raise ExtractorError(u'requested format not available')
1186 return video_url_list
# NOTE(review): fragmented listing -- the url_map initialisation, the
# _get_urls return and the final return are elided. Code lines kept
# byte-identical; only comments added.
# Download an HLS (m3u8) manifest and build {itag: url} from its variant
# stream URLs, with the itag scraped out of each URL's path.
1188 def _extract_from_m3u8(self, manifest_url, video_id):
# Keep only non-empty, non-comment ('#'-prefixed) manifest lines.
1190 def _get_urls(_manifest):
1191 lines = _manifest.split('\n')
1192 urls = filter(lambda l: l and not l.startswith('#'),
1195 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
1196 formats_urls = _get_urls(manifest)
1197 for format_url in formats_urls:
1198 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1199 url_map[itag] = format_url
1202 def _real_extract(self, url):
1203 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1204 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
1206 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1207 mobj = re.search(self._NEXT_URL_RE, url)
1209 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1210 video_id = self._extract_id(url)
1213 self.report_video_webpage_download(video_id)
1214 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1215 request = compat_urllib_request.Request(url)
1217 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1218 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1219 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1221 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1223 # Attempt to extract SWF player URL
1224 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1225 if mobj is not None:
1226 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1231 self.report_video_info_webpage_download(video_id)
1232 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1233 self.report_age_confirmation()
1235 # We simulate the access to the video from www.youtube.com/v/{video_id}
1236 # this can be viewed without login into Youtube
1237 data = compat_urllib_parse.urlencode({'video_id': video_id,
1241 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1245 video_info_url = 'https://www.youtube.com/get_video_info?' + data
1246 video_info_webpage = self._download_webpage(video_info_url, video_id,
1248 errnote='unable to download video info webpage')
1249 video_info = compat_parse_qs(video_info_webpage)
1252 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1253 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1254 % (video_id, el_type))
1255 video_info_webpage = self._download_webpage(video_info_url, video_id,
1257 errnote='unable to download video info webpage')
1258 video_info = compat_parse_qs(video_info_webpage)
1259 if 'token' in video_info:
1261 if 'token' not in video_info:
1262 if 'reason' in video_info:
1263 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1265 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1267 # Check for "rental" videos
1268 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1269 raise ExtractorError(u'"rental" videos not supported')
1271 # Start extracting information
1272 self.report_information_extraction(video_id)
1275 if 'author' not in video_info:
1276 raise ExtractorError(u'Unable to extract uploader name')
1277 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1280 video_uploader_id = None
1281 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1282 if mobj is not None:
1283 video_uploader_id = mobj.group(1)
1285 self._downloader.report_warning(u'unable to extract uploader nickname')
1288 if 'title' not in video_info:
1289 raise ExtractorError(u'Unable to extract video title')
1290 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1293 # We try first to get a high quality image:
1294 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1295 video_webpage, re.DOTALL)
1296 if m_thumb is not None:
1297 video_thumbnail = m_thumb.group(1)
1298 elif 'thumbnail_url' not in video_info:
1299 self._downloader.report_warning(u'unable to extract video thumbnail')
1300 video_thumbnail = ''
1301 else: # don't panic if we can't find it
1302 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1306 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1307 if mobj is not None:
1308 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1309 upload_date = unified_strdate(upload_date)
1312 video_description = get_element_by_id("eow-description", video_webpage)
1313 if video_description:
1314 video_description = clean_html(video_description)
1316 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1318 video_description = unescapeHTML(fd_mobj.group(1))
1320 video_description = u''
1323 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1325 if self._downloader.params.get('listsubtitles', False):
1326 self._list_available_subtitles(video_id, video_webpage)
1329 if 'length_seconds' not in video_info:
1330 self._downloader.report_warning(u'unable to extract video duration')
1333 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1335 # Decide which formats to download
1338 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1340 raise ValueError('Could not find vevo ID')
1341 info = json.loads(mobj.group(1))
1343 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1344 # this signatures are encrypted
1345 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1347 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1348 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1349 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1351 if 'url_encoded_fmt_stream_map' in video_info:
1352 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1354 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1355 elif 'adaptive_fmts' in video_info:
1356 if 'url_encoded_fmt_stream_map' in video_info:
1357 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1359 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1363 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1364 self.report_rtmp_download()
1365 video_url_list = [(None, video_info['conn'][0])]
1366 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1367 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1368 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1370 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1371 url_data = compat_parse_qs(url_data_str)
1372 if 'itag' in url_data and 'url' in url_data:
1373 url = url_data['url'][0]
1374 if 'sig' in url_data:
1375 url += '&signature=' + url_data['sig'][0]
1376 elif 's' in url_data:
1377 encrypted_sig = url_data['s'][0]
1378 if self._downloader.params.get('verbose'):
1380 player_version = self._search_regex(r'-(.+)\.swf$',
1381 player_url if player_url else 'NOT FOUND',
1382 'flash player', fatal=False)
1383 player_desc = 'flash player %s' % player_version
1385 player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
1386 'html5 player', fatal=False)
1387 player_desc = u'html5 player %s' % player_version
1389 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1390 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1391 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1396 jsplayer_url_json = self._search_regex(
1397 r'"assets":.+?"js":\s*("[^"]+")',
1398 video_webpage, u'JS player URL')
1399 jsplayer_url = json.loads(jsplayer_url_json)
1401 signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate)
1402 url += '&signature=' + signature
1403 if 'ratebypass' not in url:
1404 url += '&ratebypass=yes'
1405 url_map[url_data['itag'][0]] = url
1406 video_url_list = self._get_video_url_list(url_map)
1407 if not video_url_list:
1409 elif video_info.get('hlsvp'):
1410 manifest_url = video_info['hlsvp'][0]
1411 url_map = self._extract_from_m3u8(manifest_url, video_id)
1412 video_url_list = self._get_video_url_list(url_map)
1413 if not video_url_list:
1417 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1420 for format_param, video_real_url in video_url_list:
1422 video_extension = self._video_extensions.get(format_param, 'flv')
1424 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1425 self._video_dimensions.get(format_param, '???'),
1426 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1430 'url': video_real_url,
1431 'uploader': video_uploader,
1432 'uploader_id': video_uploader_id,
1433 'upload_date': upload_date,
1434 'title': video_title,
1435 'ext': video_extension,
1436 'format': video_format,
1437 'thumbnail': video_thumbnail,
1438 'description': video_description,
1439 'player_url': player_url,
1440 'subtitles': video_subtitles,
1441 'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    """Extract every video of a YouTube playlist through the GData v2 API."""
    IE_DESC = u'YouTube.com playlists'
    _VALID_URL = r"""(?:
                        (?:course|view_play_list|my_playlists|artist|playlist|watch)
                        \? (?:.*?&)*? (?:p|a|list)=
                     ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                     ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData playlist feed: %s = playlist id, %i = page size, %i = 1-based start index.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is written with embedded whitespace, so re.VERBOSE is required.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        # The id is captured by one of the two alternatives in _VALID_URL.
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            # start-index is 1-based: page 1 -> 1, page 2 -> _MAX_RESULTS + 1, ...
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # NOTE(review): the API appears not to serve results past index
                # 1000, hence the warning here — confirm against the GData docs.
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            # Collect (playlist position, watch URL) pairs so the final list
            # can be ordered by position regardless of API page boundaries.
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
        # Sort by position, then keep only the URL component of each pair.
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extract all videos of a YouTube channel by paging through its HTML list view."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First page(s): plain HTML channel listing (%s = channel id, %s = page number).
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker in the HTML that signals further pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # Subsequent pages: JSON ajax endpoint (%s = paging token, %s = channel id).
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        """Collect the distinct video ids linked from a channel page fragment."""
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            # Deduplicate while preserving first-seen order.
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
            url = self._TEMPLATE_URL % (channel_id, pagenum)
            page = self._download_webpage(url, channel_id,
                                          u'Downloading page #%s' % pagenum)

            # Extract video identifiers
            ids_in_page = self.extract_videos_from_page(page)
            video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                # The ajax endpoint returns JSON wrapping the HTML fragment.
                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # Stop once the "load more" widget no longer advertises further pages.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extract all uploads of a YouTube user via the GData API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Maximum number of entries the GData uploads feed returns per request.
    _GDATA_PAGE_SIZE = 50
    # Uploads feed: %s = username, %d = page size, %d = 1-based start index.
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor; this regex is too permissive and would match those URLs too.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            # start-index is 1-based: page 0 -> 1, page 1 -> 51, ...
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # The entry id is a URL whose last path component is the video id.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query

            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor for the "ytsearch" keyword, backed by the GData API."""
    IE_DESC = u'YouTube.com searches'
    # %s = url-quoted query, %i = 1-based start index; fixed page size of 50.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # Page through the API (50 results per request) until `limit` ids are known.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more results than the API says exist.
            limit = min(n, api_response['totalItems'])

        # The last page may overshoot the requested count; trim to exactly n.
        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for YouTube show pages, which expose one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Every season of the show is linked as a separate playlist; hand each
        # one off to the playlist extractor.
        season_paths = re.findall(r'href="(/playlist\?list=.*?)"', webpage)
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_paths)))
        results = []
        for path in season_paths:
            results.append(self.url_result('https://www.youtube.com' + path, 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are tied to an account, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # NOTE(review): referenced below as `self._FEED_TEMPLATE % paging`
        # (no call) — presumably decorated as a @property; confirm upstream.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        # %%s keeps a literal %s placeholder for the paging offset.
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            # Offset into the feed; _PAGING_STEP is expected from the subclass
            # or surrounding definitions — not visible here, confirm.
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            # The endpoint answers JSON with the rendered feed HTML inside.
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # orderedSet deduplicates the ids while keeping first-seen order.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null paging token means there are no further pages.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscription videos."""
    # Feed identifier sent to the feed_ajax endpoint.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended videos."""
    # Feed identifier sent to the feed_ajax endpoint.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's "watch later" list."""
    # The watch-later list is per-account, so the personal feed action is used.
    _PERSONAL_FEED = True
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites, delegated to the playlist IE."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Favourites are only visible to the owning account.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist; extract
        # that id and let YoutubePlaylistIE do the actual video extraction.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')