15 from .common import InfoExtractor, SearchInfoExtractor
16 from .subtitles import SubtitlesInfoExtractor
22 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
# Google account sign-in page; fetched and POSTed to by _login().
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# URL that pins the site language to English (hl=en) so scraped text is stable.
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
# Age-verification confirmation endpoint, used by _confirm_age().
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
# presumably the .netrc machine key consulted by _get_login_info() — TODO confirm
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
def report_lang(self):
"""Report attempt to set language."""
self.to_screen(u'Setting language')
def _set_language(self):
# Fetch _LANG_URL so YouTube records an English-language preference
# for this session; later scraping then sees predictable English text.
request = compat_urllib_request.Request(self._LANG_URL)
compat_urllib_request.urlopen(request).read()
# A network failure only produces a warning; extraction continues.
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
# --- body of _login(): authenticate against Google accounts ---
(username, password) = self._get_login_info()
# No authentication to be performed
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
request = compat_urllib_request.Request(self._LOGIN_URL)
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
# GALX and dsh are hidden form fields scraped from the login page and
# echoed back in the login POST below.
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
u'PersistentCookie': u'yes',
u'bgresponse': u'js_disabled',
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'signIn': u'Sign in',
u'service': u'youtube',
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
# The sign-in form still being present in the response means login failed.
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
def _confirm_age(self):
"""POST the age-verification form so age-restricted pages become readable."""
'action_confirm': 'Confirm',
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
self.report_age_confirmation()
compat_urllib_request.urlopen(request).read().decode('utf-8')
# Unlike _set_language/_login, a failure here aborts extraction.
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
def _real_initialize(self):
"""Run once before extraction: set language, log in, then confirm age."""
# presumably a no-op when there is no downloader to report through — TODO confirm
if self._downloader is None:
# Each helper returns False on failure; initialization stops early then.
if not self._set_language():
if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
IE_DESC = u'YouTube.com'
(?:https?://)? # http(s):// (optional)
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|(?: # or the v= param in all its forms
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|youtu\.be/ # just youtu.be/xxxx
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
# Pattern used by _real_extract to undo redirect (e.g. age-gate) URLs.
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Listed in order of quality; itag strings consumed by _get_video_url_list().
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
'85', '84', '102', '83', '101', '82', '100',
'138', '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
'141', '172', '140', '171', '139',
# Same itags, but free containers ranked ahead at comparable quality;
# selected when the 'prefer_free_formats' option is set.
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
'85', '102', '84', '101', '83', '100', '82',
'138', '248', '137', '247', '136', '246', '245',
'244', '135', '243', '134', '242', '133', '160',
'172', '141', '171', '140', '139',
# Container name -> itags in that container, best first (see _get_video_url_list).
_video_formats_map = {
'flv': ['35', '34', '6', '5'],
'3gp': ['36', '17', '13'],
'mp4': ['38', '37', '22', '18'],
'webm': ['46', '45', '44', '43'],
# itag -> file extension; queried with a 'flv' default in _print_formats.
_video_extensions = {
# Apple HTTP Live Streaming
# itag -> human-readable resolution string, used by _print_formats.
_video_dimensions = {
u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
u"file": u"BaW_jenozKc.mp4",
u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
u"uploader": u"Philipp Hagemeister",
u"uploader_id": u"phihag",
u"upload_date": u"20121002",
u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
u"file": u"1ltcDfZMA3U.flv",
u"note": u"Test VEVO video (#897)",
u"upload_date": u"20070518",
u"title": u"Maps - It Will Find You",
u"description": u"Music video by Maps performing It Will Find You.",
u"uploader": u"MuteUSA",
u"uploader_id": u"MuteUSA"
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
u"file": u"UxxajLWwzqY.mp4",
u"note": u"Test generic use_cipher_signature video (#897)",
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
u"file": u"07FYdnEawAQ.mp4",
u"note": u"Test VEVO video with age protection (#956)",
u"upload_date": u"20130703",
u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
u"description": u"md5:64249768eec3bc4276236606ea996373",
u"uploader": u"justintimberlakeVEVO",
u"uploader_id": u"justintimberlakeVEVO"
u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
u'file': u'TGi3HqYrWHE.mp4',
u'note': u'm3u8 video',
u'title': u'Triathlon - Men - London 2012 Olympic Games',
u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
u'uploader': u'olympic',
u'upload_date': u'20120807',
u'uploader_id': u'olympic',
u'skip_download': True,
def suitable(cls, url):
    """Receives a URL and returns True if suitable for this IE.

    Playlist URLs also match the video pattern, so anything claimed by
    the playlist extractor is declined here first.
    """
    if YoutubePlaylistIE.suitable(url):
        return False
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    return matched is not None
def __init__(self, *args, **kwargs):
# Defer to the base extractor's constructor, then prepare the
# per-player-URL cache that _decrypt_signature() uses to reuse
# already-extracted signature functions.
super(YoutubeIE, self).__init__(*args, **kwargs)
self._player_cache = {}
def report_video_webpage_download(self, video_id):
    """Announce that the watch page for *video_id* is being fetched."""
    notice = u'%s: Downloading video webpage' % video_id
    self.to_screen(notice)
def report_video_info_webpage_download(self, video_id):
    """Announce that the video-info page for *video_id* is being fetched."""
    notice = u'%s: Downloading video info webpage' % video_id
    self.to_screen(notice)
def report_information_extraction(self, video_id):
    """Announce that metadata extraction for *video_id* has started."""
    notice = u'%s: Extracting video information' % video_id
    self.to_screen(notice)
def report_unavailable_format(self, video_id, format):
    """Report that the requested *format* is not available for *video_id*.

    (Docstring fixed: it previously read 'Report extracted video URL.',
    a copy-paste error from a neighbouring method.)
    """
    self.to_screen(u'%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Indicate the download will use the RTMP protocol."""
    notice = u'RTMP download detected'
    self.to_screen(notice)
def _extract_signature_function(self, video_id, player_url):
    """Download the player referenced by *player_url* and build a Python
    function that decrypts encrypted signatures.

    The player id and its type ('js' or 'swf') are parsed out of the
    player URL's filename.
    """
    id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$',
                    player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # TODO read from filesystem cache

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':  # BUG FIX: was misspelled 'player_tpye' (NameError)
        urlh = self._request_webpage(
            player_url, video_id,
            note=u'Downloading %s player %s' % (player_type, player_id),
            errnote=u'Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        assert False, 'Invalid player type %r' % player_type

    return res
def _parse_sig_js(self, jscode):
"""Locate and interpret the signature-decryption function in player JS."""
funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode,
u'Initial JS player signature function name')
# NOTE(review): belongs to an elided one-argument helper; maps a
# single lowercase JS variable name to its positional index
# (Python 2 'string.lowercase').
return string.lowercase.index(varname)
def interpret_statement(stmt, local_vars, allow_recursion=20):
    """Execute one JS statement (var declaration, assignment or return).

    Returns the value produced by the statement's expression.
    """
    if allow_recursion < 0:
        # BUG FIX: exception class name was misspelled ('ExctractorError'),
        # which itself raised a NameError instead of the intended error.
        raise ExtractorError(u'Recursion limit reached')

    if stmt.startswith(u'var '):
        stmt = stmt[len(u'var '):]
    ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                     r'=(?P<expr>.*)$', stmt)
    if ass_m:
        if ass_m.groupdict().get('index'):
            # Indexed assignment: evaluate the index expression, then
            # store into the referenced local container.
            def assign(val):
                lvar = local_vars[ass_m.group('out')]
                idx = interpret_expression(ass_m.group('index'),
                                           local_vars, allow_recursion)
                assert isinstance(idx, int)
                lvar[idx] = val
                return val
            expr = ass_m.group('expr')
        else:
            # Plain assignment to a local variable.
            def assign(val):
                local_vars[ass_m.group('out')] = val
                return val
            expr = ass_m.group('expr')
    elif stmt.startswith(u'return '):
        assign = lambda v: v
        expr = stmt[len(u'return '):]
    else:
        raise ExtractorError(
            u'Cannot determine left side of statement in %r' % stmt)

    v = interpret_expression(expr, local_vars, allow_recursion)
    return assign(v)
def interpret_expression(expr, local_vars, allow_recursion):
"""Evaluate a (very small) subset of JS expressions against local_vars."""
# Bare variable reference.
return local_vars[expr]
# Member access: var.member
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
member = m.group('member')
val = local_vars[m.group('in')]
if member == 'split("")':
if member == 'join("")':
if member == 'length':
if member == 'reverse()':
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
idx = interpret_expression(
slice_m.group('idx'), local_vars, allow_recursion-1)
# Indexing: var[idx]
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
val = local_vars[m.group('in')]
idx = interpret_expression(m.group('idx'), local_vars,
# Binary modulo: a % b
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
a = interpret_expression(m.group('a'),
local_vars, allow_recursion)
b = interpret_expression(m.group('b'),
local_vars, allow_recursion)
# Function call: f(arg, ...) — functions are extracted lazily and cached.
r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
fname = m.group('func')
if fname not in functions:
functions[fname] = extract_function(fname)
argvals = [int(v) if v.isdigit() else local_vars[v]
for v in m.group('args').split(',')]
return functions[fname](argvals)
raise ExtractorError(u'Unsupported JS expression %r' % expr)
def extract_function(funcname):
"""Find the named JS function body and wrap it as a Python callable."""
r'function ' + re.escape(funcname) +
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
argnames = func_m.group('args').split(',')
# Bind positional args to the JS parameter names, then run the
# semicolon-separated statements; the 'return' statement's value wins.
local_vars = dict(zip(argnames, args))
for stmt in func_m.group('code').split(';'):
res = interpret_statement(stmt, local_vars)
# The decryption entry point takes the signature as a list of chars.
initial_function = extract_function(funcname)
return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
"""Parse the SWF player, locate the 'SignatureDecipher' AS3 class and
return a Python callable implementing its 'decipher' method by
interpreting the contained AVM2 (ABC) bytecode."""
if file_contents[1:3] != b'WS':
raise ExtractorError(
u'Not an SWF file; header is %r' % file_contents[:3])
# 'CWS' header means the body (after the 8-byte header) is zlib-compressed.
if file_contents[:1] == b'C':
content = zlib.decompress(file_contents[8:])
raise NotImplementedError(u'Unsupported compression format %r' %
def extract_tags(content):
"""Yield (tag_code, tag_body) for each SWF tag in the body."""
while pos < len(content):
header16 = struct.unpack('<H', content[pos:pos+2])[0]
tag_code = header16 >> 6
tag_len = header16 & 0x3f
# A 6-bit length of 0x3f signals a long tag with a 32-bit length.
tag_len = struct.unpack('<I', content[pos:pos+4])[0]
assert pos+tag_len <= len(content)
yield (tag_code, content[pos:pos+tag_len])
for tag_code, tag in extract_tags(content)
# Skip flags and the NUL-terminated tag name that precede the ABC data.
p = code_tag.index(b'\0', 4) + 1
code_reader = io.BytesIO(code_tag[p:])
# Parse ABC (AVM2 ByteCode)
def read_int(reader=None):
# Variable-length u32: 7 bits per byte, high bit = continuation.
b = struct.unpack('<B', buf)[0]
res = res | ((b & 0x7f) << shift)
def u30(reader=None):
res = read_int(reader)
assert res & 0xf0000000 == 0
def s32(reader=None):
# Sign-extend the variable-length value from 32 bits.
if v & 0x80000000 != 0:
v = - ((v ^ 0xffffffff) + 1)
def string(reader=None):
resb = reader.read(slen)
assert len(resb) == slen
return resb.decode('utf-8')
def read_bytes(count, reader=None):
resb = reader.read(count)
assert len(resb) == count
def read_byte(reader=None):
resb = read_bytes(1, reader=reader)
res = struct.unpack('<B', resb)[0]
# minor_version + major_version
# Constant pools: ints, uints, doubles — skipped, only counted.
for _c in range(1, int_count):
for _c in range(1, uint_count):
_ = read_bytes((double_count-1) * 8)
# String pool; index 0 is implicitly the empty string.
constant_strings = [u'']
for _c in range(1, string_count):
constant_strings.append(s)
namespace_count = u30()
for _c in range(1, namespace_count):
_ = read_bytes(1) # kind
for _c in range(1, ns_set_count):
for _c2 in range(count):
multiname_count = u30()
0x0e: 2, # MultinameA
0x1b: 1, # MultinameL
0x1c: 1, # MultinameLA
for _c in range(1, multiname_count):
assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
namespace_idx = u30()
multinames.append(constant_strings[name_idx])
# Non-QName kinds are kept as placeholders; only names matter here.
multinames.append('[MULTINAME kind: %d]' % kind)
for _c2 in range(MULTINAME_SIZES[kind]):
# Method signatures: only the two flags needed later are retained.
MethodInfo = collections.namedtuple(
['NEED_ARGUMENTS', 'NEED_REST'])
for method_id in range(method_count):
_ = u30() # return type
for _ in range(param_count):
_ = u30() # param type
_ = u30() # name index (always 0 for youtube)
if flags & 0x08 != 0:
for c in range(option_count):
_ = read_bytes(1) # kind
if flags & 0x80 != 0:
# Param names present
for _ in range(param_count):
_ = u30() # param name
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
method_infos.append(mi)
# Metadata blocks are parsed only to advance the reader.
metadata_count = u30()
for _c in range(metadata_count):
for _c2 in range(item_count):
def parse_traits_info():
"""Consume one trait record; record method-name bindings as a side effect."""
trait_name_idx = u30()
kind_full = read_byte()
kind = kind_full & 0x0f
attrs = kind_full >> 4
if kind in [0x00, 0x06]: # Slot or Const
type_name_idx = u30()
_ = read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
methods[multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
elif kind == 0x05: # Function
methods[function_idx] = multinames[trait_name_idx]
raise ExtractorError(u'Unsupported trait kind %d' % kind)
if attrs & 0x4 != 0: # Metadata present
metadata_count = u30()
for _c3 in range(metadata_count):
# Find the index of the class that holds the decryption routines.
TARGET_CLASSNAME = u'SignatureDecipher'
searched_idx = multinames.index(TARGET_CLASSNAME)
searched_class_id = None
for class_id in range(class_count):
if name_idx == searched_idx:
# We found the class we're looking for!
searched_class_id = class_id
_ = u30() # super_name idx
if flags & 0x08 != 0: # Protected namespace is present
protected_ns_idx = u30()
for _c2 in range(intrf_count):
for _c2 in range(trait_count):
_ = parse_traits_info()
if searched_class_id is None:
raise ExtractorError(u'Target class %r not found' %
# Second pass over the class records: collect the target class's methods.
for class_id in range(class_count):
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
if class_id == searched_class_id:
method_names.update(trait_methods.items())
method_idxs.update(dict(
for name, idx in trait_methods.items()))
# Script records: only traversed to keep the reader position correct.
for _c in range(script_count):
for _c2 in range(trait_count):
_ = parse_traits_info()
# Method bodies: keep bytecode + local count for the wanted methods.
method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count'])
for _c in range(method_body_count):
init_scope_depth = u30()
max_scope_depth = u30()
code = read_bytes(code_length)
if method_idx in method_idxs:
m = Method(code, local_count)
methods[method_idxs[method_idx]] = m
exception_count = u30()
for _c2 in range(exception_count):
for _c2 in range(trait_count):
_ = parse_traits_info()
assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
method_pyfunctions = {}
def extract_function(func_name):
"""Interpret one AVM2 method's bytecode and memoize it as a Python function."""
if func_name in method_pyfunctions:
return method_pyfunctions[func_name]
if func_name not in methods:
raise ExtractorError(u'Cannot find function %r' % func_name)
m = methods[func_name]
# Register 0 is 'this'; then the call arguments, then locals.
registers = ['(this)'] + list(args) + [None] * m.local_count
coder = io.BytesIO(m.code)
opcode = struct.unpack('!B', coder.read(1))[0]
if opcode == 36: # pushbyte
v = struct.unpack('!B', coder.read(1))[0]
elif opcode == 44: # pushstring
stack.append(constant_strings[idx])
elif opcode == 48: # pushscope
# We don't implement the scope register, so we'll just
# ignore the popped value
elif opcode == 70: # callproperty
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
# Only the string/list methods the decipher code uses are emulated.
if mname == u'split':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, compat_str)
res = obj.split(args[0])
elif mname == u'slice':
assert len(args) == 1
assert isinstance(args[0], int)
assert isinstance(obj, list)
elif mname == u'join':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, list)
res = args[0].join(obj)
elif mname in method_pyfunctions:
stack.append(method_pyfunctions[mname](args))
raise NotImplementedError(
u'Unsupported property %r on %r'
elif opcode == 72: # returnvalue
elif opcode == 79: # callpropvoid
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
if mname == u'reverse':
assert isinstance(obj, list)
raise NotImplementedError(
u'Unsupported (void) property %r on %r'
elif opcode == 93: # findpropstrict
mname = multinames[index]
res = extract_function(mname)
elif opcode == 97: # setproperty
assert isinstance(obj, list)
assert isinstance(idx, int)
elif opcode == 98: # getlocal
stack.append(registers[index])
elif opcode == 99: # setlocal
registers[index] = value
elif opcode == 102: # getproperty
pname = multinames[index]
if pname == u'length':
assert isinstance(obj, list)
stack.append(len(obj))
else: # Assume attribute access
assert isinstance(idx, int)
assert isinstance(obj, list)
stack.append(obj[idx])
elif opcode == 128: # coerce
elif opcode == 133: # coerce_s
assert isinstance(stack[-1], (type(None), compat_str))
elif opcode == 164: # modulo
res = value1 % value2
elif opcode == 208: # getlocal_0
stack.append(registers[0])
elif opcode == 209: # getlocal_1
stack.append(registers[1])
elif opcode == 210: # getlocal_2
stack.append(registers[2])
elif opcode == 211: # getlocal_3
stack.append(registers[3])
elif opcode == 214: # setlocal_2
registers[2] = stack.pop()
elif opcode == 215: # setlocal_3
registers[3] = stack.pop()
raise NotImplementedError(
u'Unsupported opcode %d' % opcode)
method_pyfunctions[func_name] = resfunc
# The public entry point: 'decipher' takes the signature as a char list.
initial_function = extract_function(u'decipher')
return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
# Preferred path: derive the algorithm from the actual player, caching
# the resulting function per player URL in self._player_cache.
if player_url is not None:
if player_url not in self._player_cache:
func = self._extract_signature_function(
self._player_cache[player_url] = func
return self._player_cache[player_url](s)
# Any failure in automatic extraction degrades to the static tables.
except Exception as e:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Automatic signature extraction failed: ' + tb)
self._downloader.report_warning(
u'Warning: Falling back to static signature algorithm')
return self._static_decrypt_signature(s)
def _static_decrypt_signature(self, s):
# Hard-coded permutations keyed on len(s); each return below handles one
# known signature length (the length guards are elided in this listing).
# The videos with age protection use another player, so the
# algorithms can be different.
return s[2:63] + s[82] + s[64:82] + s[63]
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
return s[81:36:-1] + s[0] + s[35:2:-1]
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
1036 def _decrypt_signature_age_gate(self, s):
1037 # The videos with age protection use another player, so the algorithms
1040 return s[2:63] + s[82] + s[64:82] + s[63]
1042 # Fallback to the other algortihms
1043 return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id):
"""Return a {language_code: subtitle_url} dict for the video (may be empty)."""
sub_list = self._download_webpage(
'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
video_id, note=False)
except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
# Each track entry exposes a display name and a language code.
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
params = compat_urllib_parse.urlencode({
'fmt': self._downloader.params.get('subtitlesformat'),
url = u'http://www.youtube.com/api/timedtext?' + params
sub_lang_list[lang] = url
if not sub_lang_list:
self._downloader.report_warning(u'video doesn\'t have subtitles')
return sub_lang_list
def _get_available_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
sub_format = self._downloader.params.get('subtitlesformat')
self.to_screen(u'%s: Looking for automatic captions' % video_id)
# The caption base URL lives inside the embedded player configuration JSON.
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = u'Couldn\'t find automatic captions for %s' % video_id
self._downloader.report_warning(err_msg)
player_config = json.loads(mobj.group(1))
args = player_config[u'args']
caption_url = args[u'ttsurl']
timestamp = args[u'timestamp']
# We get the available subtitles
list_params = compat_urllib_parse.urlencode({
list_url = caption_url + '&' + list_params
list_page = self._download_webpage(list_url, video_id)
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
# kind="asr" marks the automatic-speech-recognition (auto-caption) track.
original_lang_node = caption_list.find('track')
if original_lang_node.attrib.get('kind') != 'asr' :
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
original_lang = original_lang_node.attrib['lang_code']
# Build one translated-caption URL per available target language.
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
params = compat_urllib_parse.urlencode({
'lang': original_lang,
sub_lang_list[sub_lang] = caption_url + '&' + params
return sub_lang_list
# An extractor error can be raised by the download process if there are
# no automatic captions but there are subtitles
except (KeyError, ExtractorError):
self._downloader.report_warning(err_msg)
def _print_formats(self, formats):
"""Print one line per itag: extension, dimensions and any special tag."""
print('Available formats:')
# Unknown itags default to the 'flv' extension and '???' dimensions.
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
self._video_dimensions.get(x, '???'),
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
"""Return the 11-character video id captured by group 2 of _VALID_URL."""
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(2)
def _get_video_url_list(self, url_map):
Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats.
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
# Quality ordering depends on whether free containers are preferred.
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
# Specific formats. We pick the first in a slash-delimited sequence.
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
# available in the specified format. For example,
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
video_url_list = [(rf, url_map[rf])]
# A container name expands to its itags, best first.
if rf in self._video_formats_map:
for srf in self._video_formats_map[rf]:
video_url_list = [(srf, url_map[srf])]
if video_url_list is None:
raise ExtractorError(u'requested format not available')
return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
"""Download an m3u8 manifest and map each variant stream's itag to its URL."""
def _get_urls(_manifest):
# Non-comment, non-empty lines of an m3u8 manifest are stream URLs.
lines = _manifest.split('\n')
urls = filter(lambda l: l and not l.startswith('#'),
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
formats_urls = _get_urls(manifest)
for format_url in formats_urls:
# The itag is embedded in the variant URL path ('.../itag/NN/...').
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
url_map[itag] = format_url
1195 def _real_extract(self, url):
1196 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
1197 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
1199 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1200 mobj = re.search(self._NEXT_URL_RE, url)
1202 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
1203 video_id = self._extract_id(url)
1206 self.report_video_webpage_download(video_id)
1207 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
1208 request = compat_urllib_request.Request(url)
1210 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
1211 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1212 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
1214 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
1216 # Attempt to extract SWF player URL
1217 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1218 if mobj is not None:
1219 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1224 self.report_video_info_webpage_download(video_id)
1225 if re.search(r'player-age-gate-content">', video_webpage) is not None:
1226 self.report_age_confirmation()
1228 # We simulate the access to the video from www.youtube.com/v/{video_id}
1229 # this can be viewed without login into Youtube
1230 data = compat_urllib_parse.urlencode({'video_id': video_id,
1234 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1238 video_info_url = 'https://www.youtube.com/get_video_info?' + data
1239 video_info_webpage = self._download_webpage(video_info_url, video_id,
1241 errnote='unable to download video info webpage')
1242 video_info = compat_parse_qs(video_info_webpage)
1245 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1246 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1247 % (video_id, el_type))
1248 video_info_webpage = self._download_webpage(video_info_url, video_id,
1250 errnote='unable to download video info webpage')
1251 video_info = compat_parse_qs(video_info_webpage)
1252 if 'token' in video_info:
1254 if 'token' not in video_info:
1255 if 'reason' in video_info:
1256 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
1258 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
1260 # Check for "rental" videos
1261 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1262 raise ExtractorError(u'"rental" videos not supported')
1264 # Start extracting information
1265 self.report_information_extraction(video_id)
1268 if 'author' not in video_info:
1269 raise ExtractorError(u'Unable to extract uploader name')
1270 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
1273 video_uploader_id = None
1274 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1275 if mobj is not None:
1276 video_uploader_id = mobj.group(1)
1278 self._downloader.report_warning(u'unable to extract uploader nickname')
1281 if 'title' not in video_info:
1282 raise ExtractorError(u'Unable to extract video title')
1283 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
1286 # We try first to get a high quality image:
1287 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1288 video_webpage, re.DOTALL)
1289 if m_thumb is not None:
1290 video_thumbnail = m_thumb.group(1)
1291 elif 'thumbnail_url' not in video_info:
1292 self._downloader.report_warning(u'unable to extract video thumbnail')
1293 video_thumbnail = ''
1294 else: # don't panic if we can't find it
1295 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
1299 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1300 if mobj is not None:
1301 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1302 upload_date = unified_strdate(upload_date)
1305 video_description = get_element_by_id("eow-description", video_webpage)
1306 if video_description:
1307 video_description = clean_html(video_description)
1309 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1311 video_description = unescapeHTML(fd_mobj.group(1))
1313 video_description = u''
1316 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1318 if self._downloader.params.get('listsubtitles', False):
1319 self._list_available_subtitles(video_id, video_webpage)
1322 if 'length_seconds' not in video_info:
1323 self._downloader.report_warning(u'unable to extract video duration')
1326 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
1328 # Decide which formats to download
1331 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
1333 raise ValueError('Could not find vevo ID')
1334 info = json.loads(mobj.group(1))
1336 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
1337 # this signatures are encrypted
1338 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
1340 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
1341 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
1342 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
1344 if 'url_encoded_fmt_stream_map' in video_info:
1345 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
1347 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
1348 elif 'adaptive_fmts' in video_info:
1349 if 'url_encoded_fmt_stream_map' in video_info:
1350 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
1352 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
1356 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1357 self.report_rtmp_download()
1358 video_url_list = [(None, video_info['conn'][0])]
1359 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1360 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
1361 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1363 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
1364 url_data = compat_parse_qs(url_data_str)
1365 if 'itag' in url_data and 'url' in url_data:
1366 url = url_data['url'][0]
1367 if 'sig' in url_data:
1368 url += '&signature=' + url_data['sig'][0]
1369 elif 's' in url_data:
1370 encrypted_sig = url_data['s'][0]
1371 if self._downloader.params.get('verbose'):
1373 player_version = self._search_regex(
1375 player_url if player_url else None,
1376 'flash player', fatal=False)
1377 player_desc = 'flash player %s' % player_version
1379 player_version = self._search_regex(
1380 r'html5player-(.+?)\.js', video_webpage,
1381 'html5 player', fatal=False)
1382 player_desc = u'html5 player %s' % player_version
1384 parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
1385 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
1386 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
1389 jsplayer_url_json = self._search_regex(
1390 r'"assets":.+?"js":\s*("[^"]+")',
1391 video_webpage, u'JS player URL')
1392 player_url = json.loads(jsplayer_url_json)
1394 signature = self._decrypt_signature(
1395 encrypted_sig, video_id, player_url, age_gate)
1396 url += '&signature=' + signature
1397 if 'ratebypass' not in url:
1398 url += '&ratebypass=yes'
1399 url_map[url_data['itag'][0]] = url
1400 video_url_list = self._get_video_url_list(url_map)
1401 if not video_url_list:
1403 elif video_info.get('hlsvp'):
1404 manifest_url = video_info['hlsvp'][0]
1405 url_map = self._extract_from_m3u8(manifest_url, video_id)
1406 video_url_list = self._get_video_url_list(url_map)
1407 if not video_url_list:
1411 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
1414 for format_param, video_real_url in video_url_list:
1416 video_extension = self._video_extensions.get(format_param, 'flv')
1418 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
1419 self._video_dimensions.get(format_param, '???'),
1420 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
1424 'url': video_real_url,
1425 'uploader': video_uploader,
1426 'uploader_id': video_uploader_id,
1427 'upload_date': upload_date,
1428 'title': video_title,
1429 'ext': video_extension,
1430 'format': video_format,
1431 'thumbnail': video_thumbnail,
1432 'description': video_description,
1433 'player_url': player_url,
1434 'subtitles': video_subtitles,
1435 'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    # Extracts every video of a YouTube playlist through the gdata v2 JSON
    # API, paging _MAX_RESULTS entries at a time (API caps start-index at
    # 1000), then returns the videos sorted by playlist position.
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): this verbose pattern is only partially visible in this
    # excerpt (opening triple-quote with no visible close); it matches
    # playlist/course/watch URLs and bare playlist ids, capturing the id in
    # group 1 or group 2.
    _VALID_URL = r"""(?:
                     (?:course|view_play_list|my_playlists|artist|playlist|watch)
                     \? (?:.*?&)*? (?:p|a|list)=
                     ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                     ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # gdata playlist feed: %s playlist id, %i page size, %i 1-based start index.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    # NOTE(review): invoked as a classmethod; the @classmethod decorator is
    # not visible in this excerpt.
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # _VALID_URL is a verbose pattern, so re.VERBOSE must be passed here.
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        # NOTE(review): the `if mobj is None:` guard for this raise is not
        # visible in this excerpt.
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The gdata API refuses start indices past 1000.
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            # NOTE(review): the matching `try:` header is not visible in this excerpt.
            response = json.loads(page)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            for entry in response['feed']['entry']:
                # yt$position gives the video's 1-based slot in the playlist;
                # used below to restore playlist order after collection.
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
                    'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']

        # Sort by the collected position index, then keep only the URLs.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    # Extracts all videos of a channel: the paginated HTML listing first,
    # then - while the "load more" widget marker is present - the
    # c4_browse_ajax JSON endpoint until no further pages are advertised.
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # HTML listing: %s channel id, %s page number.
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Presence of this CSS class in a page means more pages exist.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    # AJAX continuation: %s page number, %s channel id.
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        # Collect watch-page video ids in first-seen order, skipping
        # duplicates.  NOTE(review): the initialization of ids_in_page and
        # the return statement are not visible in this excerpt.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        # Extract channel id
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard for this raise is not
        # visible in this excerpt.
        raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                # The AJAX endpoint returns JSON wrapping two HTML fragments.
                page = json.loads(page)

                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)

                # Stop once the widget no longer advertises further pages.
                # NOTE(review): the loop-terminating statement under this
                # condition is not visible in this excerpt.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    # Extracts a user's uploads via the gdata API, _GDATA_PAGE_SIZE ids per
    # request, stopping when a short page signals the end of the feed.
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # Maximum number of results the gdata API returns per query.
    _GDATA_PAGE_SIZE = 50
    # Uploads feed: %s username, %d page size, %d 1-based start index.
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
    IE_NAME = u'youtube:user'

    # NOTE(review): invoked as a classmethod; the @classmethod decorator is
    # not visible in this excerpt.
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match those too.
        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_ies): return False
        else: return super(YoutubeUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        # NOTE(review): the `if mobj is None:` guard for this raise is not
        # visible in this excerpt.
        raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
            # NOTE(review): the matching `try:` header is not visible in this
            # excerpt.
            response = json.loads(page)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            # Extract video identifiers
            for entry in response['feed']['entry']:
                # Entry id has the form .../videos/<video_id>; keep the tail.
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # NOTE(review): the loop-terminating statement under this
            # condition is not visible in this excerpt.
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    # Implements the "ytsearchN:query" keyword by paging the gdata jsonc
    # search API, 50 results per request, until n results (or the API's
    # totalItems limit) are gathered.
    IE_DESC = u'YouTube.com searches'
    # jsonc search API: %s url-quoted query, %i 1-based start index.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # NOTE(review): the initializations of video_ids, pagenum and limit
        # are not visible in this excerpt.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            # NOTE(review): the matching `try:` header is not visible in this
            # excerpt.
            data = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
        api_response = json.loads(data)['data']

        if not 'items' in api_response:
            raise ExtractorError(u'[youtube] No video results')

        new_ids = list(video['id'] for video in api_response['items'])
        video_ids += new_ids

        # Never ask for more than the API reports as available.
        limit = min(n, api_response['totalItems'])

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season show pages.

    A show page links one playlist per season; each of those links is
    delegated to YoutubePlaylistIE.
    """
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Every season of the show is exposed as its own playlist link.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True

    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    # NOTE(review): used below as `self._FEED_TEMPLATE % paging`, i.e. as a
    # property; the @property decorator is not visible in this excerpt.
    def _FEED_TEMPLATE(self):
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        # %%s leaves a literal %s placeholder for the paging offset.
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        # NOTE(review): the IE_NAME property definition line for this return
        # is not visible in this excerpt.
        return u'youtube:%s' % self._FEED_NAME

    # NOTE(review): the body (presumably the login call) is not visible in
    # this excerpt.
    def _real_initialize(self):

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        # NOTE(review): the initialization of feed_entries and _PAGING_STEP
        # is not visible in this excerpt.
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # Preserve first-seen order while dropping duplicate ids.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null paging token means there are no further pages.
            # NOTE(review): the loop-terminating statement under this
            # condition is not visible in this excerpt.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's subscriptions feed."""
    # Fixed a typo ("keyword(requires" -> "keyword (requires") so the
    # description matches the sibling feed extractors.
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the logged-in user's "recommended videos" feed.
    # Uses the base class's system-feed endpoint (_PERSONAL_FEED stays False).
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the logged-in user's "watch later" list.
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'

    # The watch-later list is account-specific, so the base class must use
    # the action_load_personal_feed endpoint.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the logged-in user's favourites to its backing playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    # Fetching the favourites page requires an authenticated session.
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a link to the real playlist; once its
        # id is found, extraction is delegated to YoutubePlaylistIE.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')