[allocine] Update tests
[youtube-dl] / youtube_dl / extractor / youtube.py
index f868b19290a65ec0992ebbf6da80cafc7699f072..623056bd962782fddfc2ed3841e2c5e826a35507 100644 (file)
@@ -7,13 +7,14 @@ import itertools
 import json
 import os.path
 import re
 import json
 import os.path
 import re
-import string
 import struct
 import traceback
 import zlib
 
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 import struct
 import traceback
 import zlib
 
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
+from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
 from ..utils import (
     compat_chr,
     compat_parse_qs,
 from ..utils import (
     compat_chr,
     compat_parse_qs,
@@ -151,6 +152,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                              )
                          ))
                          |youtu\.be/                                          # just youtu.be/xxxx
                              )
                          ))
                          |youtu\.be/                                          # just youtu.be/xxxx
+                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
@@ -176,32 +178,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
 
         # 3d videos
 
 
         # 3d videos
-        '82': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
-        '83': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
-        '84': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
-        '85': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': '3D', 'preference': -20},
-        '100': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': '3D', 'preference': -20},
-        '101': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': '3D', 'preference': -20},
-        '102': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': '3D', 'preference': -20},
+        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
+        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
+        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
+        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
+        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
+        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
+        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
 
         # Apple HTTP Live Streaming
 
         # Apple HTTP Live Streaming
-        '92': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
-        '93': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'HLS', 'preference': -10},
-        '94': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'HLS', 'preference': -10},
-        '95': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'HLS', 'preference': -10},
-        '96': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'HLS', 'preference': -10},
-        '132': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'HLS', 'preference': -10},
-        '151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
+        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
+        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
+        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
+        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
+        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
+        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
 
         # DASH mp4 video
 
         # DASH mp4 video
-        '133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
-        '134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
-        '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
-        '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
-        '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
-        '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
-        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
+        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 
         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
 
         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -209,23 +211,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
 
         # Dash webm
         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
 
         # Dash webm
-        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
-        '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
-        '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '245': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '246': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
-        '247': {'ext': 'webm', 'height': 720, 'resolution': '720p', 'format_note': 'DASH webm', 'preference': -40},
-        '248': {'ext': 'webm', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH webm', 'preference': -40},
+        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 
         # Dash webm audio
 
         # Dash webm audio
-        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
-        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
+        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
+        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 
         # RTMP (unnamed)
         '_rtmp': {'protocol': 'rtmp'},
 
         # RTMP (unnamed)
         '_rtmp': {'protocol': 'rtmp'},
@@ -241,7 +245,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 u"uploader": u"Philipp Hagemeister",
                 u"uploader_id": u"phihag",
                 u"upload_date": u"20121002",
                 u"uploader": u"Philipp Hagemeister",
                 u"uploader_id": u"phihag",
                 u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
+                u"categories": [u'Science & Technology'],
             }
         },
         {
             }
         },
         {
@@ -251,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             u"info_dict": {
                 u"upload_date": u"20120506",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
             u"info_dict": {
                 u"upload_date": u"20120506",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:5b292926389560516e384ac437c0ec07",
+                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
                 u"uploader": u"Icona Pop",
                 u"uploader_id": u"IconaPop"
             }
                 u"uploader": u"Icona Pop",
                 u"uploader_id": u"IconaPop"
             }
@@ -303,7 +308,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 u'id': u'IB3lcPjvWLA',
                 u'ext': u'm4a',
                 u'title': u'Afrojack - The Spark ft. Spree Wilson',
                 u'id': u'IB3lcPjvWLA',
                 u'ext': u'm4a',
                 u'title': u'Afrojack - The Spark ft. Spree Wilson',
-                u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
+                u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
                 u'uploader': u'AfrojackVEVO',
                 u'uploader_id': u'AfrojackVEVO',
                 u'upload_date': u'20131011',
                 u'uploader': u'AfrojackVEVO',
                 u'uploader_id': u'AfrojackVEVO',
                 u'upload_date': u'20131011',
@@ -343,8 +348,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         self.to_screen(u'RTMP download detected')
 
     def _extract_signature_function(self, video_id, player_url, slen):
         self.to_screen(u'RTMP download detected')
 
     def _extract_signature_function(self, video_id, player_url, slen):
-        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
-                        player_url)
+        id_m = re.match(
+            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
+            player_url)
         player_type = id_m.group('ext')
         player_id = id_m.group('id')
 
         player_type = id_m.group('ext')
         player_id = id_m.group('id')
 
@@ -437,598 +443,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
-            r'signature=([a-zA-Z]+)', jscode,
-            u'Initial JS player signature function name')
-
-        functions = {}
-
-        def argidx(varname):
-            return string.lowercase.index(varname)
-
-        def interpret_statement(stmt, local_vars, allow_recursion=20):
-            if allow_recursion < 0:
-                raise ExtractorError(u'Recursion limit reached')
-
-            if stmt.startswith(u'var '):
-                stmt = stmt[len(u'var '):]
-            ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
-                             r'=(?P<expr>.*)$', stmt)
-            if ass_m:
-                if ass_m.groupdict().get('index'):
-                    def assign(val):
-                        lvar = local_vars[ass_m.group('out')]
-                        idx = interpret_expression(ass_m.group('index'),
-                                                   local_vars, allow_recursion)
-                        assert isinstance(idx, int)
-                        lvar[idx] = val
-                        return val
-                    expr = ass_m.group('expr')
-                else:
-                    def assign(val):
-                        local_vars[ass_m.group('out')] = val
-                        return val
-                    expr = ass_m.group('expr')
-            elif stmt.startswith(u'return '):
-                assign = lambda v: v
-                expr = stmt[len(u'return '):]
-            else:
-                raise ExtractorError(
-                    u'Cannot determine left side of statement in %r' % stmt)
-
-            v = interpret_expression(expr, local_vars, allow_recursion)
-            return assign(v)
-
-        def interpret_expression(expr, local_vars, allow_recursion):
-            if expr.isdigit():
-                return int(expr)
-
-            if expr.isalpha():
-                return local_vars[expr]
-
-            m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
-            if m:
-                member = m.group('member')
-                val = local_vars[m.group('in')]
-                if member == 'split("")':
-                    return list(val)
-                if member == 'join("")':
-                    return u''.join(val)
-                if member == 'length':
-                    return len(val)
-                if member == 'reverse()':
-                    return val[::-1]
-                slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-                if slice_m:
-                    idx = interpret_expression(
-                        slice_m.group('idx'), local_vars, allow_recursion-1)
-                    return val[idx:]
-
-            m = re.match(
-                r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
-            if m:
-                val = local_vars[m.group('in')]
-                idx = interpret_expression(m.group('idx'), local_vars,
-                                           allow_recursion-1)
-                return val[idx]
-
-            m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
-            if m:
-                a = interpret_expression(m.group('a'),
-                                         local_vars, allow_recursion)
-                b = interpret_expression(m.group('b'),
-                                         local_vars, allow_recursion)
-                return a % b
-
-            m = re.match(
-                r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
-            if m:
-                fname = m.group('func')
-                if fname not in functions:
-                    functions[fname] = extract_function(fname)
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in m.group('args').split(',')]
-                return functions[fname](argvals)
-            raise ExtractorError(u'Unsupported JS expression %r' % expr)
-
-        def extract_function(funcname):
-            func_m = re.search(
-                r'function ' + re.escape(funcname) +
-                r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
-                jscode)
-            argnames = func_m.group('args').split(',')
-
-            def resf(args):
-                local_vars = dict(zip(argnames, args))
-                for stmt in func_m.group('code').split(';'):
-                    res = interpret_statement(stmt, local_vars)
-                return res
-            return resf
-
-        initial_function = extract_function(funcname)
+            r'signature=([$a-zA-Z]+)', jscode,
+             u'Initial JS player signature function name')
+
+        jsi = JSInterpreter(jscode)
+        initial_function = jsi.extract_function(funcname)
         return lambda s: initial_function([s])
 
     def _parse_sig_swf(self, file_contents):
         return lambda s: initial_function([s])
 
     def _parse_sig_swf(self, file_contents):
-        if file_contents[1:3] != b'WS':
-            raise ExtractorError(
-                u'Not an SWF file; header is %r' % file_contents[:3])
-        if file_contents[:1] == b'C':
-            content = zlib.decompress(file_contents[8:])
-        else:
-            raise NotImplementedError(u'Unsupported compression format %r' %
-                                      file_contents[:1])
-
-        def extract_tags(content):
-            pos = 0
-            while pos < len(content):
-                header16 = struct.unpack('<H', content[pos:pos+2])[0]
-                pos += 2
-                tag_code = header16 >> 6
-                tag_len = header16 & 0x3f
-                if tag_len == 0x3f:
-                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
-                    pos += 4
-                assert pos+tag_len <= len(content)
-                yield (tag_code, content[pos:pos+tag_len])
-                pos += tag_len
-
-        code_tag = next(tag
-                        for tag_code, tag in extract_tags(content)
-                        if tag_code == 82)
-        p = code_tag.index(b'\0', 4) + 1
-        code_reader = io.BytesIO(code_tag[p:])
-
-        # Parse ABC (AVM2 ByteCode)
-        def read_int(reader=None):
-            if reader is None:
-                reader = code_reader
-            res = 0
-            shift = 0
-            for _ in range(5):
-                buf = reader.read(1)
-                assert len(buf) == 1
-                b = struct.unpack('<B', buf)[0]
-                res = res | ((b & 0x7f) << shift)
-                if b & 0x80 == 0:
-                    break
-                shift += 7
-            return res
-
-        def u30(reader=None):
-            res = read_int(reader)
-            assert res & 0xf0000000 == 0
-            return res
-        u32 = read_int
-
-        def s32(reader=None):
-            v = read_int(reader)
-            if v & 0x80000000 != 0:
-                v = - ((v ^ 0xffffffff) + 1)
-            return v
-
-        def read_string(reader=None):
-            if reader is None:
-                reader = code_reader
-            slen = u30(reader)
-            resb = reader.read(slen)
-            assert len(resb) == slen
-            return resb.decode('utf-8')
-
-        def read_bytes(count, reader=None):
-            if reader is None:
-                reader = code_reader
-            resb = reader.read(count)
-            assert len(resb) == count
-            return resb
-
-        def read_byte(reader=None):
-            resb = read_bytes(1, reader=reader)
-            res = struct.unpack('<B', resb)[0]
-            return res
-
-        # minor_version + major_version
-        read_bytes(2 + 2)
-
-        # Constant pool
-        int_count = u30()
-        for _c in range(1, int_count):
-            s32()
-        uint_count = u30()
-        for _c in range(1, uint_count):
-            u32()
-        double_count = u30()
-        read_bytes((double_count-1) * 8)
-        string_count = u30()
-        constant_strings = [u'']
-        for _c in range(1, string_count):
-            s = read_string()
-            constant_strings.append(s)
-        namespace_count = u30()
-        for _c in range(1, namespace_count):
-            read_bytes(1)  # kind
-            u30()  # name
-        ns_set_count = u30()
-        for _c in range(1, ns_set_count):
-            count = u30()
-            for _c2 in range(count):
-                u30()
-        multiname_count = u30()
-        MULTINAME_SIZES = {
-            0x07: 2,  # QName
-            0x0d: 2,  # QNameA
-            0x0f: 1,  # RTQName
-            0x10: 1,  # RTQNameA
-            0x11: 0,  # RTQNameL
-            0x12: 0,  # RTQNameLA
-            0x09: 2,  # Multiname
-            0x0e: 2,  # MultinameA
-            0x1b: 1,  # MultinameL
-            0x1c: 1,  # MultinameLA
-        }
-        multinames = [u'']
-        for _c in range(1, multiname_count):
-            kind = u30()
-            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
-            if kind == 0x07:
-                u30()  # namespace_idx
-                name_idx = u30()
-                multinames.append(constant_strings[name_idx])
-            else:
-                multinames.append('[MULTINAME kind: %d]' % kind)
-                for _c2 in range(MULTINAME_SIZES[kind]):
-                    u30()
-
-        # Methods
-        method_count = u30()
-        MethodInfo = collections.namedtuple(
-            'MethodInfo',
-            ['NEED_ARGUMENTS', 'NEED_REST'])
-        method_infos = []
-        for method_id in range(method_count):
-            param_count = u30()
-            u30()  # return type
-            for _ in range(param_count):
-                u30()  # param type
-            u30()  # name index (always 0 for youtube)
-            flags = read_byte()
-            if flags & 0x08 != 0:
-                # Options present
-                option_count = u30()
-                for c in range(option_count):
-                    u30()  # val
-                    read_bytes(1)  # kind
-            if flags & 0x80 != 0:
-                # Param names present
-                for _ in range(param_count):
-                    u30()  # param name
-            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
-            method_infos.append(mi)
-
-        # Metadata
-        metadata_count = u30()
-        for _c in range(metadata_count):
-            u30()  # name
-            item_count = u30()
-            for _c2 in range(item_count):
-                u30()  # key
-                u30()  # value
-
-        def parse_traits_info():
-            trait_name_idx = u30()
-            kind_full = read_byte()
-            kind = kind_full & 0x0f
-            attrs = kind_full >> 4
-            methods = {}
-            if kind in [0x00, 0x06]:  # Slot or Const
-                u30()  # Slot id
-                u30()  # type_name_idx
-                vindex = u30()
-                if vindex != 0:
-                    read_byte()  # vkind
-            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
-                u30()  # disp_id
-                method_idx = u30()
-                methods[multinames[trait_name_idx]] = method_idx
-            elif kind == 0x04:  # Class
-                u30()  # slot_id
-                u30()  # classi
-            elif kind == 0x05:  # Function
-                u30()  # slot_id
-                function_idx = u30()
-                methods[function_idx] = multinames[trait_name_idx]
-            else:
-                raise ExtractorError(u'Unsupported trait kind %d' % kind)
-
-            if attrs & 0x4 != 0:  # Metadata present
-                metadata_count = u30()
-                for _c3 in range(metadata_count):
-                    u30()  # metadata index
-
-            return methods
-
-        # Classes
+        swfi = SWFInterpreter(file_contents)
         TARGET_CLASSNAME = u'SignatureDecipher'
         TARGET_CLASSNAME = u'SignatureDecipher'
-        searched_idx = multinames.index(TARGET_CLASSNAME)
-        searched_class_id = None
-        class_count = u30()
-        for class_id in range(class_count):
-            name_idx = u30()
-            if name_idx == searched_idx:
-                # We found the class we're looking for!
-                searched_class_id = class_id
-            u30()  # super_name idx
-            flags = read_byte()
-            if flags & 0x08 != 0:  # Protected namespace is present
-                u30()  # protected_ns_idx
-            intrf_count = u30()
-            for _c2 in range(intrf_count):
-                u30()
-            u30()  # iinit
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        if searched_class_id is None:
-            raise ExtractorError(u'Target class %r not found' %
-                                 TARGET_CLASSNAME)
-
-        method_names = {}
-        method_idxs = {}
-        for class_id in range(class_count):
-            u30()  # cinit
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                trait_methods = parse_traits_info()
-                if class_id == searched_class_id:
-                    method_names.update(trait_methods.items())
-                    method_idxs.update(dict(
-                        (idx, name)
-                        for name, idx in trait_methods.items()))
-
-        # Scripts
-        script_count = u30()
-        for _c in range(script_count):
-            u30()  # init
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        # Method bodies
-        method_body_count = u30()
-        Method = collections.namedtuple('Method', ['code', 'local_count'])
-        methods = {}
-        for _c in range(method_body_count):
-            method_idx = u30()
-            u30()  # max_stack
-            local_count = u30()
-            u30()  # init_scope_depth
-            u30()  # max_scope_depth
-            code_length = u30()
-            code = read_bytes(code_length)
-            if method_idx in method_idxs:
-                m = Method(code, local_count)
-                methods[method_idxs[method_idx]] = m
-            exception_count = u30()
-            for _c2 in range(exception_count):
-                u30()  # from
-                u30()  # to
-                u30()  # target
-                u30()  # exc_type
-                u30()  # var_name
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        assert p + code_reader.tell() == len(code_tag)
-        assert len(methods) == len(method_idxs)
-
-        method_pyfunctions = {}
-
-        def extract_function(func_name):
-            if func_name in method_pyfunctions:
-                return method_pyfunctions[func_name]
-            if func_name not in methods:
-                raise ExtractorError(u'Cannot find function %r' % func_name)
-            m = methods[func_name]
-
-            def resfunc(args):
-                registers = ['(this)'] + list(args) + [None] * m.local_count
-                stack = []
-                coder = io.BytesIO(m.code)
-                while True:
-                    opcode = struct.unpack('!B', coder.read(1))[0]
-                    if opcode == 36:  # pushbyte
-                        v = struct.unpack('!B', coder.read(1))[0]
-                        stack.append(v)
-                    elif opcode == 44:  # pushstring
-                        idx = u30(coder)
-                        stack.append(constant_strings[idx])
-                    elif opcode == 48:  # pushscope
-                        # We don't implement the scope register, so we'll just
-                        # ignore the popped value
-                        stack.pop()
-                    elif opcode == 70:  # callproperty
-                        index = u30(coder)
-                        mname = multinames[index]
-                        arg_count = u30(coder)
-                        args = list(reversed(
-                            [stack.pop() for _ in range(arg_count)]))
-                        obj = stack.pop()
-                        if mname == u'split':
-                            assert len(args) == 1
-                            assert isinstance(args[0], compat_str)
-                            assert isinstance(obj, compat_str)
-                            if args[0] == u'':
-                                res = list(obj)
-                            else:
-                                res = obj.split(args[0])
-                            stack.append(res)
-                        elif mname == u'slice':
-                            assert len(args) == 1
-                            assert isinstance(args[0], int)
-                            assert isinstance(obj, list)
-                            res = obj[args[0]:]
-                            stack.append(res)
-                        elif mname == u'join':
-                            assert len(args) == 1
-                            assert isinstance(args[0], compat_str)
-                            assert isinstance(obj, list)
-                            res = args[0].join(obj)
-                            stack.append(res)
-                        elif mname in method_pyfunctions:
-                            stack.append(method_pyfunctions[mname](args))
-                        else:
-                            raise NotImplementedError(
-                                u'Unsupported property %r on %r'
-                                % (mname, obj))
-                    elif opcode == 72:  # returnvalue
-                        res = stack.pop()
-                        return res
-                    elif opcode == 79:  # callpropvoid
-                        index = u30(coder)
-                        mname = multinames[index]
-                        arg_count = u30(coder)
-                        args = list(reversed(
-                            [stack.pop() for _ in range(arg_count)]))
-                        obj = stack.pop()
-                        if mname == u'reverse':
-                            assert isinstance(obj, list)
-                            obj.reverse()
-                        else:
-                            raise NotImplementedError(
-                                u'Unsupported (void) property %r on %r'
-                                % (mname, obj))
-                    elif opcode == 93:  # findpropstrict
-                        index = u30(coder)
-                        mname = multinames[index]
-                        res = extract_function(mname)
-                        stack.append(res)
-                    elif opcode == 97:  # setproperty
-                        index = u30(coder)
-                        value = stack.pop()
-                        idx = stack.pop()
-                        obj = stack.pop()
-                        assert isinstance(obj, list)
-                        assert isinstance(idx, int)
-                        obj[idx] = value
-                    elif opcode == 98:  # getlocal
-                        index = u30(coder)
-                        stack.append(registers[index])
-                    elif opcode == 99:  # setlocal
-                        index = u30(coder)
-                        value = stack.pop()
-                        registers[index] = value
-                    elif opcode == 102:  # getproperty
-                        index = u30(coder)
-                        pname = multinames[index]
-                        if pname == u'length':
-                            obj = stack.pop()
-                            assert isinstance(obj, list)
-                            stack.append(len(obj))
-                        else:  # Assume attribute access
-                            idx = stack.pop()
-                            assert isinstance(idx, int)
-                            obj = stack.pop()
-                            assert isinstance(obj, list)
-                            stack.append(obj[idx])
-                    elif opcode == 128:  # coerce
-                        u30(coder)
-                    elif opcode == 133:  # coerce_s
-                        assert isinstance(stack[-1], (type(None), compat_str))
-                    elif opcode == 164:  # modulo
-                        value2 = stack.pop()
-                        value1 = stack.pop()
-                        res = value1 % value2
-                        stack.append(res)
-                    elif opcode == 208:  # getlocal_0
-                        stack.append(registers[0])
-                    elif opcode == 209:  # getlocal_1
-                        stack.append(registers[1])
-                    elif opcode == 210:  # getlocal_2
-                        stack.append(registers[2])
-                    elif opcode == 211:  # getlocal_3
-                        stack.append(registers[3])
-                    elif opcode == 214:  # setlocal_2
-                        registers[2] = stack.pop()
-                    elif opcode == 215:  # setlocal_3
-                        registers[3] = stack.pop()
-                    else:
-                        raise NotImplementedError(
-                            u'Unsupported opcode %d' % opcode)
-
-            method_pyfunctions[func_name] = resfunc
-            return resfunc
-
-        initial_function = extract_function(u'decipher')
+        searched_class = swfi.extract_class(TARGET_CLASSNAME)
+        initial_function = swfi.extract_function(searched_class, u'decipher')
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
         """Turn the encrypted s field into a working signature"""
 
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
         """Turn the encrypted s field into a working signature"""
 
-        if player_url is not None:
-            if player_url.startswith(u'//'):
-                player_url = u'https:' + player_url
-            try:
-                player_id = (player_url, len(s))
-                if player_id not in self._player_cache:
-                    func = self._extract_signature_function(
-                        video_id, player_url, len(s)
-                    )
-                    self._player_cache[player_id] = func
-                func = self._player_cache[player_id]
-                if self._downloader.params.get('youtube_print_sig_code'):
-                    self._print_sig_code(func, len(s))
-                return func(s)
-            except Exception:
-                tb = traceback.format_exc()
-                self._downloader.report_warning(
-                    u'Automatic signature extraction failed: ' + tb)
-
-            self._downloader.report_warning(
-                u'Warning: Falling back to static signature algorithm')
-
-        return self._static_decrypt_signature(
-            s, video_id, player_url, age_gate)
-
-    def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
-        if age_gate:
-            # The videos with age protection use another player, so the
-            # algorithms can be different.
-            if len(s) == 86:
-                return s[2:63] + s[82] + s[64:82] + s[63]
-
-        if len(s) == 93:
-            return s[86:29:-1] + s[88] + s[28:5:-1]
-        elif len(s) == 92:
-            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
-        elif len(s) == 91:
-            return s[84:27:-1] + s[86] + s[26:5:-1]
-        elif len(s) == 90:
-            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
-        elif len(s) == 89:
-            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
-        elif len(s) == 88:
-            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
-        elif len(s) == 87:
-            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
-        elif len(s) == 86:
-            return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
-        elif len(s) == 85:
-            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
-        elif len(s) == 84:
-            return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
-        elif len(s) == 83:
-            return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
-        elif len(s) == 82:
-            return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
-        elif len(s) == 81:
-            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
-        elif len(s) == 80:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
-        elif len(s) == 79:
-            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+        if player_url is None:
+            raise ExtractorError(u'Cannot decrypt signature without player_url')
 
 
-        else:
-            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
+        if player_url.startswith(u'//'):
+            player_url = u'https:' + player_url
+        try:
+            player_id = (player_url, len(s))
+            if player_id not in self._player_cache:
+                func = self._extract_signature_function(
+                    video_id, player_url, len(s)
+                )
+                self._player_cache[player_id] = func
+            func = self._player_cache[player_id]
+            if self._downloader.params.get('youtube_print_sig_code'):
+                self._print_sig_code(func, len(s))
+            return func(s)
+        except Exception as e:
+            tb = traceback.format_exc()
+            raise ExtractorError(
+                u'Automatic signature extraction failed: ' + tb, cause=e)
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
 
     def _get_available_subtitles(self, video_id, webpage):
         try:
@@ -1130,14 +581,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
 
     def _real_extract(self, url):
         return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
 
     def _real_extract(self, url):
+        proto = (
+            u'http' if self._downloader.params.get('prefer_insecure', False)
+            else u'https')
+
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
-            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
         video_id = self.extract_id(url)
 
         # Get video webpage
         video_id = self.extract_id(url)
 
         # Get video webpage
-        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
         video_webpage = self._download_webpage(url, video_id)
 
         # Attempt to extract SWF player URL
         video_webpage = self._download_webpage(url, video_id)
 
         # Attempt to extract SWF player URL
@@ -1162,7 +617,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                                                   'asv': 3,
                                                   'sts':'1588',
                                                   })
                                                   'asv': 3,
                                                   'sts':'1588',
                                                   })
-            video_info_url = 'https://www.youtube.com/get_video_info?' + data
+            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             video_info_webpage = self._download_webpage(video_info_url, video_id,
                                     note=False,
                                     errnote='unable to download video info webpage')
             video_info_webpage = self._download_webpage(video_info_url, video_id,
                                     note=False,
                                     errnote='unable to download video info webpage')
@@ -1170,7 +625,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
             age_gate = False
             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
         else:
             age_gate = False
             for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+                video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                         % (video_id, el_type))
                 video_info_webpage = self._download_webpage(video_info_url, video_id,
                                         note=False,
                         % (video_id, el_type))
                 video_info_webpage = self._download_webpage(video_info_url, video_id,
                                         note=False,
@@ -1180,9 +635,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                     break
         if 'token' not in video_info:
             if 'reason' in video_info:
                     break
         if 'token' not in video_info:
             if 'reason' in video_info:
-                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
+                raise ExtractorError(
+                    u'YouTube said: %s' % video_info['reason'][0],
+                    expected=True, video_id=video_id)
             else:
             else:
-                raise ExtractorError(u'"token" parameter not in video info for unknown reason')
+                raise ExtractorError(
+                    u'"token" parameter not in video info for unknown reason',
+                    video_id=video_id)
 
         if 'view_count' in video_info:
             view_count = int(video_info['view_count'][0])
 
         if 'view_count' in video_info:
             view_count = int(video_info['view_count'][0])
@@ -1211,7 +670,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # title
         if 'title' in video_info:
 
         # title
         if 'title' in video_info:
-            video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
+            video_title = video_info['title'][0]
         else:
             self._downloader.report_warning(u'Unable to extract video title')
             video_title = u'_'
         else:
             self._downloader.report_warning(u'Unable to extract video title')
             video_title = u'_'
@@ -1230,11 +689,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # upload date
         upload_date = None
 
         # upload date
         upload_date = None
-        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
+        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
+        if mobj is None:
+            mobj = re.search(
+                r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>',
+                video_webpage)
         if mobj is not None:
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
             upload_date = unified_strdate(upload_date)
 
         if mobj is not None:
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
             upload_date = unified_strdate(upload_date)
 
+        m_cat_container = get_element_by_id("eow-category", video_webpage)
+        if m_cat_container:
+            category = self._html_search_regex(
+                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
+                default=None)
+            video_categories = None if category is None else [category]
+        else:
+            video_categories = None
+
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:
@@ -1285,10 +757,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # Decide which formats to download
         try:
 
         # Decide which formats to download
         try:
-            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+            mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
             if not mobj:
                 raise ValueError('Could not find vevo ID')
             if not mobj:
                 raise ValueError('Could not find vevo ID')
-            ytplayer_config = json.loads(mobj.group(1))
+            json_code = uppercase_escape(mobj.group(1))
+            ytplayer_config = json.loads(json_code)
             args = ytplayer_config['args']
             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
             # this signatures are encrypted
             args = ytplayer_config['args']
             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
             # this signatures are encrypted
@@ -1342,31 +815,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         url += '&signature=' + url_data['sig'][0]
                     elif 's' in url_data:
                         encrypted_sig = url_data['s'][0]
                         url += '&signature=' + url_data['sig'][0]
                     elif 's' in url_data:
                         encrypted_sig = url_data['s'][0]
+
+                        if not age_gate:
+                            jsplayer_url_json = self._search_regex(
+                                r'"assets":.+?"js":\s*("[^"]+")',
+                                video_webpage, u'JS player URL')
+                            player_url = json.loads(jsplayer_url_json)
+                        if player_url is None:
+                            player_url_json = self._search_regex(
+                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                                video_webpage, u'age gate player URL')
+                            player_url = json.loads(player_url_json)
+
                         if self._downloader.params.get('verbose'):
                         if self._downloader.params.get('verbose'):
-                            if age_gate:
-                                if player_url is None:
-                                    player_version = 'unknown'
-                                else:
+                            if player_url is None:
+                                player_version = 'unknown'
+                                player_desc = 'unknown'
+                            else:
+                                if player_url.endswith('swf'):
                                     player_version = self._search_regex(
                                         r'-(.+)\.swf$', player_url,
                                         u'flash player', fatal=False)
                                     player_version = self._search_regex(
                                         r'-(.+)\.swf$', player_url,
                                         u'flash player', fatal=False)
-                                player_desc = 'flash player %s' % player_version
-                            else:
-                                player_version = self._search_regex(
-                                    r'html5player-(.+?)\.js', video_webpage,
-                                    'html5 player', fatal=False)
-                                player_desc = u'html5 player %s' % player_version
+                                    player_desc = 'flash player %s' % player_version
+                                else:
+                                    player_version = self._search_regex(
+                                        r'html5player-(.+?)\.js', video_webpage,
+                                        'html5 player', fatal=False)
+                                    player_desc = u'html5 player %s' % player_version
 
                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
 
 
                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
 
-                        if not age_gate:
-                            jsplayer_url_json = self._search_regex(
-                                r'"assets":.+?"js":\s*("[^"]+")',
-                                video_webpage, u'JS player URL')
-                            player_url = json.loads(jsplayer_url_json)
-
                         signature = self._decrypt_signature(
                             encrypted_sig, video_id, player_url, age_gate)
                         url += '&signature=' + signature
                         signature = self._decrypt_signature(
                             encrypted_sig, video_id, player_url, age_gate)
                         url += '&signature=' + signature
@@ -1440,11 +920,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'title':        video_title,
             'thumbnail':    video_thumbnail,
             'description':  video_description,
             'title':        video_title,
             'thumbnail':    video_thumbnail,
             'description':  video_description,
+            'categories':   video_categories,
             'subtitles':    video_subtitles,
             'duration':     video_duration,
             'age_limit':    18 if age_gate else 0,
             'annotations':  video_annotations,
             'subtitles':    video_subtitles,
             'duration':     video_duration,
             'age_limit':    18 if age_gate else 0,
             'annotations':  video_annotations,
-            'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+            'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
             'view_count':   view_count,
             'like_count': like_count,
             'dislike_count': dislike_count,
             'view_count':   view_count,
             'like_count': like_count,
             'dislike_count': dislike_count,
@@ -1463,13 +944,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                         |  p/
                         )
                         (
                         |  p/
                         )
                         (
-                            (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
                             # Top tracks, they can also include dots 
                             |(?:MC)[\w\.]*
                         )
                         .*
                      |
                             # Top tracks, they can also include dots 
                             |(?:MC)[\w\.]*
                         )
                         .*
                      |
-                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                      )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
                      )"""
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
     _MORE_PAGES_INDICATOR = r'data-link-type="next"'
@@ -1492,12 +973,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         title_span = (search_title('playlist-title') or
             search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
         title_span = (search_title('playlist-title') or
             search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'''(?x)data-index="\d+".*?
-                       data-video-username="(.*?)".*?
+        video_re = r'''(?x)data-video-username=".*?".*?
                        href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
                        href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
-        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
-        # Some of the videos may have beend deleted, their username field is empty
-        ids = [video_id for (username, video_id) in matches if username]
+        ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
         url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
@@ -1517,7 +995,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                 return self.url_result(video_id, 'Youtube', video_id=video_id)
             else:
                 self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                 return self.url_result(video_id, 'Youtube', video_id=video_id)
             else:
-                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
 
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
@@ -1530,6 +1008,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         page = self._download_webpage(url, playlist_id)
         more_widget_html = content_html = page
 
         page = self._download_webpage(url, playlist_id)
         more_widget_html = content_html = page
 
+        # Check if the playlist exists or is private
+        if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
+            raise ExtractorError(
+                u'The playlist doesn\'t exist or is private, use --username or '
+                '--netrc to access it.',
+                expected=True)
+
         # Extract the video ids from the playlist pages
         ids = []
 
         # Extract the video ids from the playlist pages
         ids = []
 
@@ -1545,12 +1030,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                 break
 
             more = self._download_json(
                 break
 
             more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
             content_html = more['content_html']
             more_widget_html = more['load_more_widget_html']
 
         playlist_title = self._html_search_regex(
             content_html = more['content_html']
             more_widget_html = more['load_more_widget_html']
 
         playlist_title = self._html_search_regex(
-                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
+            r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
+            page, u'title')
 
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, playlist_title)
@@ -1646,7 +1134,7 @@ class YoutubeChannelIE(InfoExtractor):
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1708,7 +1196,7 @@ class YoutubeUserIE(InfoExtractor):
 
 class YoutubeSearchIE(SearchInfoExtractor):
     IE_DESC = u'YouTube.com searches'
 
 class YoutubeSearchIE(SearchInfoExtractor):
     IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
     _MAX_RESULTS = 1000
     IE_NAME = u'youtube:search'
     _SEARCH_KEY = 'ytsearch'
     _MAX_RESULTS = 1000
     IE_NAME = u'youtube:search'
     _SEARCH_KEY = 'ytsearch'
@@ -1719,9 +1207,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
         video_ids = []
         pagenum = 0
         limit = n
         video_ids = []
         pagenum = 0
         limit = n
+        PAGE_SIZE = 50
 
 
-        while (50 * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+        while (PAGE_SIZE * pagenum) < limit:
+            result_url = self._API_URL % (
+                compat_urllib_parse.quote_plus(query.encode('utf-8')),
+                (PAGE_SIZE * pagenum) + 1)
             data_json = self._download_webpage(
                 result_url, video_id=u'query "%s"' % query,
                 note=u'Downloading page %s' % (pagenum + 1),
             data_json = self._download_webpage(
                 result_url, video_id=u'query "%s"' % query,
                 note=u'Downloading page %s' % (pagenum + 1),
@@ -1745,12 +1236,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
                   for video_id in video_ids]
         return self.playlist_result(videos, query)
 
                   for video_id in video_ids]
         return self.playlist_result(videos, query)
 
+
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = u'YouTube.com searches, newest videos first'
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = u'YouTube.com searches, newest videos first'
 
+
+class YoutubeSearchURLIE(InfoExtractor):
+    IE_DESC = u'YouTube.com search URLs'
+    IE_NAME = u'youtube:search_url'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+
+        webpage = self._download_webpage(url, query)
+        result_code = self._search_regex(
+            r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML')
+
+        part_codes = re.findall(
+            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
+        entries = []
+        for part_code in part_codes:
+            part_title = self._html_search_regex(
+                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False)
+            part_url_snippet = self._html_search_regex(
+                r'(?s)href="([^"]+)"', part_code, 'item URL')
+            part_url = compat_urlparse.urljoin(
+                'https://www.youtube.com/', part_url_snippet)
+            entries.append({
+                '_type': 'url',
+                'url': part_url,
+                'title': part_title,
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': query,
+        }
+
+
 class YoutubeShowIE(InfoExtractor):
     IE_DESC = u'YouTube.com (multi-season) shows'
     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
 class YoutubeShowIE(InfoExtractor):
     IE_DESC = u'YouTube.com (multi-season) shows'
     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
@@ -1794,23 +1323,25 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
         feed_entries = []
         paging = 0
         for i in itertools.count(1):
         feed_entries = []
         paging = 0
         for i in itertools.count(1):
-            info = self._download_webpage(self._FEED_TEMPLATE % paging,
+            info = self._download_json(self._FEED_TEMPLATE % paging,
                                           u'%s feed' % self._FEED_NAME,
                                           u'Downloading page %s' % i)
                                           u'%s feed' % self._FEED_NAME,
                                           u'Downloading page %s' % i)
-            info = json.loads(info)
-            feed_html = info['feed_html']
+            feed_html = info.get('feed_html') or info.get('content_html')
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
             ids = orderedSet(m.group(1) for m in m_ids)
             feed_entries.extend(
                 self.url_result(video_id, 'Youtube', video_id=video_id)
                 for video_id in ids)
             m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
             ids = orderedSet(m.group(1) for m in m_ids)
             feed_entries.extend(
                 self.url_result(video_id, 'Youtube', video_id=video_id)
                 for video_id in ids)
-            if info['paging'] is None:
+            mobj = re.search(
+                r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
+                feed_html)
+            if mobj is None:
                 break
                 break
-            paging = info['paging']
+            paging = mobj.group('paging')
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
         return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
 
 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
     _FEED_NAME = 'subscriptions'
     _PLAYLIST_TITLE = u'Youtube Subscriptions'
     _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
     _FEED_NAME = 'subscriptions'
     _PLAYLIST_TITLE = u'Youtube Subscriptions'
@@ -1851,10 +1382,21 @@ class YoutubeTruncatedURLIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
     IE_NAME = 'youtube:truncated_url'
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
-        (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$|
+        (?:https?://)?[^/]+/watch\?(?:
+            feature=[a-z_]+|
+            annotation_id=annotation_[^&]+
+        )?$|
         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
     '''
 
         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
     '''
 
+    _TESTS = [{
+        'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.youtube.com/watch?',
+        'only_matching': True,
+    }]
+
     def _real_extract(self, url):
         raise ExtractorError(
             u'Did you forget to quote the URL? Remember that & is a meta '
     def _real_extract(self, url):
         raise ExtractorError(
             u'Did you forget to quote the URL? Remember that & is a meta '