_ Git - youtube-dl/blob - youtube_dl/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5
   6 import itertools
   7 import json
   8 import os.path
   9 import re
  10 import time
  11 import traceback
  12
  13 from .common import InfoExtractor, SearchInfoExtractor
  14 from ..jsinterp import JSInterpreter
  15 from ..swfinterp import SWFInterpreter
  16 from ..compat import (
  17     compat_chr,
  18     compat_parse_qs,
  19     compat_urllib_parse,
  20     compat_urllib_parse_unquote,
  21     compat_urllib_parse_unquote_plus,
  22     compat_urllib_parse_urlparse,
  23     compat_urlparse,
  24     compat_str,
  25 )
  26 from ..utils import (
  27     clean_html,
  28     encode_dict,
  29     ExtractorError,
  30     float_or_none,
  31     get_element_by_attribute,
  32     get_element_by_id,
  33     int_or_none,
  34     orderedSet,
  35     parse_duration,
  36     remove_quotes,
  37     remove_start,
  38     sanitized_Request,
  39     smuggle_url,
  40     str_to_int,
  41     unescapeHTML,
  42     unified_strdate,
  43     unsmuggle_url,
  44     uppercase_escape,
  45     ISO3166Utils,
  46 )
  47
  48
  49 class YoutubeBaseInfoExtractor(InfoExtractor):
  50     """Provide base functions for Youtube extractors"""
  51     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  52     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  53     _NETRC_MACHINE = 'youtube'
  54     # If True it will raise an error if no login info is provided
  55     _LOGIN_REQUIRED = False
  56
  57     def _set_language(self):
  58         self._set_cookie(
  59             '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  60             # YouTube sets the expire time to about two months
  61             expire_time=time.time() + 2 * 30 * 24 * 3600)
  62
  63     def _ids_to_results(self, ids):
  64         return [
  65             self.url_result(vid_id, 'Youtube', video_id=vid_id)
  66             for vid_id in ids]
  67
  68     def _login(self):
  69         """
  70         Attempt to log in to YouTube.
  71         True is returned if successful or skipped.
  72         False is returned if login failed.
  73
  74         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  75         """
  76         (username, password) = self._get_login_info()
  77         # No authentication to be performed
  78         if username is None:
  79             if self._LOGIN_REQUIRED:
  80                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  81             return True
  82
  83         login_page = self._download_webpage(
  84             self._LOGIN_URL, None,
  85             note='Downloading login page',
  86             errnote='unable to fetch login page', fatal=False)
  87         if login_page is False:
  88             return
  89
  90         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
  91                                   login_page, 'Login GALX parameter')
  92
  93         # Log in
  94         login_form_strs = {
  95             'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
  96             'Email': username,
  97             'GALX': galx,
  98             'Passwd': password,
  99
 100             'PersistentCookie': 'yes',
 101             '_utf8': '霱',
 102             'bgresponse': 'js_disabled',
 103             'checkConnection': '',
 104             'checkedDomains': 'youtube',
 105             'dnConn': '',
 106             'pstMsg': '0',
 107             'rmShown': '1',
 108             'secTok': '',
 109             'signIn': 'Sign in',
 110             'timeStmp': '',
 111             'service': 'youtube',
 112             'uilel': '3',
 113             'hl': 'en_US',
 114         }
 115
 116         login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
 117
 118         req = sanitized_Request(self._LOGIN_URL, login_data)
 119         login_results = self._download_webpage(
 120             req, None,
 121             note='Logging in', errnote='unable to log in', fatal=False)
 122         if login_results is False:
 123             return False
 124
 125         if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
 126             raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
 127
 128         # Two-Factor
 129         # TODO add SMS and phone call support - these require making a request and then prompting the user
 130
 131         if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
 132             tfa_code = self._get_tfa_info('2-step verification code')
 133
 134             if not tfa_code:
 135                 self._downloader.report_warning(
 136                     'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 137                     '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 138                 return False
 139
 140             tfa_code = remove_start(tfa_code, 'G-')
 141
 142             tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
 143
 144             tfa_form_strs.update({
 145                 'Pin': tfa_code,
 146                 'TrustDevice': 'on',
 147             })
 148
 149             tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
 150
 151             tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
 152             tfa_results = self._download_webpage(
 153                 tfa_req, None,
 154                 note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
 155
 156             if tfa_results is False:
 157                 return False
 158
 159             if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
 160                 self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
 161                 return False
 162             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
 163                 self._downloader.report_warning('unable to log in - did the page structure change?')
 164                 return False
 165             if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
 166                 self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
 167                 return False
 168
 169         if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
 170             self._downloader.report_warning('unable to log in: bad username or password')
 171             return False
 172         return True
 173
 174     def _real_initialize(self):
 175         if self._downloader is None:
 176             return
 177         self._set_language()
 178         if not self._login():
 179             return
 180
 181
 182 class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
 183     # Extract entries from page with "Load more" button
 184     def _entries(self, page, playlist_id):
 185         more_widget_html = content_html = page
 186         for page_num in itertools.count(1):
 187             for entry in self._process_page(content_html):
 188                 yield entry
 189
 190             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
 191             if not mobj:
 192                 break
 193
 194             more = self._download_json(
 195                 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
 196                 'Downloading page #%s' % page_num,
 197                 transform_source=uppercase_escape)
 198             content_html = more['content_html']
 199             if not content_html.strip():
 200                 # Some webpages show a "Load more" button but they don't
 201                 # have more videos
 202                 break
 203             more_widget_html = more['load_more_widget_html']
 204
 205
 206 class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 207     def _process_page(self, content):
 208         for video_id, video_title in self.extract_videos_from_page(content):
 209             yield self.url_result(video_id, 'Youtube', video_id, video_title)
 210
 211     def extract_videos_from_page(self, page):
 212         ids_in_page = []
 213         titles_in_page = []
 214         for mobj in re.finditer(self._VIDEO_RE, page):
 215             # The link with index 0 is not the first video of the playlist (not sure if still actual)
 216             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
 217                 continue
 218             video_id = mobj.group('id')
 219             video_title = unescapeHTML(mobj.group('title'))
 220             if video_title:
 221                 video_title = video_title.strip()
 222             try:
 223                 idx = ids_in_page.index(video_id)
 224                 if video_title and not titles_in_page[idx]:
 225                     titles_in_page[idx] = video_title
 226             except ValueError:
 227                 ids_in_page.append(video_id)
 228                 titles_in_page.append(video_title)
 229         return zip(ids_in_page, titles_in_page)
 230
 231
 232 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 233     def _process_page(self, content):
 234         for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
 235             yield self.url_result(
 236                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
 237
 238     def _real_extract(self, url):
 239         playlist_id = self._match_id(url)
 240         webpage = self._download_webpage(url, playlist_id)
 241         title = self._og_search_title(webpage, fatal=False)
 242         return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
 243
 244
 245 class YoutubeIE(YoutubeBaseInfoExtractor):
 246     IE_DESC = 'YouTube.com'
 247     _VALID_URL = r"""(?x)^
 248                      (
 249                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 250                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
 251                             (?:www\.)?deturl\.com/www\.youtube\.com/|
 252                             (?:www\.)?pwnyoutube\.com/|
 253                             (?:www\.)?yourepeat\.com/|
 254                             tube\.majestyc\.net/|
 255                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
 256                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 257                          (?:                                                  # the various things that can precede the ID:
 258                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 259                              |(?:                                             # or the v= param in all its forms
 260                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 261                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 262                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 263                                  v=
 264                              )
 265                          ))
 266                          |(?:
 267                             youtu\.be|                                        # just youtu.be/xxxx
 268                             vid\.plus                                         # or vid.plus/xxxx
 269                          )/
 270                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 271                          )
 272                      )?                                                       # all until now is optional -> you can pass the naked ID
 273                      ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
 274                      (?!.*?&list=)                                            # combined list/video URLs are handled by the playlist IE
 275                      (?(1).+)?                                                # if we found the ID, everything can follow
 276                      $"""
 277     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 278     _formats = {
 279         '5': {'ext': 'flv', 'width': 400, 'height': 240},
 280         '6': {'ext': 'flv', 'width': 450, 'height': 270},
 281         '13': {'ext': '3gp'},
 282         '17': {'ext': '3gp', 'width': 176, 'height': 144},
 283         '18': {'ext': 'mp4', 'width': 640, 'height': 360},
 284         '22': {'ext': 'mp4', 'width': 1280, 'height': 720},
 285         '34': {'ext': 'flv', 'width': 640, 'height': 360},
 286         '35': {'ext': 'flv', 'width': 854, 'height': 480},
 287         '36': {'ext': '3gp', 'width': 320, 'height': 240},
 288         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
 289         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
 290         '43': {'ext': 'webm', 'width': 640, 'height': 360},
 291         '44': {'ext': 'webm', 'width': 854, 'height': 480},
 292         '45': {'ext': 'webm', 'width': 1280, 'height': 720},
 293         '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
 294         '59': {'ext': 'mp4', 'width': 854, 'height': 480},
 295         '78': {'ext': 'mp4', 'width': 854, 'height': 480},
 296
 297
 298         # 3d videos
 299         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
 300         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
 301         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
 302         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
 303         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
 304         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
 305         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
 306
 307         # Apple HTTP Live Streaming
 308         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 309         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
 310         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
 311         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
 312         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
 313         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
 314         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
 315
 316         # DASH mp4 video
 317         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 318         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 319         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 320         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 321         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 322         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
 323         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 324         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 325         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
 326         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
 327         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
 328
 329         # Dash mp4 audio
 330         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
 331         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
 332         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
 333
 334         # Dash webm
 335         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 336         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 337         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 338         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 339         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 340         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
 341         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
 342         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 343         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 344         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 345         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 346         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 347         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 348         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 349         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 350         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 351         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
 352         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 353         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 354         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 355         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
 356         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
 357
 358         # Dash webm audio
 359         '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
 360         '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
 361
 362         # Dash webm audio with opus inside
 363         '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
 364         '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
 365         '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
 366
 367         # RTMP (unnamed)
 368         '_rtmp': {'protocol': 'rtmp'},
 369     }
 370
 371     IE_NAME = 'youtube'
 372     _TESTS = [
 373         {
 374             'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
 375             'info_dict': {
 376                 'id': 'BaW_jenozKc',
 377                 'ext': 'mp4',
 378                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 379                 'uploader': 'Philipp Hagemeister',
 380                 'uploader_id': 'phihag',
 381                 'upload_date': '20121002',
 382                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 383                 'categories': ['Science & Technology'],
 384                 'tags': ['youtube-dl'],
 385                 'like_count': int,
 386                 'dislike_count': int,
 387                 'start_time': 1,
 388                 'end_time': 9,
 389             }
 390         },
 391         {
 392             'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
 393             'note': 'Test generic use_cipher_signature video (#897)',
 394             'info_dict': {
 395                 'id': 'UxxajLWwzqY',
 396                 'ext': 'mp4',
 397                 'upload_date': '20120506',
 398                 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
 399                 'alt_title': 'I Love It (feat. Charli XCX)',
 400                 'description': 'md5:782e8651347686cba06e58f71ab51773',
 401                 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
 402                          'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
 403                          'iconic ep', 'iconic', 'love', 'it'],
 404                 'uploader': 'Icona Pop',
 405                 'uploader_id': 'IconaPop',
 406                 'creator': 'Icona Pop',
 407             }
 408         },
 409         {
 410             'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
 411             'note': 'Test VEVO video with age protection (#956)',
 412             'info_dict': {
 413                 'id': '07FYdnEawAQ',
 414                 'ext': 'mp4',
 415                 'upload_date': '20130703',
 416                 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
 417                 'alt_title': 'Tunnel Vision',
 418                 'description': 'md5:64249768eec3bc4276236606ea996373',
 419                 'uploader': 'justintimberlakeVEVO',
 420                 'uploader_id': 'justintimberlakeVEVO',
 421                 'creator': 'Justin Timberlake',
 422                 'age_limit': 18,
 423             }
 424         },
 425         {
 426             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 427             'note': 'Embed-only video (#1746)',
 428             'info_dict': {
 429                 'id': 'yZIXLfi8CZQ',
 430                 'ext': 'mp4',
 431                 'upload_date': '20120608',
 432                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 433                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 434                 'uploader': 'SET India',
 435                 'uploader_id': 'setindia',
 436                 'age_limit': 18,
 437             }
 438         },
 439         {
 440             'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
 441             'note': 'Use the first video ID in the URL',
 442             'info_dict': {
 443                 'id': 'BaW_jenozKc',
 444                 'ext': 'mp4',
 445                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 446                 'uploader': 'Philipp Hagemeister',
 447                 'uploader_id': 'phihag',
 448                 'upload_date': '20121002',
 449                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 450                 'categories': ['Science & Technology'],
 451                 'tags': ['youtube-dl'],
 452                 'like_count': int,
 453                 'dislike_count': int,
 454             },
 455             'params': {
 456                 'skip_download': True,
 457             },
 458         },
 459         {
 460             'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
 461             'note': '256k DASH audio (format 141) via DASH manifest',
 462             'info_dict': {
 463                 'id': 'a9LDPn-MO4I',
 464                 'ext': 'm4a',
 465                 'upload_date': '20121002',
 466                 'uploader_id': '8KVIDEO',
 467                 'description': '',
 468                 'uploader': '8KVIDEO',
 469                 'title': 'UHDTV TEST 8K VIDEO.mp4'
 470             },
 471             'params': {
 472                 'youtube_include_dash_manifest': True,
 473                 'format': '141',
 474             },
 475         },
 476         # DASH manifest with encrypted signature
 477         {
 478             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
 479             'info_dict': {
 480                 'id': 'IB3lcPjvWLA',
 481                 'ext': 'm4a',
 482                 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
 483                 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
 484                 'uploader': 'AfrojackVEVO',
 485                 'uploader_id': 'AfrojackVEVO',
 486                 'upload_date': '20131011',
 487             },
 488             'params': {
 489                 'youtube_include_dash_manifest': True,
 490                 'format': '141',
 491             },
 492         },
 493         # JS player signature function name containing $
 494         {
 495             'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
 496             'info_dict': {
 497                 'id': 'nfWlot6h_JM',
 498                 'ext': 'm4a',
 499                 'title': 'Taylor Swift - Shake It Off',
 500                 'alt_title': 'Shake It Off',
 501                 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
 502                 'uploader': 'TaylorSwiftVEVO',
 503                 'uploader_id': 'TaylorSwiftVEVO',
 504                 'upload_date': '20140818',
 505                 'creator': 'Taylor Swift',
 506             },
 507             'params': {
 508                 'youtube_include_dash_manifest': True,
 509                 'format': '141',
 510             },
 511         },
 512         # Controversy video
 513         {
 514             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
 515             'info_dict': {
 516                 'id': 'T4XJQO3qol8',
 517                 'ext': 'mp4',
 518                 'upload_date': '20100909',
 519                 'uploader': 'The Amazing Atheist',
 520                 'uploader_id': 'TheAmazingAtheist',
 521                 'title': 'Burning Everyone\'s Koran',
 522                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
 523             }
 524         },
 525         # Normal age-gate video (No vevo, embed allowed)
 526         {
 527             'url': 'http://youtube.com/watch?v=HtVdAasjOgU',
 528             'info_dict': {
 529                 'id': 'HtVdAasjOgU',
 530                 'ext': 'mp4',
 531                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
 532                 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
 533                 'uploader': 'The Witcher',
 534                 'uploader_id': 'WitcherGame',
 535                 'upload_date': '20140605',
 536                 'age_limit': 18,
 537             },
 538         },
 539         # Age-gate video with encrypted signature
 540         {
 541             'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
 542             'info_dict': {
 543                 'id': '6kLq3WMV1nU',
 544                 'ext': 'mp4',
 545                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
 546                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
 547                 'uploader': 'LloydVEVO',
 548                 'uploader_id': 'LloydVEVO',
 549                 'upload_date': '20110629',
 550                 'age_limit': 18,
 551             },
 552         },
 553         # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
 554         {
 555             'url': '__2ABJjxzNo',
 556             'info_dict': {
 557                 'id': '__2ABJjxzNo',
 558                 'ext': 'mp4',
 559                 'upload_date': '20100430',
 560                 'uploader_id': 'deadmau5',
 561                 'creator': 'deadmau5',
 562                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
 563                 'uploader': 'deadmau5',
 564                 'title': 'Deadmau5 - Some Chords (HD)',
 565                 'alt_title': 'Some Chords',
 566             },
 567             'expected_warnings': [
 568                 'DASH manifest missing',
 569             ]
 570         },
 571         # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
 572         {
 573             'url': 'lqQg6PlCWgI',
 574             'info_dict': {
 575                 'id': 'lqQg6PlCWgI',
 576                 'ext': 'mp4',
 577                 'upload_date': '20150827',
 578                 'uploader_id': 'olympic',
 579                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
 580                 'uploader': 'Olympics',
 581                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
 582             },
 583             'params': {
 584                 'skip_download': 'requires avconv',
 585             }
 586         },
 587         # Non-square pixels
 588         {
 589             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
 590             'info_dict': {
 591                 'id': '_b-2C3KPAM0',
 592                 'ext': 'mp4',
 593                 'stretched_ratio': 16 / 9.,
 594                 'upload_date': '20110310',
 595                 'uploader_id': 'AllenMeow',
 596                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
 597                 'uploader': '孫艾倫',
 598                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
 599             },
 600         },
 601         # url_encoded_fmt_stream_map is empty string
 602         {
 603             'url': 'qEJwOuvDf7I',
 604             'info_dict': {
 605                 'id': 'qEJwOuvDf7I',
 606                 'ext': 'webm',
 607                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
 608                 'description': '',
 609                 'upload_date': '20150404',
 610                 'uploader_id': 'spbelect',
 611                 'uploader': 'Наблюдатели Петербурга',
 612             },
 613             'params': {
 614                 'skip_download': 'requires avconv',
 615             }
 616         },
 617         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
 618         {
 619             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
 620             'info_dict': {
 621                 'id': 'FIl7x6_3R5Y',
 622                 'ext': 'mp4',
 623                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
 624                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
 625                 'upload_date': '20150625',
 626                 'uploader_id': 'dorappi2000',
 627                 'uploader': 'dorappi2000',
 628                 'formats': 'mincount:33',
 629             },
 630         },
 631         # DASH manifest with segment_list
 632         {
 633             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
 634             'md5': '8ce563a1d667b599d21064e982ab9e31',
 635             'info_dict': {
 636                 'id': 'CsmdDsKjzN8',
 637                 'ext': 'mp4',
 638                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
 639                 'uploader': 'Airtek',
 640                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
 641                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
 642                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
 643             },
 644             'params': {
 645                 'youtube_include_dash_manifest': True,
 646                 'format': '135',  # bestvideo
 647             }
 648         },
 649         {
 650             # Multifeed videos (multiple cameras), URL is for Main Camera
 651             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
 652             'info_dict': {
 653                 'id': 'jqWvoWXjCVs',
 654                 'title': 'teamPGP: Rocket League Noob Stream',
 655                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
 656             },
 657             'playlist': [{
 658                 'info_dict': {
 659                     'id': 'jqWvoWXjCVs',
 660                     'ext': 'mp4',
 661                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
 662                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 663                     'upload_date': '20150721',
 664                     'uploader': 'Beer Games Beer',
 665                     'uploader_id': 'beergamesbeer',
 666                 },
 667             }, {
 668                 'info_dict': {
 669                     'id': '6h8e8xoXJzg',
 670                     'ext': 'mp4',
 671                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
 672                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 673                     'upload_date': '20150721',
 674                     'uploader': 'Beer Games Beer',
 675                     'uploader_id': 'beergamesbeer',
 676                 },
 677             }, {
 678                 'info_dict': {
 679                     'id': 'PUOgX5z9xZw',
 680                     'ext': 'mp4',
 681                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
 682                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 683                     'upload_date': '20150721',
 684                     'uploader': 'Beer Games Beer',
 685                     'uploader_id': 'beergamesbeer',
 686                 },
 687             }, {
 688                 'info_dict': {
 689                     'id': 'teuwxikvS5k',
 690                     'ext': 'mp4',
 691                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
 692                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
 693                     'upload_date': '20150721',
 694                     'uploader': 'Beer Games Beer',
 695                     'uploader_id': 'beergamesbeer',
 696                 },
 697             }],
 698             'params': {
 699                 'skip_download': True,
 700             },
 701         },
 702         {
 703             'url': 'http://vid.plus/FlRa-iH7PGw',
 704             'only_matching': True,
 705         },
 706         {
 707             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
 708             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
 709             'info_dict': {
 710                 'id': 'lsguqyKfVQg',
 711                 'ext': 'mp4',
 712                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
 713                 'alt_title': 'Dark Walk',
 714                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
 715                 'upload_date': '20151119',
 716                 'uploader_id': 'IronSoulElf',
 717                 'uploader': 'IronSoulElf',
 718                 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
 719             },
 720             'params': {
 721                 'skip_download': True,
 722             },
 723         },
 724         {
 725             # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
 726             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
 727             'only_matching': True,
 728         },
 729         {
 730             # Video with yt:stretch=17:0
 731             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
 732             'info_dict': {
 733                 'id': 'Q39EVAstoRM',
 734                 'ext': 'mp4',
 735                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
 736                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
 737                 'upload_date': '20151107',
 738                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
 739                 'uploader': 'CH GAMER DROID',
 740             },
 741             'params': {
 742                 'skip_download': True,
 743             },
 744         },
 745         {
 746             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
 747             'only_matching': True,
 748         }
 749     ]
 750
 751     def __init__(self, *args, **kwargs):
 752         super(YoutubeIE, self).__init__(*args, **kwargs)
 753         self._player_cache = {}
 754
 755     def report_video_info_webpage_download(self, video_id):
 756         """Report attempt to download video info webpage."""
 757         self.to_screen('%s: Downloading video info webpage' % video_id)
 758
 759     def report_information_extraction(self, video_id):
 760         """Report attempt to extract video information."""
 761         self.to_screen('%s: Extracting video information' % video_id)
 762
 763     def report_unavailable_format(self, video_id, format):
 764         """Report extracted video URL."""
 765         self.to_screen('%s: Format %s not available' % (video_id, format))
 766
 767     def report_rtmp_download(self):
 768         """Indicate the download will use the RTMP protocol."""
 769         self.to_screen('RTMP download detected')
 770
 771     def _signature_cache_id(self, example_sig):
 772         """ Return a string representation of a signature """
 773         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 774
 775     def _extract_signature_function(self, video_id, player_url, example_sig):
 776         id_m = re.match(
 777             r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
 778             player_url)
 779         if not id_m:
 780             raise ExtractorError('Cannot identify player %r' % player_url)
 781         player_type = id_m.group('ext')
 782         player_id = id_m.group('id')
 783
 784         # Read from filesystem cache
 785         func_id = '%s_%s_%s' % (
 786             player_type, player_id, self._signature_cache_id(example_sig))
 787         assert os.path.basename(func_id) == func_id
 788
 789         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
 790         if cache_spec is not None:
 791             return lambda s: ''.join(s[i] for i in cache_spec)
 792
 793         download_note = (
 794             'Downloading player %s' % player_url
 795             if self._downloader.params.get('verbose') else
 796             'Downloading %s player %s' % (player_type, player_id)
 797         )
 798         if player_type == 'js':
 799             code = self._download_webpage(
 800                 player_url, video_id,
 801                 note=download_note,
 802                 errnote='Download of %s failed' % player_url)
 803             res = self._parse_sig_js(code)
 804         elif player_type == 'swf':
 805             urlh = self._request_webpage(
 806                 player_url, video_id,
 807                 note=download_note,
 808                 errnote='Download of %s failed' % player_url)
 809             code = urlh.read()
 810             res = self._parse_sig_swf(code)
 811         else:
 812             assert False, 'Invalid player type %r' % player_type
 813
 814         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 815         cache_res = res(test_string)
 816         cache_spec = [ord(c) for c in cache_res]
 817
 818         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
 819         return res
 820
 821     def _print_sig_code(self, func, example_sig):
 822         def gen_sig_code(idxs):
 823             def _genslice(start, end, step):
 824                 starts = '' if start == 0 else str(start)
 825                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
 826                 steps = '' if step == 1 else (':%d' % step)
 827                 return 's[%s%s%s]' % (starts, ends, steps)
 828
 829             step = None
 830             # Quelch pyflakes warnings - start will be set when step is set
 831             start = '(Never used)'
 832             for i, prev in zip(idxs[1:], idxs[:-1]):
 833                 if step is not None:
 834                     if i - prev == step:
 835                         continue
 836                     yield _genslice(start, prev, step)
 837                     step = None
 838                     continue
 839                 if i - prev in [-1, 1]:
 840                     step = i - prev
 841                     start = prev
 842                     continue
 843                 else:
 844                     yield 's[%d]' % prev
 845             if step is None:
 846                 yield 's[%d]' % i
 847             else:
 848                 yield _genslice(start, i, step)
 849
 850         test_string = ''.join(map(compat_chr, range(len(example_sig))))
 851         cache_res = func(test_string)
 852         cache_spec = [ord(c) for c in cache_res]
 853         expr_code = ' + '.join(gen_sig_code(cache_spec))
 854         signature_id_tuple = '(%s)' % (
 855             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
 856         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
 857                 '    return %s\n') % (signature_id_tuple, expr_code)
 858         self.to_screen('Extracted signature function:\n' + code)
 859
 860     def _parse_sig_js(self, jscode):
 861         funcname = self._search_regex(
 862             r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
 863             'Initial JS player signature function name')
 864
 865         jsi = JSInterpreter(jscode)
 866         initial_function = jsi.extract_function(funcname)
 867         return lambda s: initial_function([s])
 868
 869     def _parse_sig_swf(self, file_contents):
 870         swfi = SWFInterpreter(file_contents)
 871         TARGET_CLASSNAME = 'SignatureDecipher'
 872         searched_class = swfi.extract_class(TARGET_CLASSNAME)
 873         initial_function = swfi.extract_function(searched_class, 'decipher')
 874         return lambda s: initial_function([s])
 875
 876     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
 877         """Turn the encrypted s field into a working signature"""
 878
 879         if player_url is None:
 880             raise ExtractorError('Cannot decrypt signature without player_url')
 881
 882         if player_url.startswith('//'):
 883             player_url = 'https:' + player_url
 884         try:
 885             player_id = (player_url, self._signature_cache_id(s))
 886             if player_id not in self._player_cache:
 887                 func = self._extract_signature_function(
 888                     video_id, player_url, s
 889                 )
 890                 self._player_cache[player_id] = func
 891             func = self._player_cache[player_id]
 892             if self._downloader.params.get('youtube_print_sig_code'):
 893                 self._print_sig_code(func, s)
 894             return func(s)
 895         except Exception as e:
 896             tb = traceback.format_exc()
 897             raise ExtractorError(
 898                 'Signature extraction failed: ' + tb, cause=e)
 899
 900     def _get_subtitles(self, video_id, webpage):
 901         try:
 902             subs_doc = self._download_xml(
 903                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
 904                 video_id, note=False)
 905         except ExtractorError as err:
 906             self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
 907             return {}
 908
 909         sub_lang_list = {}
 910         for track in subs_doc.findall('track'):
 911             lang = track.attrib['lang_code']
 912             if lang in sub_lang_list:
 913                 continue
 914             sub_formats = []
 915             for ext in ['sbv', 'vtt', 'srt']:
 916                 params = compat_urllib_parse.urlencode({
 917                     'lang': lang,
 918                     'v': video_id,
 919                     'fmt': ext,
 920                     'name': track.attrib['name'].encode('utf-8'),
 921                 })
 922                 sub_formats.append({
 923                     'url': 'https://www.youtube.com/api/timedtext?' + params,
 924                     'ext': ext,
 925                 })
 926             sub_lang_list[lang] = sub_formats
 927         if not sub_lang_list:
 928             self._downloader.report_warning('video doesn\'t have subtitles')
 929             return {}
 930         return sub_lang_list
 931
 932     def _get_ytplayer_config(self, video_id, webpage):
 933         patterns = (
 934             # User data may contain arbitrary character sequences that may affect
 935             # JSON extraction with regex, e.g. when '};' is contained the second
 936             # regex won't capture the whole JSON. Yet working around by trying more
 937             # concrete regex first keeping in mind proper quoted string handling
 938             # to be implemented in future that will replace this workaround (see
 939             # https://github.com/rg3/youtube-dl/issues/7468,
 940             # https://github.com/rg3/youtube-dl/pull/7599)
 941             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
 942             r';ytplayer\.config\s*=\s*({.+?});',
 943         )
 944         config = self._search_regex(
 945             patterns, webpage, 'ytplayer.config', default=None)
 946         if config:
 947             return self._parse_json(
 948                 uppercase_escape(config), video_id, fatal=False)
 949
 950     def _get_automatic_captions(self, video_id, webpage):
 951         """We need the webpage for getting the captions url, pass it as an
 952            argument to speed up the process."""
 953         self.to_screen('%s: Looking for automatic captions' % video_id)
 954         player_config = self._get_ytplayer_config(video_id, webpage)
 955         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
 956         if not player_config:
 957             self._downloader.report_warning(err_msg)
 958             return {}
 959         try:
 960             args = player_config['args']
 961             caption_url = args['ttsurl']
 962             timestamp = args['timestamp']
 963             # We get the available subtitles
 964             list_params = compat_urllib_parse.urlencode({
 965                 'type': 'list',
 966                 'tlangs': 1,
 967                 'asrs': 1,
 968             })
 969             list_url = caption_url + '&' + list_params
 970             caption_list = self._download_xml(list_url, video_id)
 971             original_lang_node = caption_list.find('track')
 972             if original_lang_node is None:
 973                 self._downloader.report_warning('Video doesn\'t have automatic captions')
 974                 return {}
 975             original_lang = original_lang_node.attrib['lang_code']
 976             caption_kind = original_lang_node.attrib.get('kind', '')
 977
 978             sub_lang_list = {}
 979             for lang_node in caption_list.findall('target'):
 980                 sub_lang = lang_node.attrib['lang_code']
 981                 sub_formats = []
 982                 for ext in ['sbv', 'vtt', 'srt']:
 983                     params = compat_urllib_parse.urlencode({
 984                         'lang': original_lang,
 985                         'tlang': sub_lang,
 986                         'fmt': ext,
 987                         'ts': timestamp,
 988                         'kind': caption_kind,
 989                     })
 990                     sub_formats.append({
 991                         'url': caption_url + '&' + params,
 992                         'ext': ext,
 993                     })
 994                 sub_lang_list[sub_lang] = sub_formats
 995             return sub_lang_list
 996         # An extractor error can be raise by the download process if there are
 997         # no automatic captions but there are subtitles
 998         except (KeyError, ExtractorError):
 999             self._downloader.report_warning(err_msg)
1000             return {}
1001
1002     @classmethod
1003     def extract_id(cls, url):
1004         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1005         if mobj is None:
1006             raise ExtractorError('Invalid URL: %s' % url)
1007         video_id = mobj.group(2)
1008         return video_id
1009
1010     def _extract_from_m3u8(self, manifest_url, video_id):
1011         url_map = {}
1012
1013         def _get_urls(_manifest):
1014             lines = _manifest.split('\n')
1015             urls = filter(lambda l: l and not l.startswith('#'),
1016                           lines)
1017             return urls
1018         manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
1019         formats_urls = _get_urls(manifest)
1020         for format_url in formats_urls:
1021             itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
1022             url_map[itag] = format_url
1023         return url_map
1024
1025     def _extract_annotations(self, video_id):
1026         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
1027         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1028
1029     def _parse_dash_manifest(
1030             self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
1031         def decrypt_sig(mobj):
1032             s = mobj.group(1)
1033             dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
1034             return '/signature/%s' % dec_s
1035         dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
1036         dash_doc = self._download_xml(
1037             dash_manifest_url, video_id,
1038             note='Downloading DASH manifest',
1039             errnote='Could not download DASH manifest',
1040             fatal=fatal)
1041
1042         if dash_doc is False:
1043             return []
1044
1045         formats = []
1046         for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
1047             mime_type = a.attrib.get('mimeType')
1048             for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
1049                 url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
1050                 if url_el is None:
1051                     continue
1052                 if mime_type == 'text/vtt':
1053                     # TODO implement WebVTT downloading
1054                     pass
1055                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
1056                     segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
1057                     format_id = r.attrib['id']
1058                     video_url = url_el.text
1059                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
1060                     f = {
1061                         'format_id': format_id,
1062                         'url': video_url,
1063                         'width': int_or_none(r.attrib.get('width')),
1064                         'height': int_or_none(r.attrib.get('height')),
1065                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
1066                         'asr': int_or_none(r.attrib.get('audioSamplingRate')),
1067                         'filesize': filesize,
1068                         'fps': int_or_none(r.attrib.get('frameRate')),
1069                     }
1070                     if segment_list is not None:
1071                         f.update({
1072                             'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
1073                             'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
1074                             'protocol': 'http_dash_segments',
1075                         })
1076                     try:
1077                         existing_format = next(
1078                             fo for fo in formats
1079                             if fo['format_id'] == format_id)
1080                     except StopIteration:
1081                         full_info = self._formats.get(format_id, {}).copy()
1082                         full_info.update(f)
1083                         codecs = r.attrib.get('codecs')
1084                         if codecs:
1085                             if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
1086                                 full_info['vcodec'] = codecs
1087                             elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
1088                                 full_info['acodec'] = codecs
1089                         formats.append(full_info)
1090                     else:
1091                         existing_format.update(f)
1092                 else:
1093                     self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
1094         return formats
1095
1096     def _real_extract(self, url):
1097         url, smuggled_data = unsmuggle_url(url, {})
1098
1099         proto = (
1100             'http' if self._downloader.params.get('prefer_insecure', False)
1101             else 'https')
1102
1103         start_time = None
1104         end_time = None
1105         parsed_url = compat_urllib_parse_urlparse(url)
1106         for component in [parsed_url.fragment, parsed_url.query]:
1107             query = compat_parse_qs(component)
1108             if start_time is None and 't' in query:
1109                 start_time = parse_duration(query['t'][0])
1110             if start_time is None and 'start' in query:
1111                 start_time = parse_duration(query['start'][0])
1112             if end_time is None and 'end' in query:
1113                 end_time = parse_duration(query['end'][0])
1114
1115         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1116         mobj = re.search(self._NEXT_URL_RE, url)
1117         if mobj:
1118             url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1119         video_id = self.extract_id(url)
1120
1121         # Get video webpage
1122         url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1123         video_webpage = self._download_webpage(url, video_id)
1124
1125         # Attempt to extract SWF player URL
1126         mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1127         if mobj is not None:
1128             player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1129         else:
1130             player_url = None
1131
1132         dash_mpds = []
1133
1134         def add_dash_mpd(video_info):
1135             dash_mpd = video_info.get('dashmpd')
1136             if dash_mpd and dash_mpd[0] not in dash_mpds:
1137                 dash_mpds.append(dash_mpd[0])
1138
1139         # Get video info
1140         embed_webpage = None
1141         is_live = None
1142         if re.search(r'player-age-gate-content">', video_webpage) is not None:
1143             age_gate = True
1144             # We simulate the access to the video from www.youtube.com/v/{video_id}
1145             # this can be viewed without login into Youtube
1146             url = proto + '://www.youtube.com/embed/%s' % video_id
1147             embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1148             data = compat_urllib_parse.urlencode({
1149                 'video_id': video_id,
1150                 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1151                 'sts': self._search_regex(
1152                     r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1153             })
1154             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1155             video_info_webpage = self._download_webpage(
1156                 video_info_url, video_id,
1157                 note='Refetching age-gated info webpage',
1158                 errnote='unable to download video info webpage')
1159             video_info = compat_parse_qs(video_info_webpage)
1160             add_dash_mpd(video_info)
1161         else:
1162             age_gate = False
1163             video_info = None
1164             # Try looking directly into the video webpage
1165             ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1166             if ytplayer_config:
1167                 args = ytplayer_config['args']
1168                 if args.get('url_encoded_fmt_stream_map'):
1169                     # Convert to the same format returned by compat_parse_qs
1170                     video_info = dict((k, [v]) for k, v in args.items())
1171                     add_dash_mpd(video_info)
1172                 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1173                     is_live = True
1174             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1175                 # We also try looking in get_video_info since it may contain different dashmpd
1176                 # URL that points to a DASH manifest with possibly different itag set (some itags
1177                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1178                 # manifest pointed by get_video_info's dashmpd).
1179                 # The general idea is to take a union of itags of both DASH manifests (for example
1180                 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
1181                 self.report_video_info_webpage_download(video_id)
1182                 for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
1183                     video_info_url = (
1184                         '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1185                         % (proto, video_id, el_type))
1186                     video_info_webpage = self._download_webpage(
1187                         video_info_url,
1188                         video_id, note=False,
1189                         errnote='unable to download video info webpage')
1190                     get_video_info = compat_parse_qs(video_info_webpage)
1191                     if get_video_info.get('use_cipher_signature') != ['True']:
1192                         add_dash_mpd(get_video_info)
1193                     if not video_info:
1194                         video_info = get_video_info
1195                     if 'token' in get_video_info:
1196                         # Different get_video_info requests may report different results, e.g.
1197                         # some may report video unavailability, but some may serve it without
1198                         # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1199                         # the original webpage as well as el=info and el=embedded get_video_info
1200                         # requests report video unavailability due to geo restriction while
1201                         # el=detailpage succeeds and returns valid data). This is probably
1202                         # due to YouTube measures against IP ranges of hosting providers.
1203                         # Working around by preferring the first succeeded video_info containing
1204                         # the token if no such video_info yet was found.
1205                         if 'token' not in video_info:
1206                             video_info = get_video_info
1207                         break
1208         if 'token' not in video_info:
1209             if 'reason' in video_info:
1210                 if 'The uploader has not made this video available in your country.' in video_info['reason']:
1211                     regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
1212                     if regions_allowed:
1213                         raise ExtractorError('YouTube said: This video is available in %s only' % (
1214                             ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
1215                             expected=True)
1216                 raise ExtractorError(
1217                     'YouTube said: %s' % video_info['reason'][0],
1218                     expected=True, video_id=video_id)
1219             else:
1220                 raise ExtractorError(
1221                     '"token" parameter not in video info for unknown reason',
1222                     video_id=video_id)
1223
1224         # title
1225         if 'title' in video_info:
1226             video_title = video_info['title'][0]
1227         else:
1228             self._downloader.report_warning('Unable to extract video title')
1229             video_title = '_'
1230
1231         # description
1232         video_description = get_element_by_id("eow-description", video_webpage)
1233         if video_description:
1234             video_description = re.sub(r'''(?x)
1235                 <a\s+
1236                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1237                     title="([^"]+)"\s+
1238                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
1239                     class="yt-uix-redirect-link"\s*>
1240                 [^<]+
1241                 </a>
1242             ''', r'\1', video_description)
1243             video_description = clean_html(video_description)
1244         else:
1245             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1246             if fd_mobj:
1247                 video_description = unescapeHTML(fd_mobj.group(1))
1248             else:
1249                 video_description = ''
1250
1251         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1252             if not self._downloader.params.get('noplaylist'):
1253                 entries = []
1254                 feed_ids = []
1255                 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
1256                 for feed in multifeed_metadata_list.split(','):
1257                     feed_data = compat_parse_qs(feed)
1258                     entries.append({
1259                         '_type': 'url_transparent',
1260                         'ie_key': 'Youtube',
1261                         'url': smuggle_url(
1262                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1263                             {'force_singlefeed': True}),
1264                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1265                     })
1266                     feed_ids.append(feed_data['id'][0])
1267                 self.to_screen(
1268                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1269                     % (', '.join(feed_ids), video_id))
1270                 return self.playlist_result(entries, video_id, video_title, video_description)
1271             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1272
1273         if 'view_count' in video_info:
1274             view_count = int(video_info['view_count'][0])
1275         else:
1276             view_count = None
1277
1278         # Check for "rental" videos
1279         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1280             raise ExtractorError('"rental" videos not supported')
1281
1282         # Start extracting information
1283         self.report_information_extraction(video_id)
1284
1285         # uploader
1286         if 'author' not in video_info:
1287             raise ExtractorError('Unable to extract uploader name')
1288         video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])
1289
1290         # uploader_id
1291         video_uploader_id = None
1292         mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
1293         if mobj is not None:
1294             video_uploader_id = mobj.group(1)
1295         else:
1296             self._downloader.report_warning('unable to extract uploader nickname')
1297
1298         # thumbnail image
1299         # We try first to get a high quality image:
1300         m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1301                             video_webpage, re.DOTALL)
1302         if m_thumb is not None:
1303             video_thumbnail = m_thumb.group(1)
1304         elif 'thumbnail_url' not in video_info:
1305             self._downloader.report_warning('unable to extract video thumbnail')
1306             video_thumbnail = None
1307         else:   # don't panic if we can't find it
1308             video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1309
1310         # upload date
1311         upload_date = self._html_search_meta(
1312             'datePublished', video_webpage, 'upload date', default=None)
1313         if not upload_date:
1314             upload_date = self._search_regex(
1315                 [r'(?s)id="eow-date.*?>(.*?)</span>',
1316                  r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
1317                 video_webpage, 'upload date', default=None)
1318             if upload_date:
1319                 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1320         upload_date = unified_strdate(upload_date)
1321
1322         m_music = re.search(
1323             r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
1324             video_webpage)
1325         if m_music:
1326             video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1327             video_creator = clean_html(m_music.group('creator'))
1328         else:
1329             video_alt_title = video_creator = None
1330
1331         m_cat_container = self._search_regex(
1332             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1333             video_webpage, 'categories', default=None)
1334         if m_cat_container:
1335             category = self._html_search_regex(
1336                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1337                 default=None)
1338             video_categories = None if category is None else [category]
1339         else:
1340             video_categories = None
1341
1342         video_tags = [
1343             unescapeHTML(m.group('content'))
1344             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1345
1346         def _extract_count(count_name):
1347             return str_to_int(self._search_regex(
1348                 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1349                 % re.escape(count_name),
1350                 video_webpage, count_name, default=None))
1351
1352         like_count = _extract_count('like')
1353         dislike_count = _extract_count('dislike')
1354
1355         # subtitles
1356         video_subtitles = self.extract_subtitles(video_id, video_webpage)
1357         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1358
1359         if 'length_seconds' not in video_info:
1360             self._downloader.report_warning('unable to extract video duration')
1361             video_duration = None
1362         else:
1363             video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))
1364
1365         # annotations
1366         video_annotations = None
1367         if self._downloader.params.get('writeannotations', False):
1368             video_annotations = self._extract_annotations(video_id)
1369
1370         def _map_to_format_list(urlmap):
1371             formats = []
1372             for itag, video_real_url in urlmap.items():
1373                 dct = {
1374                     'format_id': itag,
1375                     'url': video_real_url,
1376                     'player_url': player_url,
1377                 }
1378                 if itag in self._formats:
1379                     dct.update(self._formats[itag])
1380                 formats.append(dct)
1381             return formats
1382
1383         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1384             self.report_rtmp_download()
1385             formats = [{
1386                 'format_id': '_rtmp',
1387                 'protocol': 'rtmp',
1388                 'url': video_info['conn'][0],
1389                 'player_url': player_url,
1390             }]
1391         elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
1392             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1393             if 'rtmpe%3Dyes' in encoded_url_map:
1394                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
1395             formats = []
1396             for url_data_str in encoded_url_map.split(','):
1397                 url_data = compat_parse_qs(url_data_str)
1398                 if 'itag' not in url_data or 'url' not in url_data:
1399                     continue
1400                 format_id = url_data['itag'][0]
1401                 url = url_data['url'][0]
1402
1403                 if 'sig' in url_data:
1404                     url += '&signature=' + url_data['sig'][0]
1405                 elif 's' in url_data:
1406                     encrypted_sig = url_data['s'][0]
1407                     ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1408
1409                     jsplayer_url_json = self._search_regex(
1410                         ASSETS_RE,
1411                         embed_webpage if age_gate else video_webpage,
1412                         'JS player URL (1)', default=None)
1413                     if not jsplayer_url_json and not age_gate:
1414                         # We need the embed website after all
1415                         if embed_webpage is None:
1416                             embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1417                             embed_webpage = self._download_webpage(
1418                                 embed_url, video_id, 'Downloading embed webpage')
1419                         jsplayer_url_json = self._search_regex(
1420                             ASSETS_RE, embed_webpage, 'JS player URL')
1421
1422                     player_url = json.loads(jsplayer_url_json)
1423                     if player_url is None:
1424                         player_url_json = self._search_regex(
1425                             r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
1426                             video_webpage, 'age gate player URL')
1427                         player_url = json.loads(player_url_json)
1428
1429                     if self._downloader.params.get('verbose'):
1430                         if player_url is None:
1431                             player_version = 'unknown'
1432                             player_desc = 'unknown'
1433                         else:
1434                             if player_url.endswith('swf'):
1435                                 player_version = self._search_regex(
1436                                     r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
1437                                     'flash player', fatal=False)
1438                                 player_desc = 'flash player %s' % player_version
1439                             else:
1440                                 player_version = self._search_regex(
1441                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
1442                                     player_url,
1443                                     'html5 player', fatal=False)
1444                                 player_desc = 'html5 player %s' % player_version
1445
1446                         parts_sizes = self._signature_cache_id(encrypted_sig)
1447                         self.to_screen('{%s} signature length %s, %s' %
1448                                        (format_id, parts_sizes, player_desc))
1449
1450                     signature = self._decrypt_signature(
1451                         encrypted_sig, video_id, player_url, age_gate)
1452                     url += '&signature=' + signature
1453                 if 'ratebypass' not in url:
1454                     url += '&ratebypass=yes'
1455
1456                 # Some itags are not included in DASH manifest thus corresponding formats will
1457                 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1458                 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1459                 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1460                 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
1461                 dct = {
1462                     'format_id': format_id,
1463                     'url': url,
1464                     'player_url': player_url,
1465                     'filesize': int_or_none(url_data.get('clen', [None])[0]),
1466                     'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
1467                     'width': width,
1468                     'height': height,
1469                     'fps': int_or_none(url_data.get('fps', [None])[0]),
1470                     'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
1471                 }
1472                 type_ = url_data.get('type', [None])[0]
1473                 if type_:
1474                     type_split = type_.split(';')
1475                     kind_ext = type_split[0].split('/')
1476                     if len(kind_ext) == 2:
1477                         kind, ext = kind_ext
1478                         dct['ext'] = ext
1479                         if kind in ('audio', 'video'):
1480                             codecs = None
1481                             for mobj in re.finditer(
1482                                     r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1483                                 if mobj.group('key') == 'codecs':
1484                                     codecs = mobj.group('val')
1485                                     break
1486                             if codecs:
1487                                 codecs = codecs.split(',')
1488                                 if len(codecs) == 2:
1489                                     acodec, vcodec = codecs[0], codecs[1]
1490                                 else:
1491                                     acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
1492                                 dct.update({
1493                                     'acodec': acodec,
1494                                     'vcodec': vcodec,
1495                                 })
1496                 if format_id in self._formats:
1497                     dct.update(self._formats[format_id])
1498                 formats.append(dct)
1499         elif video_info.get('hlsvp'):
1500             manifest_url = video_info['hlsvp'][0]
1501             url_map = self._extract_from_m3u8(manifest_url, video_id)
1502             formats = _map_to_format_list(url_map)
1503             # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
1504             for a_format in formats:
1505                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
1506         else:
1507             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
1508
1509         # Look for the DASH manifest
1510         if self._downloader.params.get('youtube_include_dash_manifest', True):
1511             dash_mpd_fatal = True
1512             for dash_manifest_url in dash_mpds:
1513                 dash_formats = {}
1514                 try:
1515                     for df in self._parse_dash_manifest(
1516                             video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
1517                         # Do not overwrite DASH format found in some previous DASH manifest
1518                         if df['format_id'] not in dash_formats:
1519                             dash_formats[df['format_id']] = df
1520                         # Additional DASH manifests may end up in HTTP Error 403 therefore
1521                         # allow them to fail without bug report message if we already have
1522                         # some DASH manifest succeeded. This is temporary workaround to reduce
1523                         # burst of bug reports until we figure out the reason and whether it
1524                         # can be fixed at all.
1525                         dash_mpd_fatal = False
1526                 except (ExtractorError, KeyError) as e:
1527                     self.report_warning(
1528                         'Skipping DASH manifest: %r' % e, video_id)
1529                 if dash_formats:
1530                     # Remove the formats we found through non-DASH, they
1531                     # contain less info and it can be wrong, because we use
1532                     # fixed values (for example the resolution). See
1533                     # https://github.com/rg3/youtube-dl/issues/5774 for an
1534                     # example.
1535                     formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
1536                     formats.extend(dash_formats.values())
1537
1538         # Check for malformed aspect ratio
1539         stretched_m = re.search(
1540             r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
1541             video_webpage)
1542         if stretched_m:
1543             w = float(stretched_m.group('w'))
1544             h = float(stretched_m.group('h'))
1545             # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
1546             # We will only process correct ratios.
1547             if w > 0 and h > 0:
1548                 ratio = w / h
1549                 for f in formats:
1550                     if f.get('vcodec') != 'none':
1551                         f['stretched_ratio'] = ratio
1552
1553         self._sort_formats(formats)
1554
1555         return {
1556             'id': video_id,
1557             'uploader': video_uploader,
1558             'uploader_id': video_uploader_id,
1559             'upload_date': upload_date,
1560             'creator': video_creator,
1561             'title': video_title,
1562             'alt_title': video_alt_title,
1563             'thumbnail': video_thumbnail,
1564             'description': video_description,
1565             'categories': video_categories,
1566             'tags': video_tags,
1567             'subtitles': video_subtitles,
1568             'automatic_captions': automatic_captions,
1569             'duration': video_duration,
1570             'age_limit': 18 if age_gate else 0,
1571             'annotations': video_annotations,
1572             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
1573             'view_count': view_count,
1574             'like_count': like_count,
1575             'dislike_count': dislike_count,
1576             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
1577             'formats': formats,
1578             'is_live': is_live,
1579             'start_time': start_time,
1580             'end_time': end_time,
1581         }
1582
1583
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists.

    Accepts playlist URLs (``playlist?list=``, ``watch?...&list=``,
    ``embed/videoseries?list=``, ``/p/<id>`` and a few legacy forms) as well
    as bare playlist ids.  Ids starting with ``RD`` or ``UL`` are handled by
    a separate code path (see ``_extract_mix``); everything else goes
    through ``_extract_playlist``.
    """
    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the playlist id when a URL is given, group 2 captures
    # it when only a bare (prefixed) playlist id is given.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        youtube\.com/
                        (?:
                           (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
                           \? (?:.*?[&;])*? (?:p|a|list)=
                        |  p/
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
    }, {
        'note': 'embedded',
        'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        # Was misspelled 'playlist_mincout', so the count was never checked.
        'playlist_mincount': 21,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Extract a mix playlist from the watch page of its seed video.

        The last 11 characters of ``playlist_id`` are used as the id of the
        video the mix is generated from.  Returns a playlist result whose
        entries are the de-duplicated video ids found on that page.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
            url, playlist_id, 'Downloading Youtube mix')

        def search_title(class_name):
            return get_element_by_attribute('class', class_name, webpage)

        # Try the known title containers in order of preference.
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)
        ids = orderedSet(re.findall(
            r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
            webpage))
        url_results = self._ids_to_results(ids)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist from its ``playlist?list=`` page.

        Raises ExtractorError (expected) when the page carries an alert
        saying the playlist does not exist, is private, or that the request
        parameters are invalid.  Other alerts are reported as warnings,
        except the "Choose your language" notice, which is ignored.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        for alert in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page):
            message = alert.strip()
            # Check if the playlist exists or is private
            if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', message):
                raise ExtractorError(
                    'The playlist doesn\'t exist or is private, use --username or '
                    '--netrc to access it.',
                    expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', message):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', message):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + message)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title')

        return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title)

    def _real_extract(self, url):
        """Dispatch ``url`` to single-video, mix or playlist extraction.

        If the URL also names a video (``v=`` query parameter) and the
        ``noplaylist`` option is set, only that video is returned.
        Raises ExtractorError if ``url`` does not match ``_VALID_URL``.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if 'v' in query_dict:
            video_id = query_dict['v'][0]
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith(('RD', 'UL')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        return self._extract_playlist(playlist_id)
1750
1751
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` pages.

    Prefers delegating to the channel's "uploads" playlist (``UU`` + the
    channel id without its ``UC`` prefix) and only falls back to walking
    the channel's video pages when that playlist id cannot be determined.
    """
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [
        {
            'note': 'paginated channel',
            'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'playlist_mincount': 91,
            'info_dict': {
                'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
                'title': 'Uploads from lex will',
            },
        },
        {
            'note': 'Age restricted channel',
            # from https://www.youtube.com/user/DeusExOfficial
            'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
            'playlist_mincount': 64,
            'info_dict': {
                'id': 'UUs0ifCMCm1icqRbqhUINa0w',
                'title': 'Uploads from Deus Ex',
            },
        },
    ]

    def _real_extract(self, url):
        """Return the channel's videos as a playlist (or a redirect to one)."""
        channel_id = self._match_id(url)
        videos_url = self._TEMPLATE_URL % channel_id

        # Paging through a channel is capped at 35 pages of 30 items, i.e.
        # 1050 videos total (see #5778).  To get around that, try to learn
        # the channel's playlist id and extract it as a playlist instead;
        # page-by-page extraction below is only the fallback.
        probe_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        external_id = False
        if probe_page is not False:
            external_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not external_id:
                external_id = self._search_regex(
                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                    probe_page, 'channel id', default=None)
        if external_id and external_id.startswith('UC'):
            uploads_id = 'UU' + external_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_marker is None:
            return self.playlist_result(
                self._entries(first_page, channel_id), channel_id)

        # Auto-generated channels keep all their videos on this single page;
        # the ajax pages can't be used, they are empty.
        entries = []
        for video_id, video_title in self.extract_videos_from_page(first_page):
            entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(entries, channel_id)
1820
1821
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for a user's videos, given a user URL or ``ytuser:<name>``.

    Extraction itself is inherited from YoutubeChannelIE; only the URL
    pattern, the page template and the ``suitable`` check differ.
    """
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/TheLinuxFoundation',
            'playlist_mincount': 320,
            'info_dict': {
                'title': 'TheLinuxFoundation',
            },
        },
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Return True only if no other ``*IE`` class in this module claims ``url``."""
        # The regex of this extractor is too permissive and would also match
        # URLs that belong to the other youtube extractors, so give every
        # other module-level name ending in 'IE' the first say.
        for name, klass in globals().items():
            if name.endswith('IE') and klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)
1848
1849
class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the playlists page of a YouTube user.

    Only declarative attributes are set here; all methods are inherited.
    """
    IE_NAME = 'youtube:user:playlists'
    IE_DESC = 'YouTube.com user playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
    ]
1871
1872
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Extractor for keyword searches ("ytsearch" prefix) on YouTube."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into every results request;
    # subclasses override this to alter the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until at least ``n``
        videos have been collected or a page contains no video links. The
        collected entries are truncated to ``n`` and returned as a playlist
        titled with ``query``.

        Raises ExtractorError (expected) if a downloaded page contains a
        "search-message" block.
        """

        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as n results are available. Using ">" here would
            # fetch one more page when exactly n were collected, and a
            # "search-message" on that page would fail an already
            # satisfied request.
            if not new_videos or len(videos) >= n:
                break

        # n may be float('inf'), which is not a valid slice bound, so only
        # slice when there is actually something to cut off.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
1916
1917
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the YouTube search extractor that adds a date sort parameter."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results query by the inherited _get_n_results.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
1923
1924
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for youtube.com/results?search_query=... URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
    ]

    def _real_extract(self, url):
        """Download the results page and return its links as a playlist.

        The playlist title is the decoded ``search_query`` value from the
        URL. Each entry is a plain 'url' result built from one
        "yt-lockup-title" heading inside the "item-section" list.
        """
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))

        webpage = self._download_webpage(url, query)
        # Restrict the scan to the result list itself.
        result_code = self._search_regex(
            r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

        entries = []
        for heading_html in re.findall(
                r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code):
            # A missing title is tolerated; a missing link is not.
            title = self._html_search_regex(
                [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], heading_html, 'item title', fatal=False)
            href = self._html_search_regex(
                r'(?s)href="([^"]+)"', heading_html, 'item URL')
            entries.append({
                '_type': 'url',
                'url': compat_urlparse.urljoin('https://www.youtube.com/', href),
                'title': title,
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': query,
        }
1966
1967
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show/<id> pages."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            }
        },
    ]

    def _real_extract(self, url):
        """Run the inherited extraction on the show's ``/playlists`` page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
1985
1986
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page and its "load more" pages.

        ``url`` itself is not used; the feed address is built from
        _FEED_NAME. Returns a playlist titled _PLAYLIST_TITLE.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos.
            # This must be a real list: on Python 3 filter() returns a lazy
            # iterator that is always truthy, which would make the emptiness
            # check below never succeed.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
2034
2035
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list, handled as the playlist 'WL'."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'

    # override PlaylistIE tests
    _TESTS = []

    def _real_extract(self, url):
        """Extract the fixed playlist id 'WL'; ``url`` is not inspected."""
        watch_later_id = 'WL'
        return self._extract_playlist(watch_later_id)
2045
2046
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its underlying playlist."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to it.

        ``url`` is not used; the favourites page address is fixed.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The first "list=<id>" occurrence followed by a quote or ampersand.
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
2057
2058
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
2064
2065
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
2071
2072
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string: "\." is not a valid escape in a normal string literal.
    # The resulting pattern text is the same as before.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
2078
2079
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'http://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always fail with an expected error hinting at shell quoting."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
2127
2128
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is shorter than 11 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always fail with an expected error naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)