[youtube] Fix missing format details for 60fps DASH formats
[youtube-dl] / youtube_dl / extractor / youtube.py
index bfa9a12a8fbd31d6158462ccac8fb327bb6481b1..3a2c7c562452e81b7a7872889d5ccde8ece55d25 100644 (file)
@@ -33,6 +33,7 @@ from ..utils import (
     int_or_none,
     orderedSet,
     parse_duration,
+    remove_start,
     smuggle_url,
     str_to_int,
     unescapeHTML,
@@ -129,35 +130,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         # TODO add SMS and phone call support - these require making a request and then prompting the user
 
         if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
-            tfa_code = self._get_tfa_info()
+            tfa_code = self._get_tfa_info('2-step verification code')
 
-            if tfa_code is None:
-                self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
-                self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+            if not tfa_code:
+                self._downloader.report_warning(
+                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
+                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                 return False
 
-            def find_value(element_id):
-                match = re.search(r'id="%s"\s+value="(.+?)">' % element_id, login_results, re.M | re.U)
-                if match is None:
-                    self._downloader.report_warning('Failed to get %s - did the page structure change?' % id)
-                return match.group(1)
-
-            challengeId = find_value('challengeId')
-            challengeType = find_value('challengeType')
-            gxf = find_value('gxf')
-
-            tfa_form_strs = {
-                'challengeId': challengeId,
-                'challengeType': challengeType,  # This doesn't appear to change
-                'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
-                'service': 'youtube',
-                'hl': 'en_US',
-                'checkedDomains': 'youtube',
-                'pstMsg': '0',
-                'gxf': gxf,
+            tfa_code = remove_start(tfa_code, 'G-')
+
+            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
+
+            tfa_form_strs.update({
                 'Pin': tfa_code,
                 'TrustDevice': 'on',
-            }
+            })
+
             tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
             tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
 
@@ -170,7 +159,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 return False
 
             if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
-                self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
+                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                 return False
             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
                 self._downloader.report_warning('unable to log in - did the page structure change?')
@@ -213,7 +202,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                  v=
                              )
                          ))
-                         |youtu\.be/                                          # just youtu.be/xxxx
+                         |(?:
+                            youtu\.be|                                        # just youtu.be/xxxx
+                            vid\.plus                                         # or vid.plus/xxxx
+                         )/
                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
@@ -635,6 +627,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
+        },
+        {
+            'url': 'http://vid.plus/FlRa-iH7PGw',
+            'only_matching': True,
         }
     ]
 
@@ -664,7 +660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1247,7 +1243,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
             if 'rtmpe%3Dyes' in encoded_url_map:
                 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
-            url_map = {}
+            formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
                 if 'itag' not in url_data or 'url' not in url_data:
@@ -1293,7 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 player_desc = 'flash player %s' % player_version
                             else:
                                 player_version = self._search_regex(
-                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
+                                    r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                     player_url,
                                     'html5 player', fatal=False)
                                 player_desc = 'html5 player %s' % player_version
@@ -1307,8 +1303,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     url += '&signature=' + signature
                 if 'ratebypass' not in url:
                     url += '&ratebypass=yes'
-                url_map[format_id] = url
-            formats = _map_to_format_list(url_map)
+
+                width = None
+                height = None
+                size_str = url_data.get('size', [''])[0]
+                if size_str.count('x') == 1:
+                    width, height = [int_or_none(x) for x in size_str.split('x')]
+
+                format_url = {
+                    'format_id': format_id,
+                    'url': url,
+                    'player_url': player_url,
+                    # As of this writing these are only defined for DASH formats:
+                    'filesize': int_or_none(url_data.get('clen', [None])[0]),
+                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], scale=1024),
+                    'width': width,
+                    'height': height,
+                    'fps': int_or_none(url_data.get('fps', [None])[0]),
+                }
+
+                # drop Nones so they do not overwrite the defaults from self._formats
+                format_url = dict((k, v) for k, v in format_url.items() if v is not None)
+
+                format_full = self._formats.get(format_id, {}).copy()
+                format_full.update(format_url)
+
+                formats.append(format_full)
+
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
             url_map = self._extract_from_m3u8(manifest_url, video_id)
@@ -1773,7 +1794,7 @@ class YoutubeSearchURLIE(InfoExtractor):
             r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
 
         part_codes = re.findall(
-            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
+            r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
         entries = []
         for part_code in part_codes:
             part_title = self._html_search_regex(