[youtube] Fix missing format details for 60fps DASH formats

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 887c46d95dcef1febe45166e9dbd2a4c49083e73..3a2c7c562452e81b7a7872889d5ccde8ece55d25 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -202,7 +202,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                   v=
                               )
                           ))
-                         |youtu\.be/                                          # just youtu.be/xxxx
+                         |(?:
+                            youtu\.be|                                        # just youtu.be/xxxx
+                            vid\.plus                                         # or vid.plus/xxxx
+                         )/
                           |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                           )
                       )?                                                       # all until now is optional -> you can pass the naked ID
@@ -624,6 +627,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'params': {
                  'skip_download': True,
              },
+        },
+        {
+            'url': 'http://vid.plus/FlRa-iH7PGw',
+            'only_matching': True,
          }
      ]
  
@@ -653,7 +660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
      def _extract_signature_function(self, video_id, player_url, example_sig):
          id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
              player_url)
          if not id_m:
              raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1236,7 +1243,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
              if 'rtmpe%3Dyes' in encoded_url_map:
                  raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
-            url_map = {}
+            formats = []
              for url_data_str in encoded_url_map.split(','):
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' not in url_data or 'url' not in url_data:
@@ -1282,7 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                  player_desc = 'flash player %s' % player_version
                              else:
                                  player_version = self._search_regex(
-                                    r'html5player-([^/]+?)(?:/html5player)?\.js',
+                                    r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                      player_url,
                                      'html5 player', fatal=False)
                                  player_desc = 'html5 player %s' % player_version
@@ -1296,8 +1303,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      url += '&signature=' + signature
                  if 'ratebypass' not in url:
                      url += '&ratebypass=yes'
-                url_map[format_id] = url
-            formats = _map_to_format_list(url_map)
+
+                width = None
+                height = None
+                size_str = url_data.get('size', [''])[0]
+                if size_str.count('x') == 1:
+                    width, height = [int_or_none(x) for x in size_str.split('x')]
+
+                format_url = {
+                    'format_id': format_id,
+                    'url': url,
+                    'player_url': player_url,
+                    # As of this writing these are only defined for DASH formats:
+                    'filesize': int_or_none(url_data.get('clen', [None])[0]),
+                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], scale=1024),
+                    'width': width,
+                    'height': height,
+                    'fps': int_or_none(url_data.get('fps', [None])[0]),
+                }
+
+                # drop Nones so they do not overwrite the defaults from self._formats
+                format_url = dict((k, v) for k, v in format_url.items() if v is not None)
+
+                format_full = self._formats.get(format_id, {}).copy()
+                format_full.update(format_url)
+
+                formats.append(format_full)
+
          elif video_info.get('hlsvp'):
              manifest_url = video_info['hlsvp'][0]
              url_map = self._extract_from_m3u8(manifest_url, video_id)
@@ -1762,7 +1794,7 @@ class YoutubeSearchURLIE(InfoExtractor):
              r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
  
          part_codes = re.findall(
-            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
+            r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
          entries = []
          for part_code in part_codes:
              part_title = self._html_search_regex(