[youtube] Fix extraction (closes #20758, closes #20759, closes #20761, closes #20762...
[youtube-dl] / youtube_dl / extractor / mtv.py
index 25af5ddfda4765132fec413caca9a09fc2ba2bb9..7a3b57abd852f4ffb8e49bdce9d9e8029fca65c3 100644 (file)
@@ -115,10 +115,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
             if transcript.get('kind') != 'captions':
                 continue
             lang = transcript.get('srclang')
-            subtitles[lang] = [{
-                'url': compat_str(typographic.get('src')),
-                'ext': typographic.get('format')
-            } for typographic in transcript.findall('./typographic')]
+            for typographic in transcript.findall('./typographic'):
+                sub_src = typographic.get('src')
+                if not sub_src:
+                    continue
+                ext = typographic.get('format')
+                if ext == 'cea-608':
+                    ext = 'scc'
+                subtitles.setdefault(lang, []).append({
+                    'url': compat_str(sub_src),
+                    'ext': ext
+                })
         return subtitles
 
     def _get_video_info(self, itemdoc, use_hls=True):
@@ -258,7 +265,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
-                [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
+                [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
                 webpage, 'mgid', default=None)
 
         if not mgid: