]> git.bitcoin.ninja Git - youtube-dl/blobdiff - youtube_dl/extractor/common.py
[infoq] Use BokeCC extractor function
[youtube-dl] / youtube_dl / extractor / common.py
index f411ea7633568915e4e97df61958d13c6a8aca80..14f57563529ad5782b3c70c24d4760c78c3b334d 100644 (file)
@@ -46,6 +46,7 @@ from ..utils import (
     xpath_with_ns,
     determine_protocol,
     parse_duration,
+    mimetype2ext,
 )
 
 
@@ -899,6 +900,16 @@ class InfoExtractor(object):
                     item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
                 formats)
 
+    @staticmethod
+    def _remove_duplicate_formats(formats):
+        format_urls = set()
+        unique_formats = []
+        for f in formats:
+            if f['url'] not in format_urls:
+                format_urls.add(f['url'])
+                unique_formats.append(f)
+        formats[:] = unique_formats
+
     def _is_valid_url(self, url, video_id, item='video'):
         url = self._proto_relative_url(url, scheme='http:')
         # For now assume non HTTP(S) URLs always valid
@@ -1277,16 +1288,7 @@ class InfoExtractor(object):
             if not src or src in urls:
                 continue
             urls.append(src)
-            ext = textstream.get('ext') or determine_ext(src)
-            if not ext:
-                type_ = textstream.get('type')
-                SUBTITLES_TYPES = {
-                    'text/vtt': 'vtt',
-                    'text/srt': 'srt',
-                    'application/smptett+xml': 'tt',
-                }
-                if type_ in SUBTITLES_TYPES:
-                    ext = SUBTITLES_TYPES[type_]
+            ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type'))
             lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
             subtitles.setdefault(lang, []).append({
                 'url': src,