[extractor/generic] Add a test for m3u playlist served without proper Content-Type

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index a2e7ba5ad8418bd3f6827cac53966a85f837c16d..f28a65d9be347eebc20dde60bb1bb07a1ae7fcf0 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -253,6 +253,21 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              }
          },
+        # m3u8 served with Content-Type: text/plain
+        {
+            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+            'info_dict': {
+                'id': 'index',
+                'ext': 'mp4',
+                'title': 'index',
+                'upload_date': '20140720',
+                'formats': 'mincount:11',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
          # google redirect
          {
              'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -1277,10 +1292,8 @@ class GenericIE(InfoExtractor):
                      'url': url,
                      'vcodec': 'none' if m.group('type') == 'audio' else None
                  }]
-            info_dict.update({
-                'direct': True,
-                'formats': formats,
-            })
+                info_dict['direct'] = True
+            info_dict['formats'] = formats
              return info_dict
  
          if not self._downloader.params.get('test', False) and not is_intentional:
@@ -1301,9 +1314,15 @@ class GenericIE(InfoExtractor):
              request.add_header('Accept-Encoding', '*')
              full_response = self._request_webpage(request, video_id)
  
+        first_bytes = full_response.read(512)
+
+        # Is it an M3U playlist?
+        if first_bytes.startswith('#EXTM3U'):
+            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            return info_dict
+
          # Maybe it's a direct link to a video?
          # Be careful not to download the whole thing!
-        first_bytes = full_response.read(512)
          if not is_html(first_bytes):
              self._downloader.report_warning(
                  'URL could be a direct video link, returning it as such.')