[extractor/generic] Properly extract format id from Content-Type
author Sergey M․ <dstftw@gmail.com>
Fri, 18 Mar 2016 15:50:10 +0000 (21:50 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 18 Mar 2016 15:50:10 +0000 (21:50 +0600)
Fixes extraction for cases like: audio/x-mpegURL; charset=utf-8

youtube_dl/extractor/generic.py

index 8121f04a5e02cf672dc6ab0f152d21df8b98034e..b75db12521caee855e41b086e931d2e7d79a49d2 100644 (file)
@@ -1249,7 +1249,7 @@ class GenericIE(InfoExtractor):
 
         # Check for direct link to a video
         content_type = head_response.headers.get('Content-Type', '')
-        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
+        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
             upload_date = unified_strdate(
                 head_response.headers.get('Last-Modified'))