[youtube] fix hd720 format position
[youtube-dl] / youtube_dl / extractor / channel9.py
index b1cb585309380eb4127658d539ec73355a2f2f1c..81108e70424f5a98bc97dcdb2ee9b11c6869ed08 100644 (file)
@@ -4,22 +4,16 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     ExtractorError,
-    unescapeHTML,
     int_or_none,
     parse_iso8601,
-    clean_html,
+    qualities,
+    unescapeHTML,
 )
 
 
 class Channel9IE(InfoExtractor):
-    '''
-    Common extractor for channel9.msdn.com.
-
-    The type of provided URL (video or playlist) is determined according to
-    meta Search.PageType from web page HTML rather than URL itself, as it is
-    not always possible to do.
-    '''
     IE_DESC = 'Channel 9'
     IE_NAME = 'channel9'
     _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
@@ -87,6 +81,12 @@ class Channel9IE(InfoExtractor):
 
     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
+            webpage)
+
     def _extract_list(self, video_id, rss_url=None):
         if not rss_url:
             rss_url = self._RSS_URL % video_id
@@ -120,22 +120,75 @@ class Channel9IE(InfoExtractor):
             content_data = self._download_json(content_url, content_id)
             title = content_data['Title']
 
+            QUALITIES = (
+                'mp3',
+                'wmv', 'mp4',
+                'wmv-low', 'mp4-low',
+                'wmv-mid', 'mp4-mid',
+                'wmv-high', 'mp4-high',
+            )
+
+            quality_key = qualities(QUALITIES)
+
+            def quality(quality_id, format_url):
+                return (len(QUALITIES) if '_Source.' in format_url
+                        else quality_key(quality_id))
+
             formats = []
-            qualities = [
-                'VideoMP4Low',
-                'VideoWMV',
-                'VideoMP4Medium',
-                'VideoMP4High',
-                'VideoWMVHQ',
-            ]
-            for q in qualities:
-                q_url = content_data.get(q)
-                if not q_url:
+            urls = set()
+
+            SITE_QUALITIES = {
+                'MP3': 'mp3',
+                'MP4': 'mp4',
+                'Low Quality WMV': 'wmv-low',
+                'Low Quality MP4': 'mp4-low',
+                'Mid Quality WMV': 'wmv-mid',
+                'Mid Quality MP4': 'mp4-mid',
+                'High Quality WMV': 'wmv-high',
+                'High Quality MP4': 'mp4-high',
+            }
+
+            formats_select = self._search_regex(
+                r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
+                'formats select', default=None)
+            if formats_select:
+                for mobj in re.finditer(
+                        r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
+                        formats_select):
+                    format_url = mobj.group('url')
+                    if format_url in urls:
+                        continue
+                    urls.add(format_url)
+                    format_id = mobj.group('format')
+                    quality_id = SITE_QUALITIES.get(format_id, format_id)
+                    formats.append({
+                        'url': format_url,
+                        'format_id': quality_id,
+                        'quality': quality(quality_id, format_url),
+                        'vcodec': 'none' if quality_id == 'mp3' else None,
+                    })
+
+            API_QUALITIES = {
+                'VideoMP4Low': 'mp4-low',
+                'VideoWMV': 'wmv-mid',
+                'VideoMP4Medium': 'mp4-mid',
+                'VideoMP4High': 'mp4-high',
+                'VideoWMVHQ': 'wmv-hq',
+            }
+
+            for format_id, q in API_QUALITIES.items():
+                q_url = content_data.get(format_id)
+                if not q_url or q_url in urls:
                     continue
+                urls.add(q_url)
                 formats.append({
-                    'format_id': q,
                     'url': q_url,
+                    'format_id': q,
+                    'quality': quality(q, q_url),
                 })
+
+            self._sort_formats(formats)
+
             slides = content_data.get('Slides')
             zip_file = content_data.get('ZipFile')