Merge remote-tracking branch 'origin/master'
author Philipp Hagemeister <phihag@phihag.de>
Fri, 4 Oct 2013 09:14:20 +0000 (11:14 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Fri, 4 Oct 2013 09:14:20 +0000 (11:14 +0200)
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/mtv.py
youtube_dl/extractor/ted.py

index bf8d711eea44c8d60855f458407391d66ef2664d..69b2beecebac319ef92e8043ab75ad71fad46a25 100644 (file)
@@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor):
         '400': 'mp4',
     }
     _video_dimensions = {
-        '3500': '1280x720',
-        '2200': '960x540',
-        '1700': '768x432',
-        '1200': '640x360',
-        '750': '512x288',
-        '400': '384x216',
+        '3500': (1280, 720),
+        '2200': (960, 540),
+        '1700': (768, 432),
+        '1200': (640, 360),
+        '750': (512, 288),
+        '400': (384, 216),
     }
 
     @classmethod
@@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor):
         """Receives a URL and returns True if suitable for this IE."""
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
-    def _print_formats(self, formats):
-        print('Available formats:')
-        for x in formats:
-            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
-
+    @staticmethod
+    def _transform_rtmp_url(rtmp_video_url):
+        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+        if not m:
+            raise ExtractorError(u'Cannot transform RTMP url')
+        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+        return base + m.group('finalid')
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor):
                 self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
                 continue
 
-            if self._downloader.params.get('listformats', None):
-                self._print_formats([i[0] for i in turls])
-                return
-
-            # For now, just pick the highest bitrate
-            format,rtmp_video_url = turls[-1]
-
-            # Get the format arg from the arg stream
-            req_format = self._downloader.params.get('format', None)
-
-            # Select format if we can find one
-            for f,v in turls:
-                if f == req_format:
-                    format, rtmp_video_url = f, v
-                    break
-
-            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
-            if not m:
-                raise ExtractorError(u'Cannot transform RTMP url')
-            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
-            video_url = base + m.group('finalid')
+            formats = []
+            for format, rtmp_video_url in turls:
+                w, h = self._video_dimensions.get(format, (None, None))
+                formats.append({
+                    'url': self._transform_rtmp_url(rtmp_video_url),
+                    'ext': self._video_extensions.get(format, 'mp4'),
+                    'format_id': format,
+                    'height': h,
+                    'width': w,
+                })
 
             effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
             info = {
                 'id': shortMediaId,
-                'url': video_url,
+                'formats': formats,
                 'uploader': showId,
                 'upload_date': officialDate,
                 'title': effTitle,
-                'ext': 'mp4',
-                'format': format,
                 'thumbnail': None,
                 'description': compat_str(officialTitle),
             }
+
+            # TODO: Remove when #980 has been merged
+            info.update(info['formats'][-1])
+
             results.append(info)
 
         return results
index 77726ee2432fc2bcd6df6ce89dcc560419524051..69cdcdc1b5b75d1cac5733b34565f087c9dcddec 100644 (file)
@@ -35,6 +35,8 @@ class InfoExtractor(object):
     title:          Video title, unescaped.
     ext:            Video filename extension.
 
+    Instead of url and ext, formats can also be specified.
+
     The following fields are optional:
 
     format:         The video format, defaults to ext (used for --get-format)
@@ -52,8 +54,19 @@ class InfoExtractor(object):
     view_count:     How many users have watched the video on the platform.
     urlhandle:      [internal] The urlHandle to be used to download the file,
                     like returned by urllib.request.urlopen
-
-    The fields should all be Unicode strings.
+    formats:        A list of dictionaries for each format available, it must
+                    be ordered from worst to best quality. Potential fields:
+                    * url       Mandatory. The URL of the video file
+                    * ext       Will be calculated from url if missing
+                    * format    A human-readable description of the format
+                                ("mp4 container with h264/opus").
+                                Calculated from width and height if missing.
+                    * format_id A short description of the format
+                                ("mp4_h264_opus" or "19")
+                    * width     Width of the video, if known
+                    * height    Height of the video, if known
+
+    Unless mentioned otherwise, the fields should be Unicode strings.
 
     Subclasses of this one should re-define the _real_initialize() and
     _real_extract() methods and define a _VALID_URL regexp.
index 8f956571d54dc4a42a4f3726642929e4b2497f13..001a576a8710fb45a7c3e4d4dfc9f634af39dc87 100644 (file)
@@ -54,23 +54,26 @@ class MTVIE(InfoExtractor):
     def _get_thumbnail_url(self, uri, itemdoc):
         return 'http://mtv.mtvnimages.com/uri/' + uri
 
-    def _extract_video_url(self, metadataXml):
+    def _extract_video_formats(self, metadataXml):
         if '/error_country_block.swf' in metadataXml:
             raise ExtractorError(u'This video is not available from your country.', expected=True)
         mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
         renditions = mdoc.findall('.//rendition')
 
-        # For now, always pick the highest quality.
-        rendition = renditions[-1]
-
-        try:
-            _,_,ext = rendition.attrib['type'].partition('/')
-            format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
-            rtmp_video_url = rendition.find('./src').text
-        except KeyError:
-            raise ExtractorError('Invalid rendition field.')
-        video_url = self._transform_rtmp_url(rtmp_video_url)
-        return {'ext': ext, 'url': video_url, 'format': format}
+        formats = []
+        for rendition in mdoc.findall('.//rendition'):
+            try:
+                _, _, ext = rendition.attrib['type'].partition('/')
+                rtmp_video_url = rendition.find('./src').text
+                formats.append({'ext': ext,
+                                'url': self._transform_rtmp_url(rtmp_video_url),
+                                'format_id': rendition.get('bitrate'),
+                                'width': int(rendition.get('width')),
+                                'height': int(rendition.get('height')),
+                                })
+            except (KeyError, TypeError):
+                raise ExtractorError('Invalid rendition field.')
+        return formats
 
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
@@ -81,19 +84,25 @@ class MTVIE(InfoExtractor):
             mediagen_url += '&acceptMethods=fms'
         mediagen_page = self._download_webpage(mediagen_url, video_id,
                                                u'Downloading video urls')
-        video_info = self._extract_video_url(mediagen_page)
 
         description_node = itemdoc.find('description')
         if description_node is not None:
             description = description_node.text
         else:
             description = None
-        video_info.update({'title': itemdoc.find('title').text,
-                           'id': video_id,
-                           'thumbnail': self._get_thumbnail_url(uri, itemdoc),
-                           'description': description,
-                           })
-        return video_info
+
+        info = {
+            'title': itemdoc.find('title').text,
+            'formats': self._extract_video_formats(mediagen_page),
+            'id': video_id,
+            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
+            'description': description,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(info['formats'][-1])
+
+        return info
 
     def _get_videos_info(self, uri):
         video_id = self._id_from_uri(uri)
index 4c11f7a03c37136c0c80677e55b66598c647edeb..dfa1176a3e4e4eef333dcb829773c189bf9916ba 100644 (file)
@@ -77,12 +77,20 @@ class TEDIE(InfoExtractor):
         
         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
                                        webpage, 'thumbnail')
+        formats = [{
+            'ext': 'mp4',
+            'url': stream['file'],
+            'format': stream['id']
+            } for stream in info['htmlStreams']]
         info = {
-                'id': info['id'],
-                'url': info['htmlStreams'][-1]['file'],
-                'ext': 'mp4',
-                'title': title,
-                'thumbnail': thumbnail,
-                'description': desc,
-                }
+            'id': info['id'],
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': desc,
+            'formats': formats,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info.update(info['formats'][-1])
+
         return info