[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / dispeak.py
index 6ebc3255a1d567f6aebd3b38fea25c2d4a499271..c345e0274192383790f32eeb1db1f3e013870238 100644 (file)
@@ -12,11 +12,11 @@ from ..utils import (
 )
 
 
-class DigitalSpeakingIE(InfoExtractor):
-    _VALID_URL = r'http://evt.dispeak.com/([^/]+/)+xml/(?P<id>[^.]+).xml'
+class DigitallySpeakingIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
 
-    _TEST = {
-        # From http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml
+    _TESTS = [{
+        # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
         'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
         'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
         'info_dict': {
@@ -24,7 +24,15 @@ class DigitalSpeakingIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Tenacious Design and The Interface of \'Destiny\'',
         },
-    }
+    }, {
+        # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
+        'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
+        'only_matching': True,
+    }, {
+        # From http://www.gdcvault.com/play/1013700/Advanced-Material
+        'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
+        'only_matching': True,
+    }]
 
     def _parse_mp4(self, metadata):
         video_formats = []
@@ -50,26 +58,32 @@ class DigitalSpeakingIE(InfoExtractor):
             stream_name = xpath_text(a_format, 'streamName', fatal=True)
             video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
             url = video_root + video_path
-            vbr = xpath_text(a_format, 'bitrate')
+            bitrate = xpath_text(a_format, 'bitrate')
+            tbr = int_or_none(bitrate)
+            vbr = int_or_none(self._search_regex(
+                r'-(\d+)\.mp4', video_path, 'vbr', default=None))
+            abr = tbr - vbr if tbr and vbr else None
             video_formats.append({
+                'format_id': bitrate,
                 'url': url,
-                'vbr': int_or_none(vbr),
+                'tbr': tbr,
+                'vbr': vbr,
+                'abr': abr,
             })
         return video_formats
 
     def _parse_flv(self, metadata):
         formats = []
         akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
-        audios = metadata.find('./audios')
-        if audios is not None:
-            for audio in audios:
-                formats.append({
-                    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
-                    'play_path': remove_end(audio.get('url'), '.flv'),
-                    'ext': 'flv',
-                    'vcodec': 'none',
-                    'format_id': audio.get('code'),
-                })
+        audios = metadata.findall('./audios/audio')
+        for audio in audios:
+            formats.append({
+                'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+                'play_path': remove_end(audio.get('url'), '.flv'),
+                'ext': 'flv',
+                'vcodec': 'none',
+                'format_id': audio.get('code'),
+            })
         slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
         formats.append({
             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,