Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube...
[youtube-dl] / youtube_dl / extractor / brightcove.py
index a07c0888f110f3dc9ca31f665ce89cb9793e1cc8..2c7d968a848a2c9d4dbad8960aeac2e6f0b55cfd 100644 (file)
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
 
 import re
 import json
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse,
@@ -123,7 +123,7 @@ class BrightcoveIE(InfoExtractor):
         object_str = fix_xml_ampersands(object_str)
 
         try:
-            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
         except compat_xml_parse_error:
             return
 
@@ -354,14 +354,18 @@ class BrightcoveIE(InfoExtractor):
 
 class BrightcoveInPageEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/([a-z0-9-]+)_([a-z]+)/index.html?.*videoId=(?P<video_id>\d+)'
-    TEST = {
+    _TEST = {
         'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+        'md5': 'c8100925723840d4b0d243f7025703be',
         'info_dict': {
             'id': '4463358922001',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Meet the man behind Popcorn Time',
-            'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
+            'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+            'timestamp': 1441391203,
+            'upload_date': '20150904',
             'duration': 165768,
+            'uploader_id': '929656772001',
         }
     }
 
@@ -379,8 +383,7 @@ class BrightcoveInPageEmbedIE(InfoExtractor):
         return None
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        account_id, player_id, embed, video_id = mobj.groups()
+        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
 
         webpage = self._download_webpage('http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id)
 
@@ -403,7 +406,7 @@ class BrightcoveInPageEmbedIE(InfoExtractor):
 
         title = json_data['name']
         description = json_data.get('description')
-        thumbnail = json_data.get('name')
+        thumbnail = json_data.get('thumbnail')
         timestamp = parse_iso8601(json_data.get('published_at'))
         duration = int_or_none(json_data.get('duration'))
 
@@ -413,19 +416,18 @@ class BrightcoveInPageEmbedIE(InfoExtractor):
             if source_type == 'application/x-mpegURL':
                 formats.extend(self._extract_m3u8_formats(source.get('src'), video_id))
             else:
-                src = source.get('src')
+                src = source.get('src') or source.get('streaming_src')
                 if src:
                     formats.append({
                         'url': src,
-                        'abr': source.get('avg_bitrate'),
+                        'tbr': source.get('avg_bitrate'),
                         'width': int_or_none(source.get('width')),
                         'height': int_or_none(source.get('height')),
                         'filesize': source.get('size'),
                         'container': source.get('container'),
-                        'vcodec': source.get('container'),
+                        'vcodec': source.get('codec'),
+                        'ext': source.get('container').lower(),
                     })
-                else:
-                    formats.extend(self._extract_f4m_formats(source.get('streaming_src'), video_id))
 
         self._sort_formats(formats)
 
@@ -437,4 +439,5 @@ class BrightcoveInPageEmbedIE(InfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
+            'uploader_id': account_id,
         }