[usatoday] Add new extractor(closes #8655)
authorremitamine <remitamine@gmail.com>
Sun, 13 Mar 2016 21:36:15 +0000 (22:36 +0100)
committerremitamine <remitamine@gmail.com>
Sun, 13 Mar 2016 21:36:15 +0000 (22:36 +0100)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/usatoday.py [new file with mode: 0644]

index c5b80f4aadc423765b6bb3a4ef63289849642655..7874e7699b7092e3e2bfb2a7a2ce37ce37a4972b 100644 (file)
@@ -821,6 +821,7 @@ from .udn import UDNEmbedIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
+from .usatoday import USATodayIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .ustudio import UstudioIE
 from .varzesh3 import Varzesh3IE
 from .ustream import UstreamIE, UstreamChannelIE
 from .ustudio import UstudioIE
 from .varzesh3 import Varzesh3IE
diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py
new file mode 100644 (file)
index 0000000..e5678dc
--- /dev/null
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    get_element_by_attribute,
+    parse_duration,
+    update_url_query,
+    ExtractorError,
+)
+from ..compat import compat_str
+
+
+class USATodayIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
+    _TEST = {
+        'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
+        'md5': '4d40974481fa3475f8bccfd20c5361f8',
+        'info_dict': {
+            'id': '81729424',
+            'ext': 'mp4',
+            'title': 'US, France warn Syrian regime ahead of new peace talks',
+            'timestamp': 1457891045,
+            'description': 'md5:7e50464fdf2126b0f533748d3c78d58f',
+            'uploader_id': '29906170001',
+            'upload_date': '20160313',
+        }
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s'
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(update_url_query(url, {'ajax': 'true'}), display_id)
+        ui_video_data = get_element_by_attribute('class', 'ui-video-data', webpage)
+        if not ui_video_data:
+            raise ExtractorError('no video on the webpage', expected=True)
+        video_data = self._parse_json(ui_video_data, display_id)
+
+        return {
+            '_type': 'url_transparent',
+            'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'],
+            'id': compat_str(video_data['id']),
+            'title': video_data['title'],
+            'thumbnail': video_data.get('thumbnail'),
+            'description': video_data.get('description'),
+            'duration': parse_duration(video_data.get('length')),
+            'ie_key': 'BrightcoveNew',
+        }