Merge branch 'FounderSG-Weiqitv'
authorYen Chi Hsuan <yan12125@gmail.com>
Tue, 19 Jan 2016 20:06:09 +0000 (04:06 +0800)
committerYen Chi Hsuan <yan12125@gmail.com>
Tue, 19 Jan 2016 20:06:09 +0000 (04:06 +0800)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/digiteka.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/lemonde.py [new file with mode: 0644]
youtube_dl/extractor/ultimedia.py [deleted file]
youtube_dl/extractor/viewster.py

index 5ec017d5daa716e12de58fe8f98fc92b517f36ba..bab3d7b46d4fc5efe3c1bd40421b2325fd740376 100644 (file)
@@ -333,6 +333,7 @@ from .kuwo import (
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
 from .lecture2go import Lecture2GoIE
+from .lemonde import LemondeIE
 from .letv import (
     LetvIE,
     LetvTvIE,
@@ -778,7 +779,7 @@ from .udemy import (
     UdemyCourseIE
 )
 from .udn import UDNEmbedIE
-from .ultimedia import UltimediaIE
+from .digiteka import DigitekaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
diff --git a/youtube_dl/extractor/digiteka.py b/youtube_dl/extractor/digiteka.py
new file mode 100644 (file)
index 0000000..7bb79ff
--- /dev/null
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DigitekaIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
+        (?:
+            deliver/
+            (?P<embed_type>
+                generic|
+                musique
+            )
+            (?:/[^/]+)*/
+            (?:
+                src|
+                article
+            )|
+            default/index/video
+            (?P<site_type>
+                generic|
+                music
+            )
+            /id
+        )/(?P<id>[\d+a-z]+)'''
+    _TESTS = [{
+        # news
+        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+        'md5': '276a0e49de58c7e85d32b057837952a2',
+        'info_dict': {
+            'id': 's8uk0r',
+            'ext': 'mp4',
+            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 74,
+            'upload_date': '20150317',
+            'timestamp': 1426604939,
+            'uploader_id': '3fszv',
+        },
+    }, {
+        # music
+        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+        'info_dict': {
+            'id': 'xvpfp8',
+            'ext': 'mp4',
+            'title': 'Two - C\'est La Vie (clip)',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 233,
+            'upload_date': '20150224',
+            'timestamp': 1424760500,
+            'uploader_id': '3rfzk',
+        },
+    }, {
+        'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('embed_type') or mobj.group('site_type')
+        if video_type == 'music':
+            video_type = 'musique'
+
+        deliver_info = self._download_json(
+            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
+            video_id)
+
+        yt_id = deliver_info.get('yt_id')
+        if yt_id:
+            return self.url_result(yt_id, 'Youtube')
+
+        jwconf = deliver_info['jwconf']
+
+        formats = []
+        for source in jwconf['playlist'][0]['sources']:
+            formats.append({
+                'url': source['file'],
+                'format_id': source.get('label'),
+            })
+
+        self._sort_formats(formats)
+
+        title = deliver_info['title']
+        thumbnail = jwconf.get('image')
+        duration = int_or_none(deliver_info.get('duration'))
+        timestamp = int_or_none(deliver_info.get('release_time'))
+        uploader_id = deliver_info.get('owner_id')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
index 0baa17e8d80024c1c2f6fed25230a0c99469247b..26d3698c8b1e99a6899037a0a514e0a6480b2293 100644 (file)
@@ -57,7 +57,7 @@ from .pladform import PladformIE
 from .videomore import VideomoreIE
 from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
-from .ultimedia import UltimediaIE
+from .digiteka import DigitekaIE
 
 
 class GenericIE(InfoExtractor):
@@ -1814,10 +1814,10 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
 
-        # Look for Ulltimedia embeds
-        ultimedia_url = UltimediaIE._extract_url(webpage)
-        if ultimedia_url:
-            return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
+        # Look for Digiteka embeds
+        digiteka_url = DigitekaIE._extract_url(webpage)
+        if digiteka_url:
+            return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
 
         # Look for AdobeTVVideo embeds
         mobj = re.search(
diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py
new file mode 100644 (file)
index 0000000..be66fff
--- /dev/null
@@ -0,0 +1,34 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LemondeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
+        'md5': '01fb3c92de4c12c573343d63e163d302',
+        'info_dict': {
+            'id': 'lqm3kl',
+            'ext': 'mp4',
+            'title': "Comprendre l'affaire Bygmalion en 5 minutes",
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 320,
+            'upload_date': '20160119',
+            'timestamp': 1453194778,
+            'uploader_id': '3pmkp',
+        },
+    }, {
+        'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        digiteka_url = self._proto_relative_url(self._search_regex(
+            r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
+            webpage, 'digiteka url', group='url'))
+        return self.url_result(digiteka_url, 'Digiteka')
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
deleted file mode 100644 (file)
index 6032812..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class UltimediaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?ultimedia\.com/
-        (?:
-            deliver/
-            (?P<embed_type>
-                generic|
-                musique
-            )
-            (?:/[^/]+)*/
-            (?:
-                src|
-                article
-            )|
-            default/index/video
-            (?P<site_type>
-                generic|
-                music
-            )
-            /id
-        )/(?P<id>[\d+a-z]+)'''
-    _TESTS = [{
-        # news
-        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
-        'md5': '276a0e49de58c7e85d32b057837952a2',
-        'info_dict': {
-            'id': 's8uk0r',
-            'ext': 'mp4',
-            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
-            'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 74,
-            'upload_date': '20150317',
-            'timestamp': 1426604939,
-            'uploader_id': '3fszv',
-        },
-    }, {
-        # music
-        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
-        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
-        'info_dict': {
-            'id': 'xvpfp8',
-            'ext': 'mp4',
-            'title': 'Two - C\'est La Vie (clip)',
-            'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 233,
-            'upload_date': '20150224',
-            'timestamp': 1424760500,
-            'uploader_id': '3rfzk',
-        },
-    }]
-
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
-            webpage)
-        if mobj:
-            return mobj.group('url')
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        video_type = mobj.group('embed_type') or mobj.group('site_type')
-        if video_type == 'music':
-            video_type = 'musique'
-
-        deliver_info = self._download_json(
-            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
-            video_id)
-
-        yt_id = deliver_info.get('yt_id')
-        if yt_id:
-            return self.url_result(yt_id, 'Youtube')
-
-        jwconf = deliver_info['jwconf']
-
-        formats = []
-        for source in jwconf['playlist'][0]['sources']:
-            formats.append({
-                'url': source['file'],
-                'format_id': source.get('label'),
-            })
-
-        self._sort_formats(formats)
-
-        title = deliver_info['title']
-        thumbnail = jwconf.get('image')
-        duration = int_or_none(deliver_info.get('duration'))
-        timestamp = int_or_none(deliver_info.get('release_time'))
-        uploader_id = deliver_info.get('owner_id')
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'uploader_id': uploader_id,
-            'formats': formats,
-        }
index 185b1c1194f72122c99b26abd7ffb32d41934388..fe94a479339035dfd9a70386e903e5c3770fdfea 100644 (file)
@@ -155,10 +155,10 @@ class ViewsterIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        synopsis = info.get('Synopsis', {})
+        synopsis = info.get('Synopsis') or {}
         # Prefer title outside synopsis since it's less messy
         title = (info.get('Title') or synopsis['Title']).strip()
-        description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
+        description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
         duration = int_or_none(info.get('Duration'))
         timestamp = parse_iso8601(info.get('ReleaseDate'))