[arte] Add support for embedded videos (Fixes #2620)
author: Philipp Hagemeister <phihag@phihag.de>
Mon, 24 Mar 2014 21:01:47 +0000 (22:01 +0100)
committer: Philipp Hagemeister <phihag@phihag.de>
Mon, 24 Mar 2014 21:01:47 +0000 (22:01 +0100)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/generic.py

index a7a339e7d7c398d953b74d42ba55f56017881e92..39b250b103a2401a3138e5abf8d4b01b2a3d0a93 100644 (file)
@@ -14,6 +14,7 @@ from .arte import (
     ArteTVConcertIE,
     ArteTVFutureIE,
     ArteTVDDCIE,
+    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
index 49fb64077c75628a2a2dffd8e2bdcb255ac52c68..257dc1f61f05b2701350004d817f0624cc2d32fc 100644 (file)
@@ -75,9 +75,7 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_json_url(json_url, video_id, lang)
 
     def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
+        info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
         info_dict = {
@@ -99,6 +97,8 @@ class ArteTVPlus7IE(InfoExtractor):
                 l = 'F'
             elif lang == 'de':
                 l = 'A'
+            else:
+                l = lang
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
@@ -228,3 +228,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
             'description': 'md5:486eb08f991552ade77439fe6d82c305',
         },
     }
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):  # handles arte.tv playerv2 embed URLs; extraction is delegated to the inherited _extract_from_json_url
+    IE_NAME = 'arte.tv:embed'  # extractor name, distinct from the parent class
+    _VALID_URL = r'''(?x)
+        http://www\.arte\.tv
+        /playerv2/embed\.php\?json_url=
+        (?P<json_url>
+            http://arte\.tv/papi/tvguide/videos/stream/player/
+            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+        )
+    '''
+
+    def _real_extract(self, url):  # url is matched against _VALID_URL below
+        mobj = re.match(self._VALID_URL, url)  # verbose-mode pattern with named groups json_url, lang and id
+        video_id = mobj.group('id')  # second path segment after .../stream/player/
+        lang = mobj.group('lang')  # first path segment after .../stream/player/
+        json_url = mobj.group('json_url')  # query value up to the next '&', used verbatim (no percent-decoding here)
+        return self._extract_from_json_url(json_url, video_id, lang)  # same JSON-based extraction path the parent class uses
index 4d649fe717c2afa9ff2040b0c0b6cd62ce4285bb..e7ee318773ccbfddca619753b2b46c784c5ebbb7 100644 (file)
@@ -197,6 +197,21 @@ class GenericIE(InfoExtractor):
                 'description': 'No description',
             },
         },
+        # arte embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+            'md5': '7653032cbb25bf6c80d80f217055fa43',
+            'info_dict': {
+                'id': '048195-004_PLUS7-F',
+                'ext': 'flv',
+                'title': 'X:enius',
+                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+                'upload_date': '20140320',
+            },
+            'params': {
+                'skip_download': 'Requires rtmpdump'
+            }
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -525,6 +540,13 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'TED')
 
+        # Look for embedded arte.tv player
+        mobj = re.search(
+            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None: