[rds] extract 9c9media formats

author Remita Amine <remitamine@gmail.com>

Thu, 30 Jun 2016 17:14:23 +0000 (18:14 +0100)

committer Remita Amine <remitamine@gmail.com>

Thu, 30 Jun 2016 17:22:35 +0000 (18:22 +0100)
author Remita Amine <remitamine@gmail.com>
Thu, 30 Jun 2016 17:14:23 +0000 (18:14 +0100)
committer Remita Amine <remitamine@gmail.com>
Thu, 30 Jun 2016 17:22:35 +0000 (18:22 +0100)
diff --git a/youtube_dl/extractor/rds.py b/youtube_dl/extractor/rds.py

index 796adfdf9dab7f07481328026bb21591a7aa5612..bf200ea4d3f8b17f171bcce01c930b5d183fcc2e 100644 (file)
--- a/youtube_dl/extractor/rds.py
+++ b/youtube_dl/extractor/rds.py
@@ -1,23 +1,23 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-import re
-
  from .common import InfoExtractor
  from ..utils import (
      parse_duration,
      parse_iso8601,
+    js_to_json,
  )
+from ..compat import compat_str
  
  
  class RDSIE(InfoExtractor):
      IE_DESC = 'RDS.ca'
-    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
  
      _TESTS = [{
          'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
          'info_dict': {
-            'id': '3.1132799',
+            'id': '604333',
              'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
              'ext': 'mp4',
              'title': 'Fowler Jr. prend la direction de Jacksonville',
@@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
      }]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
  
          webpage = self._download_webpage(url, display_id)
  
-        # TODO: extract f4m from 9c9media.com
-        video_url = self._search_regex(
-            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
-            webpage, 'video url')
-
-        title = self._og_search_title(webpage) or self._html_search_meta(
+        item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
+        video_id = compat_str(item['id'])
+        title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
              'title', webpage, 'title', fatal=True)
          description = self._og_search_description(webpage) or self._html_search_meta(
              'description', webpage, 'description')
-        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
+        thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
              [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
               r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
              webpage, 'thumbnail', fatal=False)
@@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
          age_limit = self._family_friendly_search(webpage)
  
          return {
+            '_type': 'url_transparent',
              'id': video_id,
              'display_id': display_id,
-            'url': video_url,
+            'url': '9c9media:rds_web:%s' % video_id,
              'title': title,
              'description': description,
              'thumbnail': thumbnail,
              'timestamp': timestamp,
              'duration': duration,
              'age_limit': age_limit,
+            'ie_key': 'NineCNineMedia',
          }
author	Remita Amine <remitamine@gmail.com>
	Thu, 30 Jun 2016 17:14:23 +0000 (18:14 +0100)
committer	Remita Amine <remitamine@gmail.com>
	Thu, 30 Jun 2016 17:22:35 +0000 (18:22 +0100)