[wdr:elefant] Add extractor

author Sebastian Leske <sebastian.leske@sleske.name>

Wed, 25 Oct 2017 12:59:57 +0000 (14:59 +0200)

committer Sergey M․ <dstftw@gmail.com>

Sat, 13 Jan 2018 16:29:36 +0000 (23:29 +0700)
author Sebastian Leske <sebastian.leske@sleske.name>
Wed, 25 Oct 2017 12:59:57 +0000 (14:59 +0200)
committer Sergey M․ <dstftw@gmail.com>
Sat, 13 Jan 2018 16:29:36 +0000 (23:29 +0700)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py

index 37624d37a13d9403fa876494601651040aa3466d..255df75fe5b4badb5202fd472d79d291a8630ab8 100644 (file)
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1289,6 +1289,7 @@ from .watchbox import WatchBoxIE
  from .watchindianporn import WatchIndianPornIE
  from .wdr import (
      WDRIE,
+    WDRElefantIE,
      WDRMobileIE,
  )
  from .webcaster import (
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py

index 621de1e1efb73a9a377a46fe0fa702e595c3cde5..4871ae92bd20d3e15cac4540df17283719f86e16 100644 (file)
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -16,7 +16,7 @@ from ..utils import (
  
  
  class WDRBaseIE(InfoExtractor):
-    def _extract_wdr_video(self, webpage, display_id):
+    def _extract_jsonp_url(self, webpage, display_id):
          # for wdr.de the data-extension is in a tag with the class "mediaLink"
          # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
          # for wdrmaus, in a tag with the class "videoButton" (previously a link
@@ -35,8 +35,9 @@ class WDRBaseIE(InfoExtractor):
  
          media_link_obj = self._parse_json(json_metadata, display_id,
                                            transform_source=js_to_json)
-        jsonp_url = media_link_obj['mediaObj']['url']
+        return media_link_obj['mediaObj']['url']
  
+    def _extract_wdr_video(self, jsonp_url, display_id):
          metadata = self._download_json(
              jsonp_url, display_id, transform_source=strip_jsonp)
  
@@ -206,7 +207,8 @@ class WDRIE(WDRBaseIE):
          display_id = mobj.group('display_id')
          webpage = self._download_webpage(url, display_id)
  
-        info_dict = self._extract_wdr_video(webpage, display_id)
+        jsonp_url = self._extract_jsonp_url(webpage, display_id)
+        info_dict = self._extract_wdr_video(jsonp_url, display_id)
  
          if not info_dict:
              entries = [
@@ -239,6 +241,52 @@ class WDRIE(WDRBaseIE):
          return info_dict
  
  
+class WDRElefantIE(WDRBaseIE):
+    """Extractor for single-video pages of wdrmaus.de/elefantenseite (id in URL fragment)."""
+    # Dots in the host name are escaped so that only the literal domain matches.
+    _VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<display_id>.+)'
+    IE_NAME = 'wdr:elefant'
+
+    _TESTS = [{
+        'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
+        'info_dict': {
+            'title': 'Folge Oster-Spezial 2015',
+            'id': 'mdb-1088195',
+            'ext': 'mp4',
+            'age_limit': None,
+            'upload_date': '20150406'
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Resolve the URL fragment via the site's table of contents and extract the video."""
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+
+        # Table of Contents seems to always be at this address, so fetch it directly.
+        # The website fetches configurationJS.php5, which links to tableOfContentsJS.php5.
+        table_of_contents = self._download_json(
+            'https://www.wdrmaus.de/elefantenseite/data/tableOfContentsJS.php5', display_id)
+        if display_id not in table_of_contents:
+            raise ExtractorError(
+                'No entry in site\'s table of contents for this URL. '
+                'Is the fragment part of the URL (after the #) correct?',
+                expected=True)
+        xml_metadata_path = table_of_contents[display_id]['xmlPath']
+        xml_metadata = self._download_xml(
+            'https://www.wdrmaus.de/elefantenseite/' + xml_metadata_path, display_id)
+        zmdb_url_element = xml_metadata.find('./movie/zmdb_url')
+        if zmdb_url_element is None:
+            raise ExtractorError(
+                'The URL looks valid, but no video was found. Note that download only works '
+                'on pages showing a single video, not on video selection pages.',
+                expected=True)
+        return self._extract_wdr_video(zmdb_url_element.text, display_id)
+
+
  class WDRMobileIE(InfoExtractor):
      _VALID_URL = r'''(?x)
          https?://mobile-ondemand\.wdr\.de/
author	Sebastian Leske <sebastian.leske@sleske.name>
	Wed, 25 Oct 2017 12:59:57 +0000 (14:59 +0200)
committer	Sergey M․ <dstftw@gmail.com>
	Sat, 13 Jan 2018 16:29:36 +0000 (23:29 +0700)
youtube_dl/extractor/extractors.py		patch \| blob \| history
youtube_dl/extractor/wdr.py		patch \| blob \| history