[raiplay:playlist] Add extractor
author Timendum <timedum@gmail.com>
Mon, 23 Oct 2017 13:32:45 +0000 (15:32 +0200)
committer Sergey M․ <dstftw@gmail.com>
Fri, 8 Dec 2017 17:47:40 +0000 (00:47 +0700)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/rai.py

index 9c9739ad22197d4a0e29765a33aed1dfc2baa552..d8f9f94ccba1fbaea7aa6ef84c85e0233795f37b 100644 (file)
@@ -857,6 +857,7 @@ from .rai import (
     RaiPlayIE,
     RaiPlayLiveIE,
     RaiIE,
+    RaiPlaylistIE,
 )
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
index 5bf64a56b71d6e0d4282d69476410e1108555877..62545838086bded13bec2daa937e3ed767163ac4 100644 (file)
@@ -455,3 +455,43 @@ class RaiIE(RaiBaseIE):
         info.update(relinker_info)
 
         return info
+
+
+class RaiPlaylistIE(InfoExtractor):
+    """Extract a RaiPlay programme page as a playlist of its linked videos."""
+
+    # Stop the id at '?', '#' and '&' as well as '/', so that a query string
+    # or fragment appended to the programme URL does not end up in the id.
+    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
+        'info_dict': {
+            'id': 'nondirloalmiocapo',
+            'title': 'Non dirlo al mio capo',
+        },
+        'playlist_mincount': 12,
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist result with one entry per linked RaiPlay video.
+
+        The programme page at `url` is downloaded and scanned for relative
+        `/raiplay/video...` links; each link is resolved against `url` and
+        kept only if RaiPlayIE reports it as suitable.
+        """
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+        # The title is optional: a page without the "programma" meta tag
+        # still yields a playlist, just an untitled one.
+        title = self._html_search_meta('programma', webpage, default=None)
+
+        entries = []
+        # [^"]+ ends the match at the closing quote of the href attribute;
+        # a greedy .+ would run on to the last quote on the line and swallow
+        # any attributes that follow the href.
+        for path in re.findall(r' href="(/raiplay/video[^"]+)"', webpage):
+            video_url = urljoin(url, path)
+            if RaiPlayIE.suitable(video_url):
+                entries.append(
+                    self.url_result(video_url, RaiPlayIE.ie_key()))
+        return self.playlist_result(entries, playlist_id, title)