Merge remote-tracking branch 'olebowle/ard'
[youtube-dl] / youtube_dl / extractor / crunchyroll.py
index e3057d90036575b8ef4dad2f8605ee44e0c9c558..05b21e8725f734750a02d9b24beadabf5db6c02f 100644 (file)
@@ -24,6 +24,7 @@ from ..aes import (
     aes_cbc_decrypt,
     inc,
 )
+from .common import InfoExtractor
 
 
 class CrunchyrollIE(SubtitlesInfoExtractor):
@@ -288,3 +289,40 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'subtitles':   subtitles,
             'formats':     formats,
         }
+
+
+class CrunchyrollShowPlaylistIE(InfoExtractor):
+    IE_NAME = "crunchyroll:playlist"
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$'
+
+    _TESTS = [{
+        'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+        'info_dict': {
+            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
+        },
+        'playlist_count': 13,
+    }]
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, show_id)
+        title = self._html_search_regex(
+            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
+            webpage, 'title')
+        episode_paths = re.findall(
+            r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
+            webpage)
+        entries = [
+            self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
+            for ep in episode_paths
+        ]
+        entries.reverse()
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'title': title,
+            'entries': entries,
+        }