[crunchyroll] Improve series and season metadata extraction (closes #11832)
author: Sergey M․ <dstftw@gmail.com>
Fri, 27 Jan 2017 16:55:55 +0000 (23:55 +0700)
committer: Sergey M․ <dstftw@gmail.com>
Fri, 27 Jan 2017 16:55:55 +0000 (23:55 +0700)
youtube_dl/extractor/crunchyroll.py

index 559044352705be55f14f94562f39a67edfdb157a..f811c7f33e4b56d59217565500bd47633b6dfc06 100644 (file)
@@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
+        'info_dict': {
+            'id': '727589',
+            'ext': 'mp4',
+            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
+            'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Kadokawa Pictures Inc.',
+            'upload_date': '20170118',
+            'series': "KONOSUBA -God's blessing on this wonderful world!",
+            'season_number': 2,
+            'episode': 'Give Me Deliverance from this Judicial Injustice!',
+            'episode_number': 1,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }, {
         'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
         'only_matching': True,
@@ -439,6 +458,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
         subtitles = self.extract_subtitles(video_id, webpage)
 
+        # the webpage provides more accurate data than series_title from XML
+        series = self._html_search_regex(
+            r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
+            webpage, 'series', default=xpath_text(metadata, 'series_title'))
+
+        episode = xpath_text(metadata, 'episode_title')
+        episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+
+        season_number = int_or_none(self._search_regex(
+            r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
+            webpage, 'season number', default=None))
+
         return {
             'id': video_id,
             'title': video_title,
@@ -446,9 +477,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'thumbnail': xpath_text(metadata, 'episode_image_url'),
             'uploader': video_uploader,
             'upload_date': video_upload_date,
-            'series': xpath_text(metadata, 'series_title'),
-            'episode': xpath_text(metadata, 'episode_title'),
-            'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
+            'series': series,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
             'subtitles': subtitles,
             'formats': formats,
         }