[pokemon] Relax _VALID_URL and extend title extraction (closes #15518)
author Sergey M․ <dstftw@gmail.com>
Wed, 7 Feb 2018 20:58:35 +0000 (03:58 +0700)
committer Sergey M․ <dstftw@gmail.com>
Wed, 7 Feb 2018 20:58:35 +0000 (03:58 +0700)
youtube_dl/extractor/pokemon.py

index 2d87e7e70896857f1cd45fb52052fd4f81ec9bc9..4ff617163fb7711a4b83d169a6fb3c00f7e004e7 100644 (file)
@@ -11,19 +11,34 @@ from ..utils import (
 
 
 class PokemonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
+    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
     _TESTS = [{
-        'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
-        'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
+        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+        'md5': '2fe8eaec69768b25ef898cda9c43062e',
         'info_dict': {
-            'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
+            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
             'ext': 'mp4',
-            'title': 'From A to Z!',
-            'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
-            'timestamp': 1460478136,
-            'upload_date': '20160412',
+            'title': 'The Ol’ Raise and Switch!',
+            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+            'timestamp': 1511824728,
+            'upload_date': '20171127',
+        },
+        'add_id': ['LimelightMedia'],
+    }, {
+        # no data-video-title
+        'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
+        'info_dict': {
+            'id': '99f3bae270bf4e5097274817239ce9c8',
+            'ext': 'mp4',
+            'title': 'Pokémon: The Rise of Darkrai',
+            'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
+            'timestamp': 1417778347,
+            'upload_date': '20141205',
+        },
+        'add_id': ['LimelightMedia'],
+        'params': {
+            'skip_download': True,
         },
-        'add_id': ['LimelightMedia']
     }, {
         'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
         'only_matching': True,
@@ -42,7 +57,9 @@ class PokemonIE(InfoExtractor):
             r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
             webpage, 'video data element'))
         video_id = video_data['data-video-id']
-        title = video_data['data-video-title']
+        title = video_data.get('data-video-title') or self._html_search_meta(
+            'pkm-title', webpage,' title', default=None) or self._search_regex(
+            r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
         return {
             '_type': 'url_transparent',
             'id': video_id,