Merge remote-tracking branch 'dstftw/generic-webpage-unescape'
[youtube-dl] / youtube_dl / extractor / generic.py
index 2254ade90e153eb25514a7765238bdc603135c48..23891325677e40165252788bba7a47e4cee4f91c 100644 (file)
@@ -185,7 +185,18 @@ class GenericIE(InfoExtractor):
                 'uploader': 'Ze Frank',
                 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
             }
-        }
+        },
+        # nowvideo embed hidden behind percent encoding
+        {
+            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+            'info_dict': {
+                'id': '06e53103ca9aa',
+                'ext': 'flv',
+                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
+                'description': 'No description',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -337,6 +348,11 @@ class GenericIE(InfoExtractor):
         except compat_xml_parse_error:
             pass
 
+        # Sometimes embedded video player is hidden behind percent encoding
+        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+        # Unescaping the whole page allows to handle those cases in a generic way
+        webpage = compat_urllib_parse.unquote(webpage)
+
         # it's tempting to parse this further, but you would
         # have to take into account all the variations like
         #   Video Title - Site Name