[newgrounds] Fix metadata extraction (closes #15531)
author: Sergey M. <dstftw@gmail.com>
Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)
committer: Sergey M. <dstftw@gmail.com>
Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)
youtube_dl/extractor/newgrounds.py

index 0e26f8399dd8ea8777c28d0bb61483e27f954965..82e7cf52216ac431a09d4503caaf7cdb787cd876 100644 (file)
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
         self._check_formats(formats, media_id)
         self._sort_formats(formats)
 
-        uploader = self._search_regex(
-            r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+        uploader = self._html_search_regex(
+            (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
             fatal=False)
 
-        timestamp = unified_timestamp(self._search_regex(
-            r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+        timestamp = unified_timestamp(self._html_search_regex(
+            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
             default=None))
         duration = parse_duration(self._search_regex(
-            r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
-            default=None))
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+            'duration', default=None))
 
         filesize_approx = parse_filesize(self._html_search_regex(
-            r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
             default=None))
         if len(formats) == 1:
             formats[0]['filesize_approx'] = filesize_approx