[newgrounds] Fix metadata extraction (closes #15531)

author Sergey M․ <dstftw@gmail.com>

Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)

committer Sergey M․ <dstftw@gmail.com>

Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)
author Sergey M․ <dstftw@gmail.com>
Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)
committer Sergey M․ <dstftw@gmail.com>
Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py

index 0e26f8399dd8ea8777c28d0bb61483e27f954965..82e7cf52216ac431a09d4503caaf7cdb787cd876 100644 (file)
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
          self._check_formats(formats, media_id)
          self._sort_formats(formats)
  
-        uploader = self._search_regex(
-            r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+        uploader = self._html_search_regex(
+            (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
              fatal=False)
  
-        timestamp = unified_timestamp(self._search_regex(
-            r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+        timestamp = unified_timestamp(self._html_search_regex(
+            (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+             r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
              default=None))
          duration = parse_duration(self._search_regex(
-            r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
-            default=None))
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+            'duration', default=None))
  
          filesize_approx = parse_filesize(self._html_search_regex(
-            r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
              default=None))
          if len(formats) == 1:
              formats[0]['filesize_approx'] = filesize_approx
author	Sergey M․ <dstftw@gmail.com>
	Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Fri, 9 Feb 2018 14:17:02 +0000 (21:17 +0700)