[youtube] Fix categories and improve tags extraction

[youtube-dl] / youtube_dl / extractor / drtuber.py
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py

index c88b3126b1676f11ee3696a2499e1f7a0a57d8b3..2baea585bf6b8882d5816052ca734b1f9dbc024c 100644 (file)
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -4,7 +4,9 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
+    int_or_none,
      NO_DEFAULT,
+    parse_duration,
      str_to_int,
  )
  
@@ -65,8 +67,13 @@ class DrTuberIE(InfoExtractor):
                  })
          self._sort_formats(formats)
  
+        duration = int_or_none(video_data.get('duration')) or parse_duration(
+            video_data.get('duration_format'))
+
          title = self._html_search_regex(
-            (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
+            (r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
+             r'<title>([^<]+)\s*@\s+DrTuber',
+             r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
               r'<p[^>]+class="title_substrate">([^<]+)</p>',
               r'<title>([^<]+) - \d+'),
              webpage, 'title')
@@ -101,4 +108,5 @@ class DrTuberIE(InfoExtractor):
              'comment_count': comment_count,
              'categories': categories,
              'age_limit': self._rta_search(webpage),
+            'duration': duration,
          }