[ard:mediathek] Fix title and description extraction (closes #18349)
author Alexander Seiler <seileralex@gmail.com>
Thu, 6 Dec 2018 20:41:02 +0000 (21:41 +0100)
committer Sergey M <dstftw@gmail.com>
Thu, 6 Dec 2018 20:41:02 +0000 (03:41 +0700)
youtube_dl/extractor/ard.py

index 6bf8f61eb03c0994c87651af51ecb58cd3065060..84e96f76984224945d6ff8af93302e69b78dca02 100644 (file)
@@ -173,13 +173,18 @@ class ARDMediathekIE(InfoExtractor):
         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
              r'<meta name="dcterms\.title" content="(.*?)"/>',
-             r'<h4 class="headline">(.*?)</h4>'],
+             r'<h4 class="headline">(.*?)</h4>',
+             r'<title[^>]*>(.*?)</title>'],
             webpage, 'title')
         description = self._html_search_meta(
             'dcterms.abstract', webpage, 'description', default=None)
         if description is None:
             description = self._html_search_meta(
-                'description', webpage, 'meta description')
+                'description', webpage, 'meta description', default=None)
+        if description is None:
+            description = self._html_search_regex(
+                r'<p\s+class="teasertext">(.+?)</p>',
+                webpage, 'teaser text', default=None)
 
         # Thumbnail is sometimes not present.
         # It is in the mobile version, but that seems to use a different URL