[motherless] Detect non-existing videos
[youtube-dl] / youtube_dl / extractor / motherless.py
index 3621ff99e76da1bffabda1a81f6181fd4a6ed61c..30e686a4e175ad258a5efde8a60dacb85fb24363 100644 (file)
@@ -5,6 +5,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     str_to_int,
     unified_strdate,
 )
@@ -54,6 +55,11 @@ class MotherlessIE(InfoExtractor):
                 'thumbnail': 're:http://.*\.jpg',
                 'age_limit': 18,
             }
+        },
+        {
+            # no keywords
+            'url': 'http://motherless.com/8B4BBC1',
+            'only_matching': True,
         }
     ]
 
@@ -61,6 +67,11 @@ class MotherlessIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if any(p in webpage for p in (
+                '<title>404 - MOTHERLESS.COM<',
+                ">The page you're looking for cannot be found.<")):
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
         title = self._html_search_regex(
             r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
         video_url = self._html_search_regex(
@@ -72,7 +83,7 @@ class MotherlessIE(InfoExtractor):
         like_count = str_to_int(self._html_search_regex(
             r'<strong>Favorited</strong>\s+([^<]+)<',
             webpage, 'like count', fatal=False))
+
         upload_date = self._html_search_regex(
             r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
         if 'Ago' in upload_date:
@@ -86,7 +97,7 @@ class MotherlessIE(InfoExtractor):
             r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
             webpage, 'uploader_id')
 
-        categories = self._html_search_meta('keywords', webpage)
+        categories = self._html_search_meta('keywords', webpage, default=None)
         if categories:
             categories = [cat.strip() for cat in categories.split(',')]