[motherless] Detect non-existing videos

[youtube-dl] / youtube_dl / extractor / motherless.py
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py

index 3621ff99e76da1bffabda1a81f6181fd4a6ed61c..30e686a4e175ad258a5efde8a60dacb85fb24363 100644 (file)
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -5,6 +5,7 @@ import re
  
  from .common import InfoExtractor
  from ..utils import (
+    ExtractorError,
      str_to_int,
      unified_strdate,
  )
@@ -54,6 +55,11 @@ class MotherlessIE(InfoExtractor):
                  'thumbnail': 're:http://.*\.jpg',
                  'age_limit': 18,
              }
+        },
+        {
+            # no keywords
+            'url': 'http://motherless.com/8B4BBC1',
+            'only_matching': True,
          }
      ]
  
@@ -61,6 +67,11 @@ class MotherlessIE(InfoExtractor):
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)
  
+        if any(p in webpage for p in (
+                '<title>404 - MOTHERLESS.COM<',
+                ">The page you're looking for cannot be found.<")):
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
          title = self._html_search_regex(
              r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
          video_url = self._html_search_regex(
@@ -72,7 +83,7 @@ class MotherlessIE(InfoExtractor):
          like_count = str_to_int(self._html_search_regex(
              r'<strong>Favorited</strong>\s+([^<]+)<',
              webpage, 'like count', fatal=False))
- 
+
          upload_date = self._html_search_regex(
              r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
          if 'Ago' in upload_date:
@@ -86,7 +97,7 @@ class MotherlessIE(InfoExtractor):
              r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
              webpage, 'uploader_id')
  
-        categories = self._html_search_meta('keywords', webpage)
+        categories = self._html_search_meta('keywords', webpage, default=None)
          if categories:
              categories = [cat.strip() for cat in categories.split(',')]