Respect age_limit when listing extractors (Fixes #4653)

author Philipp Hagemeister <phihag@phihag.de>

Wed, 7 Jan 2015 06:20:20 +0000 (07:20 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Wed, 7 Jan 2015 06:20:20 +0000 (07:20 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Wed, 7 Jan 2015 06:20:20 +0000 (07:20 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Wed, 7 Jan 2015 06:20:20 +0000 (07:20 +0100)
diff --git a/test/helper.py b/test/helper.py

index 96d58b7c12fd9119b3b5f65eb9c41cfc3c97f500..77225e4f799755e2927a3105fa1974f08779fc8d 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
  
  def gettestcases(include_onlymatching=False):
      for ie in youtube_dl.extractor.gen_extractors():
-        t = getattr(ie, '_TEST', None)
-        if t:
-            assert not hasattr(ie, '_TESTS'), \
-                '%s has _TEST and _TESTS' % type(ie).__name__
-            tests = [t]
-        else:
-            tests = getattr(ie, '_TESTS', [])
-        for t in tests:
-            if not include_onlymatching and t.get('only_matching', False):
-                continue
-            t['name'] = type(ie).__name__[:-len('IE')]
-            yield t
+        for tc in ie.get_testcases(include_onlymatching):
+            yield tc
  
  
  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
diff --git a/test/test_utils.py b/test/test_utils.py

index dd49a6d179dc7cea01935be7c019292b4f3559dc..16e1a1ddfdef5be7c0ba941d8793b3c8b88faf08 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -16,6 +16,7 @@ import json
  import xml.etree.ElementTree
  
  from youtube_dl.utils import (
+    age_restricted,
      args_to_str,
      clean_html,
      DateRange,
@@ -402,5 +403,12 @@ Trying to open render node...
  Success at /dev/dri/renderD128.
  ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
  
+    def test_age_restricted(self):
+        self.assertFalse(age_restricted(None, 10))  # unrestricted content
+        self.assertFalse(age_restricted(1, None))  # unrestricted policy
+        self.assertFalse(age_restricted(8, 10))
+        self.assertTrue(age_restricted(18, 14))
+        self.assertFalse(age_restricted(18, 18))
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index 806e7b239cec05e1724dc58e74aca1af671981cf..36c71f9475233731f44f5ea44dec5fec7a83588f 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -63,6 +63,7 @@ from .utils import (
      YoutubeDLHandler,
      prepend_extension,
      args_to_str,
+    age_restricted,
  )
  from .cache import Cache
  from .extractor import get_info_extractor, gen_extractors
@@ -550,13 +551,8 @@ class YoutubeDL(object):
              max_views = self.params.get('max_views')
              if max_views is not None and view_count > max_views:
                  return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
-        age_limit = self.params.get('age_limit')
-        if age_limit is not None:
-            actual_age_limit = info_dict.get('age_limit')
-            if actual_age_limit is None:
-                actual_age_limit = 0
-            if age_limit < actual_age_limit:
-                return 'Skipping "' + title + '" because it is age restricted'
+        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+            return 'Skipping "%s" because it is age restricted' % title
          if self.in_download_archive(info_dict):
              return '%s has already been recorded in archive' % video_title
          return None
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 56f560d26c6afa5f366f207e21b3dd8ec50d20b0..4c21188a9d8cb12b49d06adfd0c468a8c2b5c19d 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -38,7 +38,7 @@ from .update import update_self
  from .downloader import (
      FileDownloader,
  )
-from .extractor import gen_extractors
+from .extractor import list_extractors
  from .YoutubeDL import YoutubeDL
  
  
@@ -95,17 +95,15 @@ def _real_main(argv=None):
      _enc = preferredencoding()
      all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
  
-    extractors = gen_extractors()
-
      if opts.list_extractors:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
              compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
              matchedUrls = [url for url in all_urls if ie.suitable(url)]
              for mu in matchedUrls:
                  compat_print('  ' + mu)
          sys.exit(0)
      if opts.list_extractor_descriptions:
-        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+        for ie in list_extractors(opts.age_limit):
              if not ie._WORKING:
                  continue
              desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index b523e9644ac3bb4268b6582d7ac2359a8d48b7e5..0145e350de8c8f7a40c7c81cea9d620ebc0f7013 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -560,6 +560,8 @@ from .zingmp3 import (
      ZingMp3AlbumIE,
  )
  
+from ..utils import age_restricted
+
  _ALL_CLASSES = [
      klass
      for name, klass in globals().items()
@@ -575,6 +577,17 @@ def gen_extractors():
      return [klass() for klass in _ALL_CLASSES]
  
  
+def list_extractors(age_limit):
+    """
+    Return a list of extractors that are suitable for the given age,
+    sorted by extractor ID.
+    """
+
+    return sorted(
+        filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+        key=lambda ie: ie.IE_NAME.lower())
+
+
  def get_info_extractor(ie_name):
      """Returns the info extractor class with the given ie_name"""
      return globals()[ie_name + 'IE']
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 562e656e0b24b2a43c69cae81994fed902eb2851..df32b5ca0ba081df6c5f4c27f2f00c1a46e7c246 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -21,6 +21,7 @@ from ..compat import (
      compat_str,
  )
  from ..utils import (
+    age_restricted,
      clean_html,
      compiled_regex_type,
      ExtractorError,
@@ -877,6 +878,35 @@ class InfoExtractor(object):
              None, '/', True, False, expire_time, '', None, None, None)
          self._downloader.cookiejar.set_cookie(cookie)
  
+    def get_testcases(self, include_onlymatching=False):
+        t = getattr(self, '_TEST', None)
+        if t:
+            assert not hasattr(self, '_TESTS'), \
+                '%s has _TEST and _TESTS' % type(self).__name__
+            tests = [t]
+        else:
+            tests = getattr(self, '_TESTS', [])
+        for t in tests:
+            if not include_onlymatching and t.get('only_matching', False):
+                continue
+            t['name'] = type(self).__name__[:-len('IE')]
+            yield t
+
+    def is_suitable(self, age_limit):
+        """ Test whether the extractor is generally suitable for the given
+        age limit (i.e. pornographic sites are not, all others usually are) """
+
+        any_restricted = False
+        for tc in self.get_testcases(include_onlymatching=False):
+            if 'playlist' in tc:
+                tc = tc['playlist'][0]
+            is_restricted = age_restricted(
+                tc.get('info_dict', {}).get('age_limit'), age_limit)
+            if not is_restricted:
+                return True
+            any_restricted = any_restricted or is_restricted
+        return not any_restricted
+
  
  class SearchInfoExtractor(InfoExtractor):
      """
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py

index 95f1c8f3cf20bdd83b3e524808e5c5fb20cde680..e8490b028e53080b8e685be13577a05603a4af9e 100644 (file)
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
          'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
          'info_dict': {
              'id': 'greenshowers',
+            'age_limit': 18,
          },
          'playlist_mincount': 155,
      }
@@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
          return {
              '_type': 'playlist',
              'id': username,
+            'age_limit': 18,
              'entries': [{
                  '_type': 'url',
                  'url': eurl,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index d4951c406c73d8216803f5d9149200787883676b..29739a4833de0a782b359d958d331316cbaf1c24 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1560,3 +1560,13 @@ def urlhandle_detect_ext(url_handle):
          getheader = url_handle.info().getheader
  
      return getheader('Content-Type').split("/")[1]
+
+
+def age_restricted(content_limit, age_limit):
+    """ Returns True iff the content should be blocked """
+
+    if age_limit is None:  # No limit set
+        return False
+    if content_limit is None:
+        return False  # Content available for everyone
+    return age_limit < content_limit
author	Philipp Hagemeister <phihag@phihag.de>
	Wed, 7 Jan 2015 06:20:20 +0000 (07:20 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Wed, 7 Jan 2015 06:20:20 +0000 (07:20 +0100)
test/helper.py		patch | blob | history
test/test_utils.py		patch | blob | history
youtube_dl/YoutubeDL.py		patch | blob | history
youtube_dl/__init__.py		patch | blob | history
youtube_dl/extractor/__init__.py		patch | blob | history
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/xtube.py		patch | blob | history
youtube_dl/utils.py		patch | blob | history