Add the missing age_limit tags; added a devscript to do a superficial check for porn...
author Filippo Valsorda <filippo.valsorda@gmail.com>
Mon, 28 Oct 2013 05:50:17 +0000 (01:50 -0400)
committer Filippo Valsorda <filippo.valsorda@gmail.com>
Mon, 28 Oct 2013 05:50:17 +0000 (01:50 -0400)
devscripts/check-porn.py [new file with mode: 0644]
youtube_dl/extractor/keezmovies.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornotube.py
youtube_dl/extractor/spankwire.py
youtube_dl/extractor/tube8.py
youtube_dl/extractor/youjizz.py

diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
new file mode 100644 (file)
index 0000000..63401fe
--- /dev/null
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+"""
+This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
+that no test for a porn site is missing its 'age_limit': 18 tag
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from test.helper import get_testcases
+from youtube_dl.utils import compat_urllib_request
+
+for test in get_testcases():
+    try:
+        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+    except Exception:  # not a bare except, so Ctrl-C still aborts the run
+        print('\nFail: {0}'.format(test['name']))
+        continue
+
+    # Undecodable bytes are replaced: only a substring match is needed
+    looks_porn = 'porn' in webpage.decode('utf8', 'replace').lower()
+    # True only when the test's info_dict explicitly expects age_limit 18
+    tagged_18 = test.get('info_dict', {}).get('age_limit') == 18
+
+    if looks_porn and not tagged_18:
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+    elif tagged_18 and not looks_porn:
+        # Tagged 18 but the heuristic did not fire on the page
+        print('\nPotential false negative: {0}'.format(test['name']))
+    else:
+        # One dot per test that agrees with the heuristic
+        sys.stdout.write('.')
+    sys.stdout.flush()
+
+# print() would emit "()" under Python 2, so write the newline directly
+sys.stdout.write('\n')
index 23d5209d997c1866af408a898a511d0b5f0a92f7..5e05900da211eb9d2d6e33706a71f47dcf1abff7 100644 (file)
@@ -6,7 +6,6 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
-    unescapeHTML,
 )
 from ..aes import (
     aes_decrypt_text
@@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor):
         u'md5': u'6e297b7e789329923fcf83abb67c9289',
         u'info_dict': {
             u"title": u"Petite Asian Lady Mai Playing In Bathtub",
+            u"age_limit": 18,
         }
     }
 
@@ -48,6 +48,8 @@ class KeezMoviesIE(InfoExtractor):
         format = path.split('/')[4].split('_')[:2]
         format = "-".join( format )
 
+        age_limit = self._rta_search(webpage)
+
         return {
             'id': video_id,
             'title': video_title,
@@ -55,4 +57,5 @@ class KeezMoviesIE(InfoExtractor):
             'ext': extension,
             'format': format,
             'format_id': format,
+            'age_limit': age_limit,
         }
index 3dbd2ab699e31ab27c2e191ef757ac5e70eff940..5e2454f1b79cc77f71964f609ea9dd53fd739fbc 100644 (file)
@@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor):
         u'info_dict': {
             u"uploader": u"BABES-COM", 
             u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
+            u"age_limit": 18
         }
     }
 
@@ -64,4 +65,5 @@ class PornHubIE(InfoExtractor):
             'title': video_title,
             'thumbnail': thumbnail,
             'formats': formats,
+            'age_limit': 18,
         }
index 5d770ec285c3d1e3dcad04cfe49ca7780a9dd2b4..35dc5a9ffafb32d36e30f51988291dded6a6d18c 100644 (file)
@@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
         u'md5': u'374dd6dcedd24234453b295209aa69b6',
         u'info_dict': {
             u"upload_date": u"20090708", 
-            u"title": u"Marilyn-Monroe-Bathing"
+            u"title": u"Marilyn-Monroe-Bathing",
+            u"age_limit": 18
         }
     }
 
index f0d5009c717be0cd02ea7edf2b74af0a7a8e8b80..32df0a7fb5607fc254b5e964baa2bb9cbb8ee9cf 100644 (file)
@@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor):
             u"uploader": u"oreusz", 
             u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
             u"description": u"Crazy Bitch X rated music video.",
+            u"age_limit": 18,
         }
     }
 
@@ -60,6 +61,8 @@ class SpankwireIE(InfoExtractor):
             })
         formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
 
+        age_limit = self._rta_search(webpage)
+
         return {
             'id': video_id,
             'uploader': video_uploader,
@@ -67,4 +70,5 @@ class SpankwireIE(InfoExtractor):
             'thumbnail': thumbnail,
             'description': description,
             'formats': formats,
+            'age_limit': age_limit,
         }
index ebc8c1f4f11544fc2110aadfb1fbf5876502f3bc..aea9d9a24045cb92859c6b7737fad88baef057ff 100644 (file)
@@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor):
             u"description": u"hot teen Kasia grinding", 
             u"uploader": u"unknown", 
             u"title": u"Kasia music video",
+            u"age_limit": 18,
         }
     }
 
@@ -60,4 +61,5 @@ class Tube8IE(InfoExtractor):
             'ext': extension,
             'format': format,
             'format_id': format,
+            'age_limit': 18,
         }
index 1265639e821bd873b74aeea08811f8c22e966ba1..1fcc518acde9dbb08fef1ccb42a9ee7ae550967a 100644 (file)
@@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
         u'file': u'2189178.flv',
         u'md5': u'07e15fa469ba384c7693fd246905547c',
         u'info_dict': {
-            u"title": u"Zeichentrick 1"
+            u"title": u"Zeichentrick 1",
+            u"age_limit": 18,
         }
     }
 
@@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):
         # Get webpage content
         webpage = self._download_webpage(url, video_id)
 
+        age_limit = self._rta_search(webpage)
+
         # Get the video title
         video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
             webpage, u'title').strip()
@@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):
                 'title': video_title,
                 'ext': 'flv',
                 'format': 'flv',
-                'player_url': embed_page_url}
+                'player_url': embed_page_url,
+                'age_limit': age_limit}
 
         return [info]