Merge branch 'ted_subtitles'
[youtube-dl] / devscripts / check-porn.py
#!/usr/bin/env python

"""
Heuristic audit of the 'age_limit' tags on the extractor test cases.

Every test case URL is downloaded and the page is searched, case-insensitively,
for the word 'porn' (a VERY basic heuristic).  Two kinds of mismatch are
reported:

* the page mentions 'porn' but the test case does not declare
  info_dict['age_limit'] == 18;
* the test case declares info_dict['age_limit'] == 18 but the page never
  mentions 'porn'.

A '.' is written for every test case whose tag agrees with the heuristic, and
a 'Fail' line for every test case whose page could not be fetched.
"""

import contextlib

# Allow direct execution: put the parent of this script's directory on
# sys.path (presumably the repository root) so the imports below resolve.
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_testcases
from youtube_dl.utils import compat_urllib_request

for test in get_testcases():
    try:
        # closing() guarantees the connection is released even if read()
        # fails; it works whether or not the response object is itself a
        # context manager.
        with contextlib.closing(
                compat_urllib_request.urlopen(test['url'], timeout=10)) as response:
            webpage = response.read()
    except Exception:
        # Deliberately broad: any fetch problem just marks this test case as
        # failed.  KeyboardInterrupt/SystemExit are NOT caught, so the run can
        # still be aborted with Ctrl-C.
        print('\nFail: {0}'.format(test['name']))
        continue

    # Undecodable bytes are replaced rather than raising; only the presence
    # of an ASCII keyword matters here.
    webpage = webpage.decode('utf8', 'replace')

    mentions_porn = 'porn' in webpage.lower()
    # A missing 'info_dict' or 'age_limit' yields None, which is != 18.
    tagged_adult = test.get('info_dict', {}).get('age_limit') == 18

    if mentions_porn and not tagged_adult:
        print('\nPotential missing age_limit check: {0}'.format(test['name']))

    elif tagged_adult and not mentions_porn:
        print('\nPotential false negative: {0}'.format(test['name']))

    else:
        sys.stdout.write('.')
    # Flush after every test case so progress is visible immediately.
    sys.stdout.flush()

# Terminate the progress line.  Written via sys.stdout because a bare
# print() would output '()' under Python 2 without print_function.
sys.stdout.write('\n')