Allow users to specify an age limit (fixes #1545)
author Philipp Hagemeister <phihag@phihag.de>
Sun, 6 Oct 2013 04:06:30 +0000 (06:06 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 6 Oct 2013 04:08:56 +0000 (06:08 +0200)
With these changes, users can now restrict which videos are downloaded according to their intended audience, by specifying their age with --age-limit YEARS.
Add rudimentary support in youtube, pornotube, and youporn.

test/test_age_restriction.py [new file with mode: 0644]
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/common.py
youtube_dl/extractor/pornotube.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/youtube.py

diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
new file mode 100644 (file)
index 0000000..943f9a3
--- /dev/null
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl import YoutubeDL
+from helper import try_rm
+
+
+def _download_restricted(url, filename, age):
+    """ Returns true iff the video was not skipped (its .info.json file was written) """
+
+    params = {
+        'age_limit': age,
+        'skip_download': True,
+        'writeinfojson': True,
+        "outtmpl": "%(id)s.%(ext)s",
+    }
+    ydl = YoutubeDL(params)
+    ydl.add_default_info_extractors()
+    json_filename = filename + '.info.json'
+    try_rm(json_filename)
+    ydl.download([url])
+    res = os.path.exists(json_filename)
+    try_rm(json_filename)
+    return res
+
+
+class TestAgeRestriction(unittest.TestCase):
+    def _assert_restricted(self, url, filename, age, old_age=None):
+        self.assertTrue(_download_restricted(url, filename, old_age))
+        self.assertFalse(_download_restricted(url, filename, age))
+
+    def test_youtube(self):
+        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+
+    def test_youporn(self):
+        self._assert_restricted(
+            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+            '505835.mp4', 2, old_age=25)
+
+    def test_pornotube(self):
+        self._assert_restricted(
+            'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
+            '1689755.flv', 13)
+
+
+if __name__ == '__main__':
+    unittest.main()
index 2503fd09b976b86da60c9a82fd31495266106c97..6258c141e4206756f72824f5d6d59f074acfa41e 100644 (file)
@@ -84,6 +84,8 @@ class YoutubeDL(object):
     cachedir:          Location of the cache files in the filesystem.
                        None to disable filesystem cache.
     noplaylist:        Download single video instead of a playlist if in doubt.
+    age_limit:         An integer representing the user's age in years.
+                       Unsuitable videos for the given age are skipped.
     
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
@@ -309,6 +311,10 @@ class YoutubeDL(object):
             dateRange = self.params.get('daterange', DateRange())
             if date not in dateRange:
                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+        age_limit = self.params.get('age_limit')
+        if age_limit is not None:
+            if age_limit < info_dict.get('age_restriction', 0):
+                return u'Skipping "' + title + '" because it is age restricted'
         return None
         
     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
index 03df835f23ebe58bd1048d336ba68019e4a2e33d..7a399273a0401fbdc36c8921146d9f09c720e146 100644 (file)
@@ -188,6 +188,9 @@ def parseOpts(overrideArguments=None):
     selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
     selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
     selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
+    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
+                         help='download only videos suitable for the given age',
+                         default=None, type=int)
 
 
     authentication.add_option('-u', '--username',
@@ -631,6 +634,7 @@ def _real_main(argv=None):
         'daterange': date,
         'cachedir': opts.cachedir,
         'youtube_print_sig_code': opts.youtube_print_sig_code,
+        'age_limit': opts.age_limit,
         })
 
     if opts.verbose:
index 69cdcdc1b5b75d1cac5733b34565f087c9dcddec..2a5a85dc67b4f7a57d04d4f21c1608aa2c47f7f3 100644 (file)
@@ -54,6 +54,7 @@ class InfoExtractor(object):
     view_count:     How many users have watched the video on the platform.
     urlhandle:      [internal] The urlHandle to be used to download the file,
                     like returned by urllib.request.urlopen
+    age_restriction: Age restriction for the video, as an integer (years)
     formats:        A list of dictionaries for each format available, it must
                     be ordered from worst to best quality. Potential fields:
                     * url       Mandatory. The URL of the video file
@@ -318,6 +319,15 @@ class InfoExtractor(object):
                                         self._og_regex('video')],
                                        html, name, **kargs)
 
+    def _rta_search(self, html):
+        # See http://www.rtalabel.org/index.php?content=howtofaq#single
+        if re.search(r'(?ix)<meta\s+name="rating"\s+'
+                     r'     content="RTA-5042-1996-1400-1577-RTA"',
+                     html):
+            return 18
+        return 0
+
+
 class SearchInfoExtractor(InfoExtractor):
     """
     Base class for paged search queries extractors.
index add76a11e5f2c0c17af76b71db6e8bd07adc6cd6..9039dff5a5bee13712dce0a39540c51aa052baa8 100644 (file)
@@ -38,6 +38,7 @@ class PornotubeIE(InfoExtractor):
         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
         upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
         if upload_date: upload_date = unified_strdate(upload_date)
+        age_limit = self._rta_search(webpage)
 
         info = {'id': video_id,
                 'url': video_url,
@@ -45,6 +46,7 @@ class PornotubeIE(InfoExtractor):
                 'upload_date': upload_date,
                 'title': video_title,
                 'ext': 'flv',
-                'format': 'flv'}
+                'format': 'flv',
+                'age_restriction': age_limit}
 
         return [info]
index c85fd4b5af0ccdd3f259bd403ddd4311f2de5fdb..e2860ec9d6dcb643819f42ade62384d23086d2a1 100644 (file)
@@ -51,6 +51,7 @@ class YouPornIE(InfoExtractor):
         req = compat_urllib_request.Request(url)
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
+        age_limit = self._rta_search(webpage)
 
         # Get JSON parameters
         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
@@ -115,7 +116,8 @@ class YouPornIE(InfoExtractor):
                 'ext': extension,
                 'format': format,
                 'thumbnail': thumbnail,
-                'description': video_description
+                'description': video_description,
+                'age_restriction': age_limit,
             })
 
         if self._downloader.params.get('listformats', None):
index 1101011ea38aedc4d2a46aecb3c357861e441929..9bcd035bde1fabe9c74dae03173a7cb0684486ca 100644 (file)
@@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'description':  video_description,
                 'player_url':   player_url,
                 'subtitles':    video_subtitles,
-                'duration':     video_duration
+                'duration':     video_duration,
+                'age_restriction': 18 if age_gate else 0,
             })
         return results