Merge remote-tracking branch 'jaimeMF/yt-playlists'
[youtube-dl] / youtube_dl / extractor / spankwire.py
index f0d5009c717be0cd02ea7edf2b74af0a7a8e8b80..9e2ad0d9962c375ca27851b3f842de302be28e56 100644 (file)
@@ -6,7 +6,6 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
-    unescapeHTML,
 )
 from ..aes import (
     aes_decrypt_text
@@ -22,6 +21,7 @@ class SpankwireIE(InfoExtractor):
             u"uploader": u"oreusz", 
             u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
             u"description": u"Crazy Bitch X rated music video.",
+            u"age_limit": 18,
         }
     }
 
@@ -35,11 +35,12 @@ class SpankwireIE(InfoExtractor):
         webpage = self._download_webpage(req, video_id)
 
         video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
-        video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
-        thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
-        description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
-        if len(description) == 0:
-            description = None
+        video_uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+        description = self._html_search_regex(
+            r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
 
         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
@@ -48,10 +49,10 @@ class SpankwireIE(InfoExtractor):
 
         formats = []
         for video_url in video_urls:
-            path = compat_urllib_parse_urlparse( video_url ).path
-            extension = os.path.splitext( path )[1][1:]
+            path = compat_urllib_parse_urlparse(video_url).path
+            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
-            format = "-".join( format )
+            format = "-".join(format)
             formats.append({
                 'url': video_url,
                 'ext': extension,
@@ -60,6 +61,8 @@ class SpankwireIE(InfoExtractor):
             })
         formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
 
+        age_limit = self._rta_search(webpage)
+
         return {
             'id': video_id,
             'uploader': video_uploader,
@@ -67,4 +70,5 @@ class SpankwireIE(InfoExtractor):
             'thumbnail': thumbnail,
             'description': description,
             'formats': formats,
+            'age_limit': age_limit,
         }