X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyouporn.py;h=1124fe6c280cb0e23bee3a41ea323165ec714dce;hb=2c347352677f023678ffd488a51b19f54b97fa36;hp=b39fbb5fc7259b0b59a53c993ac174b8f238befb;hpb=4f13f8f798be06bc2b3c0c42818bb0785e4cde64;p=youtube-dl diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index b39fbb5fc..1124fe6c2 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -3,9 +3,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_request from ..utils import ( int_or_none, + sanitized_Request, str_to_int, unescapeHTML, unified_strdate, @@ -29,6 +29,7 @@ class YouPornIE(InfoExtractor): 'upload_date': '20101221', 'average_rating': int, 'view_count': int, + 'comment_count': int, 'categories': list, 'tags': list, 'age_limit': 18, @@ -47,6 +48,7 @@ class YouPornIE(InfoExtractor): 'upload_date': '20111125', 'average_rating': int, 'view_count': int, + 'comment_count': int, 'categories': list, 'tags': list, 'age_limit': 18, @@ -61,7 +63,7 @@ class YouPornIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') - request = compat_urllib_request.Request(url) + request = sanitized_Request(url) request.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(request, display_id) @@ -73,7 +75,7 @@ class YouPornIE(InfoExtractor): links = [] sources = self._search_regex( - r'sources\s*:\s*({.+?})', webpage, 'sources', default=None) + r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None) if sources: for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources): links.append(link) @@ -99,8 +101,9 @@ class YouPornIE(InfoExtractor): } # Video URL's path looks like this: # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 + # 
/201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # We will benefit from it by extracting some metadata - mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url) + mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url) if mobj: height = int(mobj.group('height')) bitrate = int(mobj.group('bitrate')) @@ -112,15 +115,13 @@ class YouPornIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - description = self._html_search_regex( - r'(?s)]+class=["\']video-description["\'][^>]*>(.+?)', - webpage, 'description', default=None) + description = self._og_search_description(webpage, default=None) thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') uploader = self._html_search_regex( - r'(?s)]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*(.+?)', + r'(?s)]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*(.+?)', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( r'(?s)]+class=["\']videoInfoTime["\'][^>]*>(.+?)', @@ -135,6 +136,9 @@ class YouPornIE(InfoExtractor): view_count = str_to_int(self._search_regex( r'(?s)]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*', webpage, 'view count', fatal=False)) + comment_count = str_to_int(self._search_regex( + r'>All [Cc]omments? \(([\d,.]+)\)', + webpage, 'comment count', fatal=False)) def extract_tag_box(title): tag_box = self._search_regex( @@ -158,6 +162,7 @@ class YouPornIE(InfoExtractor): 'upload_date': upload_date, 'average_rating': average_rating, 'view_count': view_count, + 'comment_count': comment_count, 'categories': categories, 'tags': tags, 'age_limit': age_limit,