X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcriterion.py;h=f7815b905d13910e0a931f2609fa015c9ac3f00a;hb=a61ce71468cb222338ccd8039dc631f3619dc585;hp=a149d29007982e60463ec3c85bf8e239dbc882c0;hpb=7fd930c0c8a33d452027f3844962ab83ee08d807;p=youtube-dl diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index a149d2900..f7815b905 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,40 +1,39 @@ -# -*- coding: utf-8 -*- - -import re +# coding: utf-8 +from __future__ import unicode_literals from .common import InfoExtractor + class CriterionIE(InfoExtractor): - _VALID_URL = r'http://www.criterion.com/films/(.*)' + _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+' _TEST = { - u'url': u'http://www.criterion.com/films/184-le-samourai', - u'file': u'184.mp4', - u'md5': u'bc51beba55685509883a9a7830919ec3', - u'info_dict': { - u"title": u"Le Samouraï", - u"description" : u"In a career-defining performance, Alain Delon plays a contract killer with samurai instincts. A razor-sharp cocktail of 1940s American gangster cinema and 1960s French pop culture, maverick director Jean-Pierre Melville's masterpiece _Le Samouraï_ defines cool. 
" + 'url': 'http://www.criterion.com/films/184-le-samourai', + 'md5': 'bc51beba55685509883a9a7830919ec3', + 'info_dict': { + 'id': '184', + 'ext': 'mp4', + 'title': 'Le Samouraï', + 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', + 'thumbnail': r're:^https?://.*\.jpg$', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1).split('-')[0] + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', - webpage, 'video url') - title = self._search_regex(r'', - webpage, 'video title') - description = self._search_regex(r'', - webpage, 'video description') - thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') - ext = final_url.split('.')[-1] + final_url = self._search_regex( + r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + title = self._og_search_title(webpage) + description = self._html_search_meta('description', webpage) + thumbnail = self._search_regex( + r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') - return {'id': video_id, - 'url' : final_url, - 'title': title, - 'ext': ext, - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + }