X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcomedycentral.py;h=8af0abade8c88fea3fa7fc4e7329e10802b43a5a;hb=e423e0baaabe16e80af693d1f05ffc560747b3b8;hp=d50fcdbdbb0fc23becdf6a254769667da44cb9c4;hpb=f102372b5fc4b286619ae5c32807c023e3381087;p=youtube-dl diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index d50fcdbdb..8af0abade 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor from ..utils import ( compat_str, compat_urllib_parse, - ExtractorError, + float_or_none, unified_strdate, ) @@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor): _TEST = { 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', - 'md5': '4167875aae411f903b751a21f357f1ee', + 'md5': 'c4f48e9eda1b16dd10add0744344b6d8', 'info_dict': { 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', 'ext': 'mp4', @@ -32,31 +32,34 @@ class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralShowsIE(InfoExtractor): - IE_DESC = 'The Daily Show / Colbert Report' + IE_DESC = 'The Daily Show / The Colbert Report' # urls can be abbreviations like :thedailyshow or :colbert # urls for episodes like: # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day # or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 - _VALID_URL = r"""^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport) - |(https?://)?(www\.)? - (?Pthedailyshow|colbertnation)\.com/ - (full-episodes/(?P.*)| + _VALID_URL = r'''(?x)^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport) + |https?://(:www\.)? + (?Pthedailyshow|thecolbertreport)\.(?:cc\.)?com/ + ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P.*)| (?P - (the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) - |(watch/(?P[^/]*)/(?P.*)))| + (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P[^/?#]+)) + |(the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) + |(watch/(?P[^/]*)/(?P.*)) + )| (?P - extended-interviews/(?P[0-9]+)/playlist_tds_extended_(?P.*?)/.*?))) - $""" + extended-interviews/(?P[0-9a-z]+)/(?:playlist_tds_extended_)?(?P.*?)(/.*?)?))) + (?:[?#].*|$)''' _TEST = { - 'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', - 'file': '422212.mp4', + 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', 'info_dict': { - "upload_date": "20121214", - "description": "Kristen Stewart", - "uploader": "thedailyshow", - "title": "thedailyshow-kristen-stewart part 1" + 'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55', + 'ext': 'mp4', + 'upload_date': '20121213', + 'description': 'Kristen Stewart learns to let loose in "On the Road."', + 'uploader': 'thedailyshow', + 'title': 'thedailyshow kristen-stewart part 1', } } @@ -79,11 +82,6 @@ class ComedyCentralShowsIE(InfoExtractor): '400': (384, 216), } - @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" - return re.match(cls._VALID_URL, url, re.VERBOSE) is not None - @staticmethod def _transform_rtmp_url(rtmp_video_url): m = re.match(r'^rtmpe?://.*?/(?Pgsp\.comedystor/.*)$', rtmp_video_url) @@ -99,14 +97,16 @@ class ComedyCentralShowsIE(InfoExtractor): if mobj.group('shortname'): if mobj.group('shortname') in ('tds', 'thedailyshow'): - url = 'http://www.thedailyshow.com/full-episodes/' + url = 'http://thedailyshow.cc.com/full-episodes/' else: - url = 'http://www.colbertnation.com/full-episodes/' + url = 'http://thecolbertreport.cc.com/full-episodes/' mobj = re.match(self._VALID_URL, url, re.VERBOSE) assert mobj is not None if mobj.group('clip'): - if mobj.group('showname') == 'thedailyshow': + if mobj.group('videotitle'): + epTitle = mobj.group('videotitle') + elif mobj.group('showname') == 'thedailyshow': epTitle = mobj.group('tdstitle') else: epTitle = mobj.group('cntitle') @@ -120,9 +120,9 @@ class ComedyCentralShowsIE(InfoExtractor): epTitle = mobj.group('showname') else: epTitle = mobj.group('episode') + show_name = mobj.group('showname') - self.report_extraction(epTitle) - webpage,htmlHandle = self._download_webpage_handle(url, epTitle) + webpage, htmlHandle = self._download_webpage_handle(url, epTitle) if dlNewest: url = htmlHandle.geturl() mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -130,71 +130,86 @@ class ComedyCentralShowsIE(InfoExtractor): raise ExtractorError('Invalid redirected URL: ' + url) if mobj.group('episode') == '': raise ExtractorError('Redirected URL is still not specific: ' + url) - epTitle = mobj.group('episode') + epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1] mMovieParams = re.findall('(?: