X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmediaset.py;h=9760eafd5685225bda09218d309acffacd7cd647;hb=86693c4930b98e8df33736d87361400422b1adab;hp=89e23ca9b3bb7d9cbbdf3ca365fd8ec268710089;hpb=56f9c77f0e715a26b749d494c99a3f0b076bbbf8;p=youtube-dl diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 89e23ca9b..9760eafd5 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -13,12 +15,15 @@ from ..utils import ( class MediasetIE(InfoExtractor): _VALID_URL = r'''(?x) - https?:// - (?:www\.)?video\.mediaset\.it/ - (?: - (?:video|on-demand)/(?:[^/]+/)+[^/]+_| - player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid= - )(?P<id>[0-9]+) + (?: + mediaset:| + https?:// + (?:www\.)?video\.mediaset\.it/ + (?: + (?:video|on-demand)/(?:[^/]+/)+[^/]+_| + player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid= + ) + )(?P<id>[0-9]+) ''' _TESTS = [{ # full episode @@ -32,7 +37,7 @@ class MediasetIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1414, 'creator': 'mediaset', - 'release_date': '20161107', + 'upload_date': '20161107', 'series': 'Hello Goodbye', 'categories': ['reality'], }, @@ -49,8 +54,19 @@ class MediasetIE(InfoExtractor): # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) 'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true', 'only_matching': True, + }, { + 'url': 'mediaset:661824', + 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1', + webpage)] + def _real_extract(self, url): 
video_id = self._match_id(url) @@ -94,7 +110,7 @@ class MediasetIE(InfoExtractor): 'thumbnail': mediainfo.get('thumbnail'), 'duration': parse_duration(mediainfo.get('duration')), 'creator': creator, - 'release_date': unified_strdate(mediainfo.get('production-date')), + 'upload_date': unified_strdate(mediainfo.get('production-date')), 'webpage_url': mediainfo.get('url'), 'series': mediainfo.get('brand-value'), 'categories': categories,