X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwistia.py;h=fa142b974ae873a7883ede4894421f4f8a010549;hb=56667d622c3f6e7594a04f8cd5f4371875940725;hp=6eb94fcabc1440e3069c9491b5971c3635eae48d;hpb=36ca2c55db7939aff2dc700523843a9a0f82ae2b;p=youtube-dl diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 6eb94fcab..fa142b974 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,14 +1,18 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + float_or_none, + unescapeHTML, ) class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P[a-z0-9]+)' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -27,8 +31,37 @@ class WistiaIE(InfoExtractor): }, { 'url': 'wistia:sh7fpupwlt', 'only_matching': True, + }, { + # with hls video + 'url': 'wistia:807fafadvk', + 'only_matching': True, + }, { + 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', + 'only_matching': True, + }, { + 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json', + 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + match = re.search( + r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage) + if match: + return unescapeHTML(match.group('url')) + + match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) + if match: + return 'wistia:%s' % match.group('id') + + match = re.search( + r'''(?sx) + ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? + ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return 'wistia:%s' % match.group('id') + def _real_extract(self, url): video_id = self._match_id(url) @@ -63,6 +96,8 @@ class WistiaIE(InfoExtractor): 'height': int_or_none(a.get('height')), }) else: + aext = a.get('ext') + is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' formats.append({ 'format_id': atype, 'url': aurl, @@ -73,7 +108,8 @@ class WistiaIE(InfoExtractor): 'filesize': int_or_none(a.get('size')), 'vcodec': a.get('codec'), 'container': a.get('container'), - 'ext': a.get('ext'), + 'ext': 'mp4' if is_m3u8 else aext, + 'protocol': 'm3u8' if is_m3u8 else None, 'preference': 1 if atype == 'original' else None, }) @@ -85,6 +121,6 @@ class WistiaIE(InfoExtractor): 'description': data.get('seoDescription'), 'formats': formats, 'thumbnails': thumbnails, - 'duration': int_or_none(data.get('duration')), + 'duration': float_or_none(data.get('duration')), 'timestamp': int_or_none(data.get('createdAt')), }