X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwistia.py;h=2182d6fd485bf4f1ed6ead7d78451b7c357ddc37;hb=4c76aa06665621c7689938afd7bbdbc797b5c7ea;hp=478c428334d4fc64527606e57e9e04a69211be76;hpb=f0c96af9cb0edc69f9ba73d39e6e191994e31256;p=youtube-dl diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 478c42833..2182d6fd4 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,9 +1,13 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + float_or_none, + unescapeHTML, ) @@ -27,8 +31,31 @@ class WistiaIE(InfoExtractor): }, { 'url': 'wistia:sh7fpupwlt', 'only_matching': True, + }, { + # with hls video + 'url': 'wistia:807fafadvk', + 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + match = re.search( + r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) + if match: + return unescapeHTML(match.group('url')) + + match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) + if match: + return 'wistia:%s' % match.group('id') + + match = re.search( + r'''(?sx) + ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? + ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return 'wistia:%s' % match.group('id') + def _real_extract(self, url): video_id = self._match_id(url) @@ -49,19 +76,25 @@ class WistiaIE(InfoExtractor): formats = [] thumbnails = [] for a in data['assets']: + aurl = a.get('url') + if not aurl: + continue astatus = a.get('status') atype = a.get('type') - if (astatus is not None and astatus != 2) or atype == 'preview': + if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): continue elif atype in ('still', 'still_image'): thumbnails.append({ - 'url': a['url'], - 'resolution': '%dx%d' % (a['width'], a['height']), + 'url': aurl, + 'width': int_or_none(a.get('width')), + 'height': int_or_none(a.get('height')), }) else: + aext = a.get('ext') + is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' formats.append({ 'format_id': atype, - 'url': a['url'], + 'url': aurl, 'tbr': int_or_none(a.get('bitrate')), 'vbr': int_or_none(a.get('opt_vbitrate')), 'width': int_or_none(a.get('width')), @@ -69,7 +102,8 @@ class WistiaIE(InfoExtractor): 'filesize': int_or_none(a.get('size')), 'vcodec': a.get('codec'), 'container': a.get('container'), - 'ext': a.get('ext'), + 'ext': 'mp4' if is_m3u8 else aext, + 'protocol': 'm3u8' if is_m3u8 else None, 'preference': 1 if atype == 'original' else None, }) @@ -81,6 +115,6 @@ class WistiaIE(InfoExtractor): 'description': data.get('seoDescription'), 'formats': formats, 'thumbnails': thumbnails, - 'duration': int_or_none(data.get('duration')), + 'duration': float_or_none(data.get('duration')), 'timestamp': int_or_none(data.get('createdAt')), }