X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwistia.py;h=fa142b974ae873a7883ede4894421f4f8a010549;hb=a61ce71468cb222338ccd8039dc631f3619dc585;hp=97139a35a3f134f98a7ce87a958545bd36811fc5;hpb=45f160a43c5f103af7a843f1159a1f6e8f498f0f;p=youtube-dl diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 97139a35a..fa142b974 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,14 +1,18 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + float_or_none, + unescapeHTML, ) class WistiaIE(InfoExtractor): - _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' @@ -31,8 +35,33 @@ class WistiaIE(InfoExtractor): # with hls video 'url': 'wistia:807fafadvk', 'only_matching': True, + }, { + 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt', + 'only_matching': True, + }, { + 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json', + 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + match = re.search( + r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage) + if match: + return unescapeHTML(match.group('url')) + + match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) + if match: + return 'wistia:%s' % match.group('id') + + match = re.search( + r'''(?sx) + <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? 
+ <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return 'wistia:%s' % match.group('id') + def _real_extract(self, url): video_id = self._match_id(url) @@ -92,6 +121,6 @@ class WistiaIE(InfoExtractor): 'description': data.get('seoDescription'), 'formats': formats, 'thumbnails': thumbnails, - 'duration': int_or_none(data.get('duration')), + 'duration': float_or_none(data.get('duration')), 'timestamp': int_or_none(data.get('createdAt')), }