X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fellentv.py;h=5154bbd7f8e5a8447a24d8274780648a7eae0ca4;hb=af14ded75e10653b4713c23f8c428c6cd88610ad;hp=3e7923648992d334357bad7206d745a17313b23e;hpb=9480d1a56674f95f135562d2133cbf12f6a96bbc;p=youtube-dl diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 3e7923648..5154bbd7f 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json from .common import InfoExtractor @@ -12,32 +11,50 @@ from ..utils import ( class EllenTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)' - _TEST = { - 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', - 'md5': 'e4af06f3bf0d5f471921a18db5764642', + _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' + _TESTS = [{ + 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', + 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c', 'info_dict': { - 'id': '0-7jqrsr18', + 'id': '0-ipq1gsai', 'ext': 'mp4', - 'title': 'What\'s Wrong with These Photos? 
A Whole Lot', - 'timestamp': 1406876400, - 'upload_date': '20140801', + 'title': 'Fast Fingers of Fate', + 'description': 'md5:686114ced0a032926935e9015ee794ac', + 'timestamp': 1428033600, + 'upload_date': '20150403', } - } + }, { + 'url': 'http://ellentube.com/videos/0-dvzmabd5/', + 'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb', + 'info_dict': { + 'id': '0-dvzmabd5', + 'ext': 'mp4', + 'title': '1 year old twin sister makes her brother laugh', + 'description': '1 year old twin sister makes her brother laugh', + 'timestamp': 1419542075, + 'upload_date': '20141225', + } + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) + title = self._og_search_title(webpage, default=None) or self._search_regex( + r'pageName\s*=\s*"([^"]+)"', webpage, 'title') + description = self._html_search_meta( + 'description', webpage, 'description') or self._og_search_description(webpage) timestamp = parse_iso8601(self._search_regex( r'