X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdaum.py;h=9a94cf361170fee712fb6a5e11f139b8da7e3e81;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=d418ce4a8a29c122e811c96aac76d388c790b560;hpb=29030c0a4c2f4dded5a310add940aae0791f9d73;p=youtube-dl diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index d418ce4a8..9a94cf361 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -1,45 +1,67 @@ # encoding: utf-8 + +from __future__ import unicode_literals + import re from .common import InfoExtractor -from ..utils import ( +from ..compat import ( compat_urllib_parse, - determine_ext, ) class DaumIE(InfoExtractor): - _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' - IE_NAME = u'daum.net' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)' + IE_NAME = 'daum.net' - _TEST = { - u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', - u'file': u'52554690.mp4', - u'info_dict': { - u'title': u'DOTA 2GETHER 시즌2 6회 - 2부', - u'description': u'DOTA 2GETHER 시즌2 6회 - 2부', - u'upload_date': u'20130831', - u'duration': 3868, + _TESTS = [{ + 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', + 'info_dict': { + 'id': '52554690', + 'ext': 'mp4', + 'title': 'DOTA 2GETHER 시즌2 6회 - 2부', + 'description': 'DOTA 2GETHER 시즌2 6회 - 2부', + 'upload_date': '20130831', + 'duration': 3868, + }, + }, { + # Test for https://github.com/rg3/youtube-dl/issues/7949 + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290', + 'md5': 'c92d78bcee4424451f1667f275c1dc97', + 'info_dict': { + 'id': '73147290', + 'ext': 'mp4', + 'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218', + 'description': '싸이 - 나팔바지', + 'upload_date': '20151219', + 'duration': 232, }, - } + }, { + 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', + 'only_matching': True, + }, { + 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', + 'only_matching': True, + }] 
 def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') canonical_url = 'http://tvpot.daum.net/v/%s' % video_id webpage = self._download_webpage(canonical_url, video_id) + og_url = self._og_search_url(webpage, default=None) or self._search_regex( + r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2', + webpage, 'canonical url', group='url') full_id = self._search_regex( - r'