X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvk.py;h=d9acafd70b2fa9089e6d39ce808e2ec74c9fbe76;hb=9e1a5b845586a0a5431fb72467142046d8571e6f;hp=d6632cbb7527852ce450d3e6a5e4134e9cb884a8;hpb=c52331f30c15ff715431cf1ca5fceec505efe599;p=youtube-dl diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d6632cbb7..d9acafd70 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -11,7 +11,8 @@ from ..utils import ( compat_urllib_parse, compat_str, unescapeHTML, - unified_strdate) + unified_strdate, + orderedSet) class VKIE(InfoExtractor): @@ -73,21 +74,6 @@ class VKIE(InfoExtractor): }, 'skip': 'Requires vk account credentials', }, - { - # VIDEO NOW REMOVED - # please update if you find a video whose URL follows the same pattern - 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', - 'md5': 'd82c22e449f036282d1d3f7f4d276869', - 'info_dict': { - 'id': '166094326', - 'ext': 'mp4', - 'uploader': 'Киномания - лучшее из мира кино', - 'title': 'Запах женщины (1992)', - 'duration': 9392, - 'upload_date': '20130914' - }, - 'skip': 'Requires vk account credentials', - }, { 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 'md5': '4d7a5ef8cf114dfa09577e57b2993202', @@ -114,6 +100,11 @@ class VKIE(InfoExtractor): }, 'skip': 'Only works from Russia', }, + { + # removed video, just testing that we match the pattern + 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', + 'only_matching': True, + }, ] def _login(self): @@ -130,7 +121,7 @@ class VKIE(InfoExtractor): } request = compat_urllib_request.Request('https://login.vk.com/?act=login', - compat_urllib_parse.urlencode(login_form).encode('utf-8')) + compat_urllib_parse.urlencode(login_form).encode('utf-8')) login_page = self._download_webpage(request, None, note='Logging in as %s' % username) if re.search(r'onLoginFailed', login_page): @@ -184,7 +175,7 @@ class 
VKIE(InfoExtractor): upload_date = None mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) if mobj is not None: - x = mobj.group(1) + ' ' + mobj.group(2) + mobj.group(1) + ' ' + mobj.group(2) upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) formats = [{ @@ -208,34 +199,21 @@ class VKIE(InfoExtractor): class VKUserVideosIE(InfoExtractor): IE_NAME = 'vk.com:user-videos' - IE_DESC = 'All of a user\'s videos' - _VALID_URL = r'https?://(?:m\.)?vk\.com/videos([0-9]+)(?:m\?.*)?' + IE_DESC = 'vk.com:All of a user\'s videos' + _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?' _TEMPLATE_URL = 'https://vk.com/videos' _TEST = { 'url': 'http://vk.com/videos205387401', 'playlist_mincount': 4, } - def extract_videos_from_page(self, page): - ids_in_page = [] - for mobj in re.finditer(r'href="/video([0-9_]+)"', page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(mobj.group(1)) - return ids_in_page - def _real_extract(self, url): - # Extract page id - mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError('Invalid URL: %s' % url) - - # Download page and get video ids - page_id = mobj.group(1) + page_id = self._match_id(url) page = self._download_webpage(url, page_id) - video_ids = self.extract_videos_from_page(page) - - self._downloader.to_screen('[vk] User videos %s: Found %i videos' % (page_id, len(video_ids))) - - url_entries = [self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id) - for video_id in video_ids] - return self.playlist_result(url_entries, page_id) \ No newline at end of file + video_ids = orderedSet( + m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page)) + url_entries = [ + self.url_result( + 'http://vk.com/video' + video_id, 'VK', video_id=video_id) + for video_id in video_ids] + return self.playlist_result(url_entries, page_id)