From 79fa9db0da4aa86aa101cc32a5da4b7e254c6a0d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Sergey=20M=E2=80=A4?= Date: Fri, 28 Aug 2015 21:24:54 +0600 Subject: [PATCH] [mtvde] Simplify (Closes #6673) --- youtube_dl/extractor/mtv.py | 59 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 15df62649..f2d0638bf 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -174,8 +174,11 @@ class MTVServicesInfoExtractor(InfoExtractor): if self._LANG: info_url += 'lang=%s&' % self._LANG info_url += data + return self._get_videos_info_from_url(info_url, video_id) + + def _get_videos_info_from_url(self, url, video_id): idoc = self._download_xml( - info_url, video_id, + url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) @@ -289,39 +292,35 @@ class MTVIggyIE(MTVServicesInfoExtractor): } _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/' + class MTVDEIE(MTVServicesInfoExtractor): IE_NAME = 'mtv.de' - _VALID_URL = r'''(?x)^https?://(?:www\.)?mtv\.de(?P/artists/.*)''' - _TESTS = [ - { - 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', - 'info_dict': { - 'id': 'a50bc5f0b3aa4b3190aa', - 'ext': 'mp4', - 'title': 'cro-traum', - 'description': 'Cro - Traum', - }, + _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows)/(?:[^/]+/)+(?P\d+)-[^/#?]+/*(?:[#?].*)?$' + _TESTS = [{ + 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', + 'info_dict': { + 'id': 'music_video-a50bc5f0b3aa4b3190aa', + 'ext': 'mp4', + 'title': 'MusicVideo_cro-traum', + 'description': 'Cro - Traum', }, - ] + 'params': { + # rtmp download + 'skip_download': True, + }, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - return self._get_videos_info(url, mobj.group('video_path')) + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + playlist = self._parse_json( + self._search_regex( + r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'), + video_id) - def _get_videos_info(self, url, video_path): - webpage = self._download_webpage(url, video_path) - playlist_js = self._search_regex(r'