[^<]*


', page): - return self.playlist_result([self.url_result(vid) for vid in vids], video_id) - - title = self._html_search_regex( - r'

[^<]*

([^<]+)

', page, 'title') - - return { - '_type': 'url', - 'id': video_id, - 'url': vids[0], - 'title': title, - } - - -class GoGoAnimeSearchIE(InfoExtractor): - IE_NAME = 'gogoanime:search' - IE_DESC = 'GoGoAnime Search' - - _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P[^&]*)' - _TEST = { - 'url': 'http://www.gogoanime.com/?s=bokusatsu', - 'info_dict': { - 'id': 'bokusatsu' - }, - 'playlist_count': 6 - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - posts = re.findall( - r'
[^<]*]*>[^<]*.+)' - - _TESTS = [{ - 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-07', - 'ext': 'flv', - 'title': 'mahou-shoujo-madoka-magica-07', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - video_url = compat_urllib_parse.unquote(self._html_search_regex( - r'_url = "(https?://[^"]+?)";', page, 'url')) - title = self._search_regex(r'.*/(?P[^.]*).', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } - - -class ByZooIE(Play44IE): - _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', - 'md5': '455c83dabe2cd9fd74a87612b01fe017', - 'info_dict': { - 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - 'ext': 'mp4', - 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - } - }] - - -class Video44IE(Play44IE): - _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', - 'md5': '43eaec6d0beb10e8d42459b9f108aff3', - 'info_dict': { - 'id': 'chaoshead-12', - 'ext': 'mp4', - 'title': 'chaoshead-12', - } - }] - - -class VideoWingIE(Play44IE): - _VALID_URL = r'''(?x) - http://[w.]*videowing\.[^/]*/ - (?: - .*video=/* - |embed/ - ) - (?P<id>[^&?.]+) - ''' - - _TESTS = [{ - 'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }, { - 'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', - 'md5': '33fdd71581357018c226f95c5cedcfd7', - 'info_dict': { - 'id': 'mahoushoujomadokamagicamovie1part1', - 'ext': 'flv', - 'title': 'mahoushoujomadokamagicamovie1part1', - } - }] - - -class PlayPandaIE(Play44IE): - _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'description': 'boku_wa_tomodachi_ga_sukunai_-_05' - } - }] - - -class VideoZooIE(Play44IE): - _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class PlayBBIE(Play44IE): - _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class EasyVideoIE(Play44IE): - _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' - - _TESTS = [{ - 'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', - 'md5': '26178b57629b7650106d72b191137176', - 'info_dict': { - 'id': 'bokuwatomodachigasukunai-04', - 'ext': 'mp4', - 'title': 'bokuwatomodachigasukunai-04', - }, - 'skip': 'Blocked in Germany', - }] diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration class RtlXlIE(InfoExtractor): IE_NAME = 'rtlxl.nl' - _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' + _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' _TEST = { 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import ( class TEDIE(SubtitlesInfoExtractor): _VALID_URL = r'''(?x) (?P<proto>https?://) - (?P<type>www|embed)(?P<urlmain>\.ted\.com/ + (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ ( (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) - if m.group('type') == 'embed': + if m.group('type').startswith('embed'): desktop_url = m.group('proto') + 'www' + m.group('urlmain') return self.url_result(desktop_url, 'TED') name = m.group('name') diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 07cc81226..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py deleted file mode 100644 index 0364b9d32..000000000 --- a/youtube_dl/extractor/videofun.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class VideoFunIE(InfoExtractor): - _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' - - _TEST = { - 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'Mahou-Shoujo-Madoka-Magica-07', - 'ext': 'flv', - 'title': 'Mahou-Shoujo-Madoka-Magica-07', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, 'Downloading video page') - - video_url_encoded = self._html_search_regex( - r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') - video_url = compat_urllib_parse.unquote(video_url_encoded) - title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' _TESTS = [ - # Quality: Large, from iframe { - 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', 'info_dict': { - 'id': 'b132bdfd71b546d3972f9ab9a25f201c', - 'title': 'great-escape-minecraft.flv', + 'id': 'c0f6b1687dcd4000a97ebe70068039cf', 'ext': 'mp4', - 'duration': 352, - 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', }, }, - # Quality: Medium, from mainpage - { - 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - 'info_dict': { - 'id': 'a15950562888453b8e6f9572dc8600cd', - 'title': 'DB 01', - 'ext': 'flv', - 'duration': 1484, - 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - } - }, ] def _real_extract(self, url): diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' + _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' + _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' + _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' + _TESTS = [ + { + 'url': 'http://www.webofstories.com/play/hans.bethe/71', + 'md5': '373e4dd915f60cfe3116322642ddf364', + 'info_dict': { + 'id': '4536', + 'ext': 'mp4', + 'title': 'The temperature of the sun', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Hans Bethe talks about calculating the temperature of the sun', + 'duration': 238, + } + }, + { + 'url': 'http://www.webofstories.com/play/55908', + 'md5': '2985a698e1fe3211022422c4b5ed962c', + 'info_dict': { + 'id': '55908', + 'ext': 'mp4', + 'title': 'The story of Gemmata obscuriglobus', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', + 'duration': 169, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + description = self._html_search_meta('description', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + story_filename = self._search_regex( + r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') + speaker_id = self._search_regex( + r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') + story_id = self._search_regex( + r'\.storyId\((\d+)\)', webpage, 'story ID') + speaker_type = self._search_regex( + r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') + great_life = self._search_regex( + r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + is_great_life_series = great_life == 'true' + duration = int_or_none(self._search_regex( + r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + + # URL building, see: http://www.webofstories.com/scripts/player.js + ms_prefix = '' + if speaker_type.lower() == 'ms': + ms_prefix = 'mini_sites/' + + if is_great_life_series: + mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( + self._VIDEO_DOMAIN, speaker_id, story_filename) + rtmp_ext = 'flv' + streamer = self._GREAT_LIFE_STREAMER + play_path = 'stories/{0:}/{1:}'.format( + speaker_id, story_filename) + else: + mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( + self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) + rtmp_ext = 'mp4' + streamer = self._USER_STREAMER + play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( + ms_prefix, speaker_id, story_filename) + + formats = [{ + 'format_id': 'mp4_sd', + 'url': mp4_url, + }, { + 'format_id': 'rtmp_sd', + 'page_url': url, + 'url': streamer, + 'ext': rtmp_ext, + 'play_path': play_path, + }] + + self._sort_formats(formats) + + return { + 'id': story_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e9bf39a00..d1bbf0b01 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1206,9 +1206,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if playlist_id.startswith('RD'): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - if playlist_id.startswith('TL'): - raise ExtractorError('For downloading YouTube.com top lists, use ' - 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1254,49 +1251,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): - IE_NAME = 'youtube:toplist' - IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' - ' (Example: "yttoplist:music:Top Tracks")') - _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' - _TESTS = [{ - 'url': 'yttoplist:music:Trending', - 'playlist_mincount': 5, - 'skip': 'Only works for logged-in users', - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel = mobj.group('chann') - title = mobj.group('title') - query = compat_urllib_parse.urlencode({'title': title}) - channel_page = self._download_webpage( - 'https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex( - r'''(?x) - <a\s+href="([^"]+)".*?>\s* - <span\s+class="branded-page-module-title-text">\s* - <span[^>]*>.*?%s.*?</span>''' % re.escape(query), - channel_page, 'list') - url = compat_urlparse.urljoin('https://www.youtube.com/', link) - - video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' - ids = [] - # sometimes the webpage doesn't contain the videos - # retry until we get them - for i in itertools.count(0): - msg = 'Downloading Youtube mix' - if i > 0: - msg += ', retry #%d' % i - - webpage = self._download_webpage(url, title, msg) - ids = orderedSet(re.findall(video_re, webpage)) - if ids: - break - url_results = self._ids_to_results(ids) - return self.playlist_result(url_results, playlist_title=title) - - class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) return True, info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bdfe053a7..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,4 +1560,3 @@ def urlhandle_detect_ext(url_handle): getheader = url_handle.info().getheader return getheader('Content-Type').split("/")[1] -