X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fwebofstories.py;h=2037d9b3d57cd5876d85e9552ffcc9f387fcc975;hb=027eb5a6b041a91ca7fdd61826daaea24bec1cfb;hp=396cf4e8312ca73f90f45b3e24f3fb3561f54fa8;hpb=ecd1936695e73ba850d0618828b4a40d7d16c091;p=youtube-dl diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py index 396cf4e83..2037d9b3d 100644 --- a/youtube_dl/extractor/webofstories.py +++ b/youtube_dl/extractor/webofstories.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import int_or_none @@ -45,19 +47,17 @@ class WebOfStoriesIE(InfoExtractor): description = self._html_search_meta('description', webpage) thumbnail = self._og_search_thumbnail(webpage) - story_filename = self._search_regex( - r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') - speaker_id = self._search_regex( - r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') - story_id = self._search_regex( - r'\.storyId\((\d+)\)', webpage, 'story ID') - speaker_type = self._search_regex( - r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') - great_life = self._search_regex( - r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + embed_params = [s.strip(" \r\n\t'") for s in self._search_regex( + r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', + webpage, 'embed params').split(',')] + + ( + _, speaker_id, story_id, story_duration, + speaker_type, great_life, _thumbnail, _has_subtitles, + story_filename, _story_order) = embed_params + is_great_life_series = great_life == 'true' - duration = int_or_none(self._search_regex( - r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + duration = int_or_none(story_duration) # URL building, see: http://www.webofstories.com/scripts/player.js ms_prefix = '' @@ -100,3 +100,42 @@ class WebOfStoriesIE(InfoExtractor): 'description': description, 'duration': duration, } + + 
class WebOfStoriesPlaylistIE(InfoExtractor):
    """Extractor for Web of Stories "play all" pages.

    A ``/playAll/<speaker>`` page links to the individual stories of one
    speaker. Every story link found on the page becomes a URL result that is
    handed to the ``WebOfStories`` extractor, and the results are returned as
    a single playlist.
    """

    # The playlist id is captured in the named group ``id`` (presumably the
    # group that ``_match_id`` reads -- confirm against InfoExtractor).
    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.webofstories.com/playAll/donald.knuth',
        'info_dict': {
            'id': 'donald.knuth',
            'title': 'Donald Knuth (Scientist)',
        },
        'playlist_mincount': 97,
    }

    def _real_extract(self, url):
        """Build the playlist for the speaker page at *url*.

        Returns the value of ``self.playlist_result(entries, playlist_id,
        title)``, where ``entries`` holds one URL result per distinct story
        id linked from the page, in the order the links appear.

        The title is "<speaker> (<field>)" when the speaker markup is found
        ("<speaker>" alone when the field is missing); otherwise it is taken
        from the page ``<title>``. That last lookup is made without a
        default, so a page with no recognisable title is presumably a fatal
        extraction error -- confirm against ``_search_regex``.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Escape the id before interpolating it: ids such as "donald.knuth"
        # contain regex metacharacters that must match literally.
        story_link_re = (
            r'href="/playAll/%s\?sId=(\d+)"' % re.escape(playlist_id))

        # De-duplicate while keeping page order so the playlist is
        # deterministic (a plain set() would shuffle the stories).
        entries = []
        seen_numbers = set()
        for video_number in re.findall(story_link_re, webpage):
            if video_number in seen_numbers:
                continue
            seen_numbers.add(video_number)
            entries.append(self.url_result(
                'http://www.webofstories.com/play/%s' % video_number,
                'WebOfStories'))

        # NOTE(review): the HTML anchors in the three patterns below were
        # restored to match the upstream youtube-dl extractor; confirm them
        # against the live page markup.
        title = self._search_regex(
            r'<div id="speakerName">\s*<span>([^<]+)</span>',
            webpage, 'speaker', default=None)
        if title:
            field = self._search_regex(
                r'<span id="primaryField">([^<]+)</span>',
                webpage, 'field', default=None)
            if field:
                title += ' (%s)' % field

        if not title:
            # Fallback: derive the speaker name from the page <title>.
            title = self._search_regex(
                r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
                webpage, 'title')

        return self.playlist_result(entries, playlist_id, title)