from ..utils import (
clean_html,
determine_ext,
+ ExtractorError,
)
'id': '1',
'ext': 'mp4',
'title': 'Folge 1 vom 10. April 2007',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
    """Extract the title, thumbnail and formats for one episode.

    The episode id is matched from ``url`` and the corresponding
    ``/play`` page is downloaded. The title is searched for in an
    ``<h3>`` element, and the formats are built from the ``<source>``
    tags found inside the page's ``<video>`` element.

    Raises ExtractorError when no ``<video>`` element is found or when
    it contains no ``<source>`` URL. A missing ``poster`` attribute only
    produces a warning; ``thumbnail`` is then None.
    """
    episode = self._match_id(url)
    webpage = self._download_webpage(
        'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
    title = clean_html(self._html_search_regex(
        '<h3>([^<]+)</h3>', webpage, 'title'))

    # The poster attribute is optional: the negative lookahead consumes the
    # attributes that precede it, so group 1 is None when it is absent.
    matches = re.search(
        r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
        webpage)
    if matches is None:
        raise ExtractorError('Unable to extract the video')

    poster, sources = matches.groups()
    if poster is None:
        self.report_warning('unable to extract thumbnail')

    source_urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
    if not source_urls:
        # Fail explicitly rather than return a result with no formats.
        raise ExtractorError('Unable to extract the video')

    # Use a dedicated loop variable: reusing ``url`` would shadow the method
    # parameter, and on Python 2 a list comprehension rebinds it.
    formats = [{
        'url': source_url,
        'format_id': determine_ext(source_url),
    } for source_url in source_urls]

    return {
        'id': episode,
        'title': title,
        'formats': formats,
        'thumbnail': poster,
    }