X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffktv.py;h=5f6e65daed2d5dc2c18c09a97450ae2a9c88e2df;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=c2aa23aa28f7c257d697ee2567f3b64848dfbcb8;hpb=7b4137c351222a94f46f854bf490a299e4124acc;p=youtube-dl
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index c2aa23aa2..5f6e65dae 100644
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -1,11 +1,10 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
+ js_to_json,
)
@@ -20,23 +19,33 @@ class FKTVIE(InfoExtractor):
'id': '1',
'ext': 'mp4',
'title': 'Folge 1 vom 10. April 2007',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
episode = self._match_id(url)
- webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode)
- title = clean_html(self._html_search_regex('<h3>([^<]+?)</h3>', webpage, 'title'))
- matchs = re.search(r'(?s)<video[^>]+(?:poster="([^"]+)")[^>]*>(.*)</video>', webpage)
- if matchs:
- poster, sources = matchs.groups()
- urls = re.findall(r'(?s)<source[^>]*src="([^"]+)"[^>]*>', sources)
- if sources:
- formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls]
- return {
- 'id': episode,
- 'title': title,
- 'formats': formats,
- 'thumbnail': poster,
- }
+ webpage = self._download_webpage(
+ 'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
+ title = clean_html(self._html_search_regex(
+ '<h3>([^<]+)</h3>', webpage, 'title'))
+ thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
+ sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
+
+ formats = []
+ for source in sources:
+ furl = source.get('src')
+ if furl:
+ formats.append({
+ 'url': furl,
+ 'format_id': determine_ext(furl),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': episode,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }