[clipfish] extract mp4 video link
[youtube-dl] / youtube_dl / extractor / clipfish.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5     ExtractorError,
6     int_or_none,
7     js_to_json,
8     determine_ext,
9 )
10
11
class ClipfishIE(InfoExtractor):
    """Information extractor for video pages on clipfish.de.

    The video page embeds two JavaScript object literals: ``videoObject``
    (read here for ``videourl`` and ``length``) and ``globalFlashvars``
    (read here for ``data``, the URL of an XML info document).  The XML
    document supplies the title, a second video URL (``filename``) and the
    thumbnail (``imageurl``).
    """

    IE_NAME = 'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        'md5': '79bc922f3e8a9097b3d68a93780fd475',
        'info_dict': {
            'id': '3966754',
            'ext': 'mp4',
            'title': 'FIFA 14 - E3 2013 Trailer',
            'duration': 82,
        }
    }

    def _extract_js_object(self, webpage, var_name, video_id):
        """Return the object literal assigned to ``var <var_name>``, parsed.

        The literal is located with a regex that stops at the first ``}``,
        so only flat (non-nested) objects are supported, exactly as before.
        The braces are escaped and the pattern is a raw string so that it
        does not depend on ``re`` treating a stray ``{`` as a literal.
        """
        raw_object = self._html_search_regex(
            r'var %s = (\{[^}]+\})' % var_name, webpage, var_name)
        return self._parse_json(js_to_json(raw_object), video_id)

    def _real_extract(self, url):
        """Extract the info dict (id, title, formats, thumbnail, duration).

        Raises ExtractorError when the info page URL, the title or every
        video URL is missing.  Thumbnail and duration are optional and are
        returned as None when the site does not provide them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_info = self._extract_js_object(webpage, 'videoObject', video_id)
        flashvars = self._extract_js_object(
            webpage, 'globalFlashvars', video_id)

        info_url = flashvars.get('data')
        if not info_url:
            raise ExtractorError(
                'Unable to find the info page URL in globalFlashvars')

        # NOTE(review): doc is assumed to be an ElementTree element (the
        # previous code already relied on doc.find(...).text).  findtext()
        # returns None instead of raising when the element is absent.
        doc = self._download_xml(
            info_url, video_id, note='Downloading info page')

        title = doc.findtext('title')
        if not title:
            raise ExtractorError('Unable to extract title from the info page')

        # Optional metadata must never abort the extraction.
        thumbnail = doc.findtext('imageurl') or None
        duration = int_or_none(video_info.get('length'))

        # Same order as before: the URL embedded in the page first, then
        # the one from the XML document.  Missing or repeated URLs are
        # skipped instead of crashing or yielding duplicate formats.
        formats = []
        seen_urls = set()
        for video_url in (video_info.get('videourl'), doc.findtext('filename')):
            if not video_url or video_url in seen_urls:
                continue
            seen_urls.add(video_url)
            formats.append({'url': video_url})
        if not formats:
            raise ExtractorError('Unable to find any video URL')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
        }