Merge branch 'crooksandliars' of https://github.com/fstirlitz/youtube-dl into fstirli...
[youtube-dl] / youtube_dl / extractor / dreisat.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     ExtractorError,
8     unified_strdate,
9 )
10
11
class DreiSatIE(InfoExtractor):
    """Extractor for single videos of the 3sat Mediathek.

    The numeric video id is read from the ``obj`` query parameter of the
    page URL and used to query the ``beitragsDetails`` XML service, from
    which title, description, uploader, air date, thumbnails and formats
    are taken.
    """

    IE_NAME = '3sat'
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
        'md5': 'be37228896d30a88f315b638900a026e',
        'info_dict': {
            'id': '45918',
            'ext': 'mp4',
            'title': 'Waidmannsheil',
            'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
            'uploader': '3sat',
            'upload_date': '20140913'
        }
    }

    @staticmethod
    def _find_text(parent, path):
        """Return the text of the first node matching path under parent.

        Returns None when parent is None, when nothing matches, or when
        the matched node has no text.
        """
        if parent is None:
            return None
        node = parent.find(path)
        return None if node is None else node.text

    @staticmethod
    def _to_int(text):
        """Return int(text), or None if text is None or not an integer."""
        try:
            return int(text)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Download the details XML for url and build the info dict.

        Raises ExtractorError when the service reports a status code other
        than 'ok' or when the document carries no title. Optional metadata
        (description, uploader, upload date, thumbnail, numeric format
        properties) is set to None when it is absent from the document.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_doc = self._download_xml(details_url, video_id, 'Downloading video details')

        # A missing status node is treated as success; only an explicit
        # non-'ok' code aborts the extraction.
        status_code = details_doc.find('./status/statuscode')
        if status_code is not None and status_code.text != 'ok':
            code = status_code.text
            if code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, code)
            raise ExtractorError(message, expected=True)

        # The 'key' attribute of each teaser image is split at 'x' into
        # width and height.
        thumbnails = []
        for te in details_doc.findall('.//teaserimage'):
            width, _, height = te.attrib['key'].partition('x')
            thumbnails.append({
                'width': int(width),
                'height': int(height),
                'url': te.text,
            })

        information_el = details_doc.find('.//information')
        video_title = self._find_text(information_el, './title')
        if not video_title:
            # Fail with a readable message instead of an AttributeError on
            # a missing <information>/<title> node.
            raise ExtractorError('Unable to extract title for video %s' % video_id)
        video_description = self._find_text(information_el, './detail')

        details_el = details_doc.find('.//details')
        video_uploader = self._find_text(details_el, './channel')
        airtime = self._find_text(details_el, './airtime')
        upload_date = unified_strdate(airtime) if airtime else None

        formats = []
        for fe in details_doc.findall('.//formitaet'):
            format_url = self._find_text(fe, './url')
            # Entries without a URL cannot be downloaded. metafilegenerator.de
            # URLs are excluded as in the original code (presumably they are
            # not direct media links; confirm against the service).
            if not format_url or format_url.startswith('http://www.metafilegenerator.de/'):
                continue
            formats.append({
                'format_id': fe.attrib['basetype'],
                'width': self._to_int(self._find_text(fe, './width')),
                'height': self._to_int(self._find_text(fe, './height')),
                'url': format_url,
                'filesize': self._to_int(self._find_text(fe, './filesize')),
                'video_bitrate': self._to_int(self._find_text(fe, './videoBitrate')),
            })

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            # The last listed teaser image is used as the default thumbnail;
            # None when the document lists no teaser images at all.
            'thumbnail': thumbnails[-1]['url'] if thumbnails else None,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }