[golem] Don't omit positional argument specifiers
[youtube-dl] / youtube_dl / extractor / golem.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import compat_urlparse
8
9
class GolemIE(InfoExtractor):
    """Information extractor for videos hosted on video.golem.de.

    The video id is taken from the page URL, the XML description of the
    video is downloaded from ``_CONFIG`` and its elements are turned into
    the formats, thumbnails, title and duration of the result.
    """

    _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
    # Sample URL together with the metadata expected for it (presumably
    # consumed by the project's test harness).
    _TEST = {
        'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
        'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
        'info_dict': {
            'id': '14095',
            'format_id': 'high',
            'ext': 'mp4',
            'title': 'iPhone 6 und 6 Plus - Test',
            'duration': 300,
            'filesize': 65309548,
        }
    }

    # Location of the XML description; ``{0}`` is replaced by the video id.
    _CONFIG = 'https://video.golem.de/xml/{0}.xml'
    # Base that the URLs found in the XML description are resolved against.
    _PREFIX = 'http://video.golem.de'

    # Id of the video being processed; assigned by _real_extract().  The
    # class-level default keeps _warn() usable before that has happened.
    _id = None

    def _warn(self, fmt, *args):
        """Report ``fmt.format(*args)`` as a warning for the current video.

        Positional specifiers (``{0}``, ``{1}``, ...) are written out
        explicitly in all format strings of this class.
        """
        self.report_warning(fmt.format(*args), self._id)

    def _parse_int(self, text, what):
        """Convert *text* to an integer.

        Returns None without a warning when *text* is None, and None after
        a warning of the form ``"<what>: <error>"`` when *text* is not a
        valid integer literal.
        """
        if text is None:
            return None
        try:
            return int(text)
        except ValueError as e:
            self._warn('{0}: {1}', what, e)
            return None

    def _extract_format(self, elem):
        """Build a format dictionary from one element of the description.

        The element's tag is used as the format id.  Returns None (after a
        warning) when the element carries no URL.  The 'ext', 'filesize',
        'width' and 'height' keys are only present when the corresponding
        value could be determined.
        """
        format_id = elem.tag

        url = elem.findtext('./url')
        # findtext() yields '' for an empty <url/> and None for a missing
        # one; neither can be joined into a usable address.
        if not url:
            self._warn("{0}: url: empty, skipping", format_id)
            return None

        fmt = {
            'format_id': format_id,
            'url': compat_urlparse.urljoin(self._PREFIX, url)
        }

        # The extension is whatever follows the last dot of the file name.
        filename = elem.findtext('./filename', '')
        if '.' in filename:
            fmt['ext'] = filename.rsplit('.', 1)[1]
        else:
            self._warn('{0}: ext: missing extension', format_id)

        optional_ints = (
            ('filesize', elem.findtext('./filesize')),
            ('width', elem.get('width')),
            ('height', elem.get('height')),
        )
        for key, text in optional_ints:
            value = self._parse_int(text, '{0}: {1}'.format(format_id, key))
            if value is not None:
                fmt[key] = value

        return fmt

    def _extract_thumbnail(self, elem):
        """Build a thumbnail dictionary from one ``teaser`` element.

        Returns None when the element carries no URL.  The 'width' and
        'height' keys are only present when the attribute exists and holds
        a valid integer.
        """
        url = elem.findtext('./url')
        if not url:
            return None

        thumb = {
            'url': compat_urlparse.urljoin(self._PREFIX, url)
        }

        for key in ('width', 'height'):
            value = self._parse_int(
                elem.get(key), 'thumbnail: {0}'.format(key))
            if value is not None:
                thumb[key] = value

        return thumb

    def _real_extract(self, url):
        """Return the info dictionary for the video addressed by *url*.

        The result always contains 'id', 'title', 'formats' and
        'thumbnails'; 'duration' is added when the description provides a
        usable play time.
        """
        mobj = re.match(self._VALID_URL, url)
        self._id = mobj.group('id')

        config = self._download_xml(self._CONFIG.format(self._id), self._id)

        info = {
            'id': self._id,
            'title': config.findtext('./title', 'golem')
        }

        # Every direct child that has a <url> child describes one format.
        # The child is tested explicitly instead of through an XPath
        # predicate ('./*[url]'), which ElementTree versions older than 1.3
        # do not understand.
        formats = []
        for e in config:
            if e.find('./url') is None:
                continue
            fmt = self._extract_format(e)
            if fmt is not None:
                formats.append(fmt)
        self._sort_formats(formats)
        info['formats'] = formats

        # Same explicit test for the <teaser> elements found anywhere
        # below the root.
        thumbnails = []
        for e in config.findall('.//teaser'):
            if e.find('./url') is None:
                continue
            thumb = self._extract_thumbnail(e)
            if thumb is not None:
                thumbnails.append(thumb)
        info['thumbnails'] = thumbnails

        playtime = config.findtext('./playtime')
        if playtime is not None:
            try:
                # round() returns a float on Python 2, hence the int();
                # infinite values raise OverflowError rather than
                # ValueError.
                info['duration'] = int(round(float(playtime)))
            except (ValueError, OverflowError) as e:
                self._warn('duration: {0}', e)

        return info