[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / golem.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import (
6     compat_str,
7     compat_urlparse,
8 )
9 from ..utils import (
10     determine_ext,
11 )
12
13
class GolemIE(InfoExtractor):
    """Extractor for video pages served from video.golem.de."""

    _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
    _TEST = {
        'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
        'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
        'info_dict': {
            'id': '14095',
            'format_id': 'high',
            'ext': 'mp4',
            'title': 'iPhone 6 und 6 Plus - Test',
            'duration': 300.44,
            'filesize': 65309548,
        }
    }

    # Base URL that media and thumbnail URLs from the XML are joined against.
    _PREFIX = 'http://video.golem.de'

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        The video id is taken from the URL, the per-video XML document is
        downloaded, and title, duration, formats and thumbnails are read
        from it.

        Returns a dict with the keys 'id', 'title', 'duration', 'formats'
        and 'thumbnails'.
        """
        video_id = self._match_id(url)

        xml_url = 'https://video.golem.de/xml/{0}.xml'.format(video_id)
        config = self._download_xml(xml_url, video_id)

        # 'golem' is the fallback title when the XML has no <title> element.
        title = config.findtext('./title', 'golem')
        duration = self._float(config.findtext('./playtime'), 'duration')

        # Every direct child of the root that has a <url> child is treated
        # as one format; the child's tag name becomes the format id.
        formats = []
        for node in config:
            media_url = node.findtext('./url')
            if media_url:
                formats.append({
                    'format_id': compat_str(node.tag),
                    'url': compat_urlparse.urljoin(self._PREFIX, media_url),
                    'height': self._int(node.get('height'), 'height'),
                    'width': self._int(node.get('width'), 'width'),
                    'filesize': self._int(node.findtext('filesize'), 'filesize'),
                    'ext': determine_ext(node.findtext('./filename')),
                })
        self._sort_formats(formats)

        # <teaser> elements are searched at any depth; those without a
        # <url> child are skipped.
        thumbnails = []
        for teaser in config.findall('.//teaser'):
            teaser_url = teaser.findtext('./url')
            if teaser_url:
                thumbnails.append({
                    'url': compat_urlparse.urljoin(self._PREFIX, teaser_url),
                    'width': self._int(teaser.get('width'), 'thumbnail width'),
                    'height': self._int(teaser.get('height'), 'thumbnail height'),
                })

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
        }