Merge remote-tracking branch 'capital-G/master'
[youtube-dl] / youtube_dl / extractor / bild.py
1 from __future__ import unicode_literals\r
2 \r
3 import re\r
4 \r
5 from .common import InfoExtractor\r
6 \r
7 \r
class BildIE(InfoExtractor):
    """Information extractor for video clips hosted on bild.de.

    The metadata is not scraped from the HTML page.  Instead, the page URL
    is rewritten to its ``,view=xml.bild.xml`` variant and the attributes of
    the root element of that XML document are read.
    """

    IE_NAME = 'bild'
    _TEST = {
        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
        'info_dict': {
            'id': '38184146',
            'title': 'BILD hat sie getestet',
            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',
            'duration': 196,
        }
    }

    # ``https?`` accepts both schemes.  The previous ``http?`` only made the
    # trailing "p" optional and therefore rejected every https:// URL.
    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'

    def _real_extract(self, url):
        """Return the info dict for the clip at *url*.

        *url* is expected to match ``_VALID_URL``.  The returned dict has the
        keys ``id``, ``title``, ``description``, ``url``, ``thumbnail`` and
        ``duration``.  ``description``, ``thumbnail`` and ``duration`` are
        ``None`` when the XML document does not provide them.

        Raises ``KeyError`` if the XML root element lacks the ``src`` or
        ``ueberschrift`` attribute.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The XML description lives at the page URL with its ".bild.html"
        # suffix swapped for ",view=xml.bild.xml".
        xml_url = url.split('.bild.html')[0] + ',view=xml.bild.xml'
        attrs = self._download_xml(xml_url, video_id).attrib

        # Scaled down by 1000 (presumably milliseconds -> seconds; the test
        # expects 196).  Floor division keeps the result an integer on both
        # Python 2 and Python 3; plain "/" yields a float on Python 3.
        try:
            duration = int(attrs.get('duration')) // 1000
        except (TypeError, ValueError):
            # Attribute missing or not numeric: duration is optional.
            duration = None

        return {
            'id': video_id,
            'title': attrs['ueberschrift'],
            'description': attrs.get('text'),
            'url': attrs['src'],
            'thumbnail': attrs.get('img'),
            'duration': duration,
        }