[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / phoenix.py
1 from __future__ import unicode_literals
2
3 from .dreisat import DreiSatIE
4
5
class PhoenixIE(DreiSatIE):
    """Information extractor for phoenix.de content pages.

    The content page is scraped for the numeric ID embedded in the player
    container, and that ID is used to build the details XML URL which is
    handed to the inherited ``extract_from_xml_url`` helper.
    """

    IE_NAME = 'phoenix.de'

    # Verbose pattern: /content/<id>, optionally prefixed by
    # phoenix/die_sendungen/ and at most one further path segment.
    _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
        (?:
            phoenix/die_sendungen/(?:[^/]+/)?
        )?
        (?P<id>[0-9]+)'''

    _TESTS = [{
        # Bare content ID directly under /content/.
        'url': 'http://www.phoenix.de/content/884301',
        'md5': 'ed249f045256150c92e72dbb70eadec6',
        'info_dict': {
            'id': '884301',
            'ext': 'mp4',
            'title': 'Michael Krons mit Hans-Werner Sinn',
            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
            'upload_date': '20141025',
            'uploader': 'Im Dialog',
        },
    }, {
        # ID below the die_sendungen prefix.
        'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
        'only_matching': True,
    }, {
        # ID below die_sendungen plus one extra path segment.
        'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the video behind a phoenix.de content URL.

        :param url: page URL; its ID is obtained via ``self._match_id``.
        :return: whatever ``self.extract_from_xml_url`` returns for the
            page's details XML (presumably an info dict -- confirm against
            the base class).
        """
        content_id = self._match_id(url)
        page = self._download_webpage(url, content_id)

        # The player container carries a separate numeric ID in its ``id``
        # attribute; the details endpoint is queried with that ID, not with
        # the one taken from the URL.
        player_id = self._search_regex(
            r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
            page, 'internal video ID')

        details_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % player_id
        return self.extract_from_xml_url(content_id, details_url)