[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / dbtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
8
class DBTVIE(InfoExtractor):
    """Extractor for dagbladet.no video pages and embeds.

    No media is resolved here: the matched id is wrapped in a
    ``url_transparent`` result and delegated to either the ``Youtube`` or the
    ``JWPlatform`` extractor, chosen by the length of the id.
    """

    # Matches /video/<id>, /video/embed/<id> and /video/<display_id>/<id>,
    # where <id> is either 11 characters of [0-9A-Za-z_-] or 8 alphanumerics.
    _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
    _TESTS = [{
        'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
        'md5': 'b8f850ba1860adbda668d367f9b77699',
        'info_dict': {
            'id': 'PynxJnNWChE',
            'ext': 'mp4',
            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
            'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
            'thumbnail': r're:https?://.*\.jpg',
            'upload_date': '20160916',
            'duration': 69,
            'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
            'uploader': 'Dagbladet',
        },
        'add_ie': ['Youtube']
    }, {
        'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
        'only_matching': True,
    }, {
        'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every dagbladet.no embed iframe in *webpage*.

        The URLs are returned in document order, exactly as written in the
        attribute (so they may be protocol-relative, starting with ``//``).
        """
        embed_matches = re.finditer(
            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
            webpage)
        # Group 1 is the opening quote character (reused as a backreference
        # to find the closing quote); group 2 is the URL itself.
        return [embed.group(2) for embed in embed_matches]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result that delegates *url*.

        ``display_id`` is None when the URL has no display-id path segment
        (plain and ``embed`` URLs).
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')

        # An 11-character id is passed as-is to the Youtube extractor; any
        # other id (8 characters per _VALID_URL) goes to JWPlatform.
        if len(video_id) == 11:
            target_url, ie_key = video_id, 'Youtube'
        else:
            target_url, ie_key = 'jwplatform:' + video_id, 'JWPlatform'

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'display_id': display_id,
            'url': target_url,
            'ie_key': ie_key,
        }