[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / digg.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import js_to_json
5
6
class DiggIE(InfoExtractor):
    # Extractor for digg.com video pages. It does not download media itself:
    # it reads the ``video_info`` JavaScript object embedded in the page and
    # hands the result off to the Youtube, JWPlatform or Generic extractor.
    _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # JWPlatform via provider
        'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
        'info_dict': {
            'id': 'LcqvmS0b',
            'ext': 'mp4',
            'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
            'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
            'upload_date': '20180109',
            'timestamp': 1515530551,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Youtube via provider
        'url': 'http://digg.com/video/dog-boat-seal-play',
        'only_matching': True,
    }, {
        # vimeo as regular embed
        'url': 'http://digg.com/video/dream-girl-short-film',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a digg.com video page to the extractor that can handle it.

        The page is downloaded and searched for a ``video_info = {...};``
        JavaScript assignment. When that object carries a ``video_id`` and a
        recognised ``provider_name`` ('youtube' or 'jwplayer'), the result is
        a URL result delegating to the matching extractor. In every other
        case (no object, unparsable object, no id, unknown provider) the
        original URL is delegated to the Generic extractor.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # ``default='{}'`` covers pages with no video_info assignment at all.
        # ``fatal=False`` makes _parse_json return None instead of raising
        # when the object is malformed, so coalesce to an empty dict;
        # otherwise the ``.get`` calls below would raise AttributeError and
        # the Generic fallback would never be reached.
        info = self._parse_json(
            self._search_regex(
                r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
                default='{}'), display_id, transform_source=js_to_json,
            fatal=False) or {}

        video_id = info.get('video_id')

        if video_id:
            provider = info.get('provider_name')
            if provider == 'youtube':
                return self.url_result(
                    video_id, ie='Youtube', video_id=video_id)
            elif provider == 'jwplayer':
                return self.url_result(
                    'jwplatform:%s' % video_id, ie='JWPlatform',
                    video_id=video_id)

        # Regular embeds (e.g. vimeo) and anything unrecognised are left to
        # the Generic extractor, which inspects the page itself.
        return self.url_result(url, 'Generic')