[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / tunepk.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6     int_or_none,
7     try_get,
8     unified_timestamp,
9 )
10
11
class TunePkIE(InfoExtractor):
    """Information extractor for tune.pk video pages and embed URLs."""

    # Accepted URL shapes; each one captures the numeric video id:
    #   tune.pk/video/<id>...
    #   tune.pk/player/embed_player.php?...vid=<id>
    #   embed.tune.pk/play/<id>
    # The "." and "?" in "embed_player.php?" are escaped so they match
    # literally instead of acting as regex metacharacters (an unescaped
    # "php?" would mean "ph" plus an optional "p").
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?tune\.pk/(?:video/|player/embed_player\.php\?.*?\bvid=)|
                            embed\.tune\.pk/play/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins',
        'md5': '0c537163b7f6f97da3c5dd1e3ef6dd55',
        'info_dict': {
            'id': '6919541',
            'ext': 'mp4',
            'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins',
            'description': 'md5:eb5a04114fafef5cec90799a93a2d09c',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1487327564,
            'upload_date': '20170217',
            'uploader': 'Movie Trailers',
            'duration': 107,
            'view_count': int,
        }
    }, {
        'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no',
        'only_matching': True,
    }, {
        'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Whatever URL form was given, the page ``https://tune.pk/video/<id>``
        is downloaded and the JSON argument of the ``new TunePlayer(...)``
        call found in it is used as the primary metadata source; page meta
        tags serve as fallbacks for individual fields.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``timestamp``, ``uploader``, ``duration``,
        ``view_count`` and ``formats``.

        Raises ``KeyError`` if the player JSON lacks ``details``,
        ``details.video`` or ``details.player.sources``; the download and
        search helpers may raise their own errors (not visible here).
        """
        video_id = self._match_id(url)

        # Always fetch the regular watch page, even for embed URLs.
        webpage = self._download_webpage(
            'https://tune.pk/video/%s' % video_id, video_id)

        # The player configuration is passed inline to the TunePlayer
        # constructor as a JSON object literal.
        details = self._parse_json(
            self._search_regex(
                r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'),
            video_id)['details']

        video = details['video']
        # Title is mandatory: the last fallback is fatal if nothing is found.
        title = video.get('title') or self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'title', webpage, 'title', fatal=True)

        formats = self._parse_jwplayer_formats(
            details['player']['sources'], video_id)
        self._sort_formats(formats)

        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'description', webpage, 'description')

        thumbnail = video.get('thumb') or self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'thumbnail', webpage, 'thumbnail')

        timestamp = unified_timestamp(video.get('date_added'))
        # Only accept the uploader name from JSON when it is a string;
        # otherwise fall back to the page's "author" meta tag.
        uploader = try_get(
            video, lambda x: x['uploader']['name'],
            compat_str) or self._html_search_meta('author', webpage, 'author')

        duration = int_or_none(video.get('duration'))
        view_count = int_or_none(video.get('views'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }