[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / radiojavan.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     parse_resolution,
8     str_to_int,
9     unified_strdate,
10     urlencode_postdata,
11     urljoin,
12 )
13
14
class RadioJavanIE(InfoExtractor):
    """Extractor for single video pages at radiojavan.com/videos/video/<id>."""

    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }
    # Host used when the video_host endpoint does not supply a usable one.
    _DEFAULT_HOST = 'https://host1.rjmusicmedia.com'

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Steps, in order:
          1. POST the video id to the ``video_host`` endpoint to learn which
             host serves the media files.
          2. Download the video page and collect every ``RJ.video<fmt> = "..."``
             assignment as one format, joining its path onto the host.
          3. Read title/thumbnail from the page's Open Graph data and the
             date and counters from the page markup (all counters optional).

        Returns a dict with the keys ``id``, ``title``, ``thumbnail``,
        ``upload_date``, ``view_count``, ``like_count``, ``dislike_count``
        and ``formats``.
        """
        video_id = self._match_id(url)

        host_info = self._download_json(
            'https://www.radiojavan.com/videos/video_host', video_id,
            data=urlencode_postdata({'id': video_id}),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': url,
            })
        # dict.get() applies its default only when the key is absent, so a
        # null/empty "host" value (or a reply that is not a JSON object) is
        # handled explicitly here instead of leaking into the format URLs.
        download_host = None
        if isinstance(host_info, dict):
            download_host = host_info.get('host')
        if not download_host:
            download_host = self._DEFAULT_HOST

        webpage = self._download_webpage(url, video_id)

        formats = []
        # Each match is (format_id, quote_char, path); the quote char is only
        # captured so the pattern can back-reference it and is discarded.
        for format_id, _, video_path in re.findall(
                r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage):
            video_url = urljoin(download_host, video_path)
            # NOTE(review): urljoin comes from ..utils and presumably yields a
            # falsy value when no URL can be built -- confirm. Such entries
            # are skipped so no format is emitted without a usable 'url'.
            if not video_url:
                continue
            f = parse_resolution(format_id)
            f.update({
                'url': video_url,
                'format_id': format_id,
            })
            formats.append(f)
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The remaining fields are looked up with fatal=False, i.e. they are
        # treated as optional metadata.
        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }