[radiojavan] Fix extraction
[youtube-dl] / youtube_dl / extractor / radiojavan.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     unified_strdate,
8     str_to_int,
9     urlencode_postdata,
10 )
11
12
class RadioJavanIE(InfoExtractor):
    """Information extractor for radiojavan.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    # Endpoint that receives the video id as form data; the 'host' entry of
    # its JSON response is used as the base of every format URL.
    _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, asks the host tracker endpoint which host serves
        the media, and combines that host with every ``RJ.video<height>p``
        path found in the page to form the list of formats. Title and
        thumbnail come from the page's Open Graph data; the upload date and
        the view/like/dislike counters are optional and scraped from markup.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The tracker request carries the page URL as Referer.
        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': url,
        }
        host_info = self._download_json(
            self._HOST_TRACKER_URL, video_id,
            data=urlencode_postdata({'id': video_id}),
            headers=request_headers)
        download_host = host_info['host']

        # Each match is a (height, path) pair; a leading slash of the path is
        # skipped by the pattern, so it is re-added when joining with the host.
        formats = []
        for height, video_path in re.findall(
                r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage):
            formats.append({
                'url': '%s/%s' % (download_host, video_path),
                'format_id': '%sp' % height,
                'height': int(height),
            })
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        raw_date = self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False)
        upload_date = unified_strdate(raw_date)

        def scrape_count(pattern, name):
            # Counters are optional: the lookup is non-fatal and the matched
            # text is handed to str_to_int for conversion.
            return str_to_int(self._search_regex(
                pattern, webpage, name, fatal=False))

        view_count = scrape_count(
            r'class="views">Plays: ([\d,]+)', 'view count')
        like_count = scrape_count(
            r'class="rating">([\d,]+) likes', 'like count')
        dislike_count = scrape_count(
            r'class="rating">([\d,]+) dislikes', 'dislike count')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }