ec4fa6e602ea779dd6d3a530ea6cfb639eee3cf4
[youtube-dl] / youtube_dl / extractor / radiojavan.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     unified_strdate,
8     str_to_int,
9 )
10
11
class RadioJavanIE(InfoExtractor):
    """Extractor for video pages under radiojavan.com/videos/video/<id>.

    The video id is the last path component of the page URL. Formats are
    built from ``RJ.video<height>p = '<path>'`` assignments found in the
    page source; the remaining metadata is scraped from the page markup.
    """

    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            # Raw string: ``\.`` must reach the regex engine verbatim and is
            # not a valid escape sequence in an ordinary string literal.
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        The returned dict carries the keys ``id``, ``title``, ``thumbnail``,
        ``upload_date``, ``view_count``, ``like_count``, ``dislike_count``
        and ``formats``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Each regex match yields (height, path); a leading slash of the
        # path is dropped by the pattern before it is appended to the
        # media host prefix.
        formats = [{
            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
            'format_id': '%sp' % height,
            'height': int(height),
        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The lookups below pass fatal=False, i.e. they are treated as
        # optional metadata (presumably a miss yields None rather than an
        # error -- confirm against InfoExtractor._search_regex).
        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }