Merge remote-tracking branch 'lenaten/8tracks'
[youtube-dl] / youtube_dl / extractor / thesixtyone.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import unified_strdate
9
10
class TheSixtyOneIE(InfoExtractor):
    """Extractor for single songs hosted on thesixtyone.com.

    The song id is taken from the last path segment of the URL, the
    canonical song page is downloaded, and the JSON object describing the
    song is read from it to build the audio URL and metadata.
    """

    # Accepted shapes: ".../s/<id>", ".../song/<id>" and
    # ".../song/comments/list/<id>", optionally preceded by arbitrary path or
    # fragment segments and optionally followed by a trailing slash.
    #
    # The optional prefix is written as "(?:.*/)?" rather than the nested
    # "(?:.*?/)*": both accept exactly the same prefixes (nothing, or any run
    # of characters ending in "/"), but the nested form backtracks
    # exponentially on non-matching URLs containing many consecutive slashes.
    _VALID_URL = r'''(?x)https?://(?:www\.)?thesixtyone\.com/
        (?:.*/)?
        (?:
            s|
            song/comments/list|
            song
        )/(?P<id>[A-Za-z0-9]+)/?$'''
    # Page downloaded for every song, whatever URL shape was given.
    _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}'
    # Positional field 0 is the decoded song key; ``audio_server`` is filled
    # from the song data found in the page.
    _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}.thesixtyone.com/thesixtyone_production/audio/{0:}_stream'
    # ``photo_base_url`` is filled from the song data found in the page.
    _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop'
    _TESTS = [
        {
            'url': 'http://www.thesixtyone.com/s/SrE3zD7s1jt/',
            'md5': '821cc43b0530d3222e3e2b70bb4622ea',
            'info_dict': {
                'id': 'SrE3zD7s1jt',
                'ext': 'mp3',
                'title': 'CASIO - Unicorn War Mixtape',
                'thumbnail': 're:^https?://.*_desktop$',
                'upload_date': '20071217',
                'duration': 3208,
            }
        },
        {
            'url': 'http://www.thesixtyone.com/song/comments/list/SrE3zD7s1jt',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/s/ULoiyjuJWli#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/#/s/SrE3zD7s1jt/',
            'only_matching': True,
        },
        {
            'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/',
            'only_matching': True,
        },
    ]

    # Character substitution applied to the song's ``key`` field; the sixteen
    # targets are exactly the hexadecimal digits. Characters not listed here
    # are kept unchanged by ``_decode_key``.
    _DECODE_MAP = {
        "x": "a",
        "m": "b",
        "w": "c",
        "q": "d",
        "n": "e",
        "p": "f",
        "a": "0",
        "h": "1",
        "e": "2",
        "u": "3",
        "s": "4",
        "i": "5",
        "o": "6",
        "y": "7",
        "r": "8",
        "c": "9"
    }

    def _decode_key(self, key):
        """Return *key* with each character substituted via ``_DECODE_MAP``
        and the resulting sequence reversed."""
        decoded = [self._DECODE_MAP.get(char, char) for char in key]
        return "".join(reversed(decoded))

    def _real_extract(self, url):
        """Build the info dict for the song referenced by *url*.

        Returns a dict with ``id``, ``title``, ``formats`` and the optional
        ``comment_count``, ``duration``, ``like_count``, ``thumbnail`` and
        ``upload_date`` entries (``None`` when the page does not provide the
        underlying value).

        Raises ``KeyError`` when the song data lacks ``key``,
        ``audio_server``, ``artist`` or ``name``; the download and regex
        helpers inherited from the base class may raise their own errors.
        """
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group('id')

        webpage = self._download_webpage(
            self._SONG_URL_TEMPLATE.format(song_id), song_id)

        # The page embeds the song as `"<id>": {...}`. The lazy match stops
        # at the first closing brace, so this relies on the object holding
        # no nested braces.
        song_data = json.loads(self._search_regex(
            r'"%s":\s(\{.*?\})' % song_id, webpage, 'song_data'))

        # Keep the page URL parameter intact; the audio URL gets its own name.
        audio_url = self._SONG_FILE_URL_TEMPLATE.format(
            self._decode_key(song_data['key']), **song_data)

        formats = [{
            'format_id': 'sd',
            'url': audio_url,
            'ext': 'mp3',
        }]

        # Thumbnail and upload date are optional metadata, like the counters
        # below: report None instead of failing the whole extraction when the
        # source field is absent or empty.
        thumbnail = None
        if song_data.get('photo_base_url'):
            thumbnail = self._THUMBNAIL_URL_TEMPLATE.format(**song_data)

        # unified_strdate is defined outside this file; do not rely on how it
        # treats a missing date.
        publish_date = song_data.get('publish_date')
        upload_date = unified_strdate(publish_date) if publish_date else None

        return {
            'id': song_id,
            'title': '{artist:} - {name:}'.format(**song_data),
            'formats': formats,
            'comment_count': song_data.get('comments_count'),
            'duration': song_data.get('play_time'),
            'like_count': song_data.get('score'),
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }