[vgtv] Add new extractor
[youtube-dl] / youtube_dl / extractor / eighttracks.py
1 import json
2 import random
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     ExtractorError,
8 )
9
10
class EightTracksIE(InfoExtractor):
    """Information extractor for mixes hosted on 8tracks.com.

    A mix URL has the form ``http(s)://8tracks.com/<user>/<id>``.  The mix
    page embeds a ``PAGE.mix`` JSON object; its ``id`` and ``tracks_count``
    fields drive a sequence of requests to the ``sets/<session>/play`` and
    ``sets/<session>/next`` endpoints, each of which yields one track.
    """

    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    # Expected results for the test mix, one playlist entry per track.
    # NOTE(review): the "youtue-dl" spelling in the first title is presumably
    # what the site reports -- confirm before "correcting" it.
    _TEST = {
        u"name": u"EightTracks",
        u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
        u"playlist": [
            {
                u"file": u"11885610.m4a",
                u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
                u"info_dict": {
                    u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885608.m4a",
                u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
                u"info_dict": {
                    u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885679.m4a",
                u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
                u"info_dict": {
                    u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885680.m4a",
                u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
                u"info_dict": {
                    u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885682.m4a",
                u"md5": u"1893e872e263a2705558d1d319ad19e8",
                u"info_dict": {
                    u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885683.m4a",
                u"md5": u"b673c46f47a216ab1741ae8836af5899",
                u"info_dict": {
                    u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885684.m4a",
                u"md5": u"1d74534e95df54986da7f5abf7d842b7",
                u"info_dict": {
                    u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
            {
                u"file": u"11885685.m4a",
                u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
                u"info_dict": {
                    u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl",
                },
            },
        ],
    }

    def _real_extract(self, url):
        """Return a list with one info dict per track of the mix at *url*.

        Each dict carries the keys ``id``, ``url``, ``title``, ``raw_title``,
        ``uploader_id`` and ``ext`` (always ``'m4a'``).

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = match.group('id')

        webpage = self._download_webpage(url, playlist_id)

        # The mix metadata is embedded in the page as "PAGE.mix = {...};".
        data = json.loads(self._search_regex(
            r"PAGE.mix = (.*?);\n", webpage, u'trax information',
            flags=re.DOTALL))

        # A random number serves as the session identifier in the set URLs;
        # the same value is reused for every request of this extraction.
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']

        # The first request uses the "play" endpoint; every later one uses
        # "next" together with the id of the track just retrieved.
        next_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        entries = []
        for index in range(track_count):
            api_json = self._download_webpage(
                next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(index+1), track_count),
                errnote=u'Failed to download song information')
            track_data = json.loads(api_json)[u'set']['track']

            track_id = track_data['id']
            stream_url = track_data['track_file_stream_url']
            full_title = track_data['performer'] + u' - ' + track_data['name']
            bare_title = track_data['name']
            uploader = data['user']['login']

            entries.append({
                'id': track_id,
                'url': stream_url,
                'title': full_title,
                'raw_title': bare_title,
                'uploader_id': uploader,
                'ext': 'm4a',
            })
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_id)
        return entries