744f9db38d53b0829ed34d342ba618927ae7db08
[youtube-dl] / youtube_dl / extractor / tapely.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8     compat_urllib_request,
9 )
10 from ..utils import (
11     clean_html,
12     ExtractorError,
13     float_or_none,
14     parse_iso8601,
15 )
16
17
class TapelyIE(InfoExtractor):
    """Extractor for tape.ly / tapely.com tapes and their individual songs.

    A URL of the form ``/<tape>`` is returned as a playlist of all songs
    that could be extracted; ``/<tape>/<n>`` returns only the n-th
    (1-based) entry of that list.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
    _API_URL = 'http://tape.ly/showtape?id={0:}'
    _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
    _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
    _TESTS = [
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water',
            'info_dict': {
                'id': 23952,
                'title': 'my grief as told by water',
                # Raw string: '\.' is not a valid escape in a normal literal.
                'thumbnail': r're:^https?://.*\.png$',
                'uploader_id': 16484,
                'timestamp': 1411848286,
                'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
            },
            'playlist_count': 13,
        },
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water/1',
            'md5': '79031f459fdec6530663b854cbc5715c',
            'info_dict': {
                'id': 258464,
                'title': 'Dreaming Awake  (My Brightest Diamond)',
                'ext': 'm4a',
            },
        },
        {
            'url': 'https://tapely.com/my-grief-as-told-by-water',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch the tape's JSON description and build the result for url.

        Returns a single entry dict when the URL carries a song number,
        otherwise a playlist dict holding every extracted entry.

        Raises ExtractorError (expected) when the requested song number
        does not refer to an extracted entry.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        # The request is sent with XMLHttpRequest/JSON headers and the
        # original page as referer.
        playlist_url = self._API_URL.format(display_id)
        request = compat_urllib_request.Request(playlist_url)
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        request.add_header('Accept', 'application/json')
        request.add_header('Referer', url)

        playlist = self._download_json(request, display_id)

        tape = playlist['tape']

        entries = []
        for s in tape['songs']:
            song = s['song']
            # .get() so that a song without a 'source' key is reported and
            # skipped like any other unknown source instead of aborting the
            # whole tape with a KeyError.
            source = song.get('source')
            entry = {
                'id': song['id'],
                # presumably 'songduration' is in milliseconds and 1000 is
                # the scale divisor -- TODO confirm against float_or_none
                'duration': float_or_none(song.get('songduration'), 1000),
                'title': song['title'],
            }
            if source == 'S3':
                entry['url'] = self._S3_SONG_URL.format(song['filename'])
            elif source == 'YT':
                self.to_screen('YouTube video detected')
                yt_id = song['filename'].replace('/youtube/', '')
                # Merge the delegating result into the entry.
                entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
            elif source == 'SC':
                self.to_screen('SoundCloud song detected')
                sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename'])
                entry.update(self.url_result(sc_url, 'Soundcloud'))
            else:
                self.report_warning('Unknown song source: %s' % source)
                continue
            entries.append(entry)

        songnr = mobj.group('songnr')
        if songnr:
            # NOTE(review): the number indexes the list of *extracted*
            # entries, so songs skipped above shift the numbering.
            index = int(songnr) - 1
            # Explicit bounds check: a song number of 0 would otherwise
            # become index -1 and silently return the last song.
            if not 0 <= index < len(entries):
                raise ExtractorError(
                    'No song with index: %s' % songnr,
                    expected=True)
            return entries[index]

        return {
            '_type': 'playlist',
            'id': tape['id'],
            'display_id': display_id,
            'title': tape['name'],
            'entries': entries,
            'thumbnail': tape.get('image_url'),
            'description': clean_html(tape.get('subtext')),
            'like_count': tape.get('likescount'),
            'uploader_id': tape.get('user_id'),
            'timestamp': parse_iso8601(tape.get('published_at')),
        }