[tapely] Add new extractor (closes #3861)
[youtube-dl] / youtube_dl / extractor / tapely.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     ExtractorError,
9     clean_html,
10     compat_urllib_request,
11     float_or_none,
12     parse_iso8601,
13 )
14
15
class TapelyIE(InfoExtractor):
    """Extractor for tape.ly mixtapes and for single songs inside a tape.

    A URL of the form ``tape.ly/<tape>`` yields a playlist result; a URL of
    the form ``tape.ly/<tape>/<n>`` yields only the n-th (1-based) entry of
    that playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
    _API_URL = 'http://tape.ly/showtape?id={0:}'
    _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
    _TESTS = [
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water',
            'info_dict': {
                'id': 23952,
                'title': 'my grief as told by water',
                # Raw string: '\.' is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.png$',
                'uploader_id': 16484,
                'timestamp': 1411848286,
                'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
            },
            'playlist_count': 13,
        },
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water/1',
            'md5': '79031f459fdec6530663b854cbc5715c',
            'info_dict': {
                'id': 258464,
                'title': 'Dreaming Awake  (My Brightest Diamond)',
                'ext': 'm4a',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a tape, or for one song of a tape.

        The tape's JSON description is requested from ``_API_URL`` with
        XMLHttpRequest-style headers, and one entry is built per song whose
        ``source`` is ``'S3'`` or ``'YT'``; songs with any other source are
        skipped with a warning.

        Returns a playlist dict when the URL has no song number, otherwise
        the single entry at that 1-based position in the list of supported
        entries.

        Raises ExtractorError (expected) when the requested song number is
        outside ``1..len(entries)``.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        playlist_url = self._API_URL.format(display_id)
        request = compat_urllib_request.Request(playlist_url)
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        request.add_header('Accept', 'application/json')

        playlist = self._download_json(request, display_id)

        tape = playlist['tape']

        entries = []
        for s in tape['songs']:
            song = s['song']
            source = song['source']
            entry = {
                'id': song['id'],
                # NOTE(review): scale 1000 suggests 'songduration' is in
                # milliseconds -- confirm against the API.
                'duration': float_or_none(song.get('songduration'), 1000),
                'title': song['title'],
            }
            if source == 'S3':
                entry['url'] = self._S3_SONG_URL.format(song['filename'])
                entries.append(entry)
            elif source == 'YT':
                # The video id is the last path component of the filename
                # (e.g. '/youtube/<id>'). rpartition never fails to unpack,
                # unlike a fixed three-way split on '/'.
                yt_id = song['filename'].rpartition('/')[2]
                entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
                entries.append(entry)
            else:
                self.report_warning('Unknown song source: %s' % source)

        songnr = mobj.group('songnr')
        if songnr:
            # Song numbers in the URL are 1-based. Check the range
            # explicitly: '/0' would otherwise become index -1 and silently
            # return the last song instead of reporting an error.
            index = int(songnr) - 1
            if not 0 <= index < len(entries):
                raise ExtractorError(
                    'No song with index: %s' % songnr,
                    expected=True)
            return entries[index]

        return {
            '_type': 'playlist',
            'id': tape['id'],
            'display_id': display_id,
            'title': tape['name'],
            'entries': entries,
            'thumbnail': tape.get('image_url'),
            'description': clean_html(tape.get('subtext')),
            'like_count': tape.get('likescount'),
            'uploader_id': tape.get('user_id'),
            'timestamp': parse_iso8601(tape.get('published_at')),
        }