1 from __future__ import unicode_literals
8 from .common import InfoExtractor
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv."""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    # Matches bare channel pages, archived broadcasts (/b/<id>) and
    # chapters (/c/<id>).  NOTE(review): _real_extract reads a
    # 'channelid' group, so the alternation must define it — the
    # original listing had dropped that branch.
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    # Page size used when paging through a channel's archive listing.
    _JUSTIN_PAGE_LIMIT = 100
    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
        'md5': 'ecaa8a790c22a40770901460af191c9a',
        'info_dict': {
            'upload_date': '20110927',
            'uploader_id': 25114803,
            'uploader': 'thegamedevhub',
            'title': 'Beginner Series - Scripting With Python Pt.1',
        },
    }

    # Endpoint shared by _extract_media and _extract_info.
    _API_BASE = 'https://api.twitch.tv'
49 # Return count of items, list of *valid* items
50 def _parse_page(self, url, video_id, counter):
51 info_json = self._download_webpage(
53 'Downloading video info JSON on page %d' % counter,
54 'Unable to download video info JSON %d' % counter)
56 response = json.loads(info_json)
57 if type(response) != list:
58 error_text = response.get('error', 'unknown error')
59 raise ExtractorError('Justin.tv API: %s' % error_text)
62 video_url = clip['video_file_url']
64 video_extension = os.path.splitext(video_url)[1][1:]
65 video_date = re.sub('-', '', clip['start_time'][:10])
66 video_uploader_id = clip.get('user_id', clip.get('channel_id'))
68 video_title = clip.get('title', video_id)
70 'id': compat_str(video_id),
73 'uploader': clip.get('channel_name', video_uploader_id),
74 'uploader_id': video_uploader_id,
75 'upload_date': video_date,
76 'ext': video_extension,
78 return (len(response), info)
80 def _handle_error(self, response):
81 if not isinstance(response, dict):
83 error = response.get('error')
86 '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
89 def _download_json(self, url, video_id, note='Downloading JSON metadata'):
90 response = super(JustinTVIE, self)._download_json(url, video_id, note)
91 self._handle_error(response)
94 def _extract_media(self, item, item_id):
101 info = self._extract_info(self._download_json(
102 '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
103 'Downloading %s info JSON' % ITEMS[item]))
105 response = self._download_json(
106 '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
107 'Downloading %s playlist JSON' % ITEMS[item])
110 chunks = response['chunks']
111 qualities = list(chunks.keys())
112 for num, fragment in enumerate(zip(*chunks.values()), start=1):
114 for fmt_num, fragment_fmt in enumerate(fragment):
115 format_id = qualities[fmt_num]
117 'url': fragment_fmt['url'],
118 'format_id': format_id,
119 'quality': 1 if format_id == 'live' else 0,
121 m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
123 fmt['height'] = int(m.group('height'))
125 self._sort_formats(formats)
127 entry['title'] = '%s part %d' % (entry['title'], num)
128 entry['formats'] = formats
129 entries.append(entry)
132 def _extract_info(self, info):
135 'title': info['title'],
136 'description': info['description'],
137 'duration': info['length'],
138 'thumbnail': info['preview'],
139 'uploader': info['channel']['display_name'],
140 'uploader_id': info['channel']['name'],
141 'timestamp': parse_iso8601(info['recorded_at']),
142 'view_count': info['views'],
145 def _real_extract(self, url):
146 mobj = re.match(self._VALID_URL, url)
148 api_base = 'http://api.twitch.tv'
150 if mobj.group('channelid'):
152 video_id = mobj.group('channelid')
153 api = api_base + '/channel/archives/%s.json' % video_id
154 elif mobj.group('chapterid'):
155 return self._extract_media('c', mobj.group('chapterid'))
158 webpage = self._download_webpage(url, chapter_id)
159 m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
161 raise ExtractorError('Cannot find archive of a chapter')
162 archive_id = m.group(1)
164 api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
165 doc = self._download_xml(
167 note='Downloading chapter information',
168 errnote='Chapter information download failed')
169 for a in doc.findall('.//archive'):
170 if archive_id == a.find('./id').text:
173 raise ExtractorError('Could not find chapter in chapter information')
175 video_url = a.find('./video_file_url').text
176 video_ext = video_url.rpartition('.')[2] or 'flv'
178 chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
179 chapter_info = self._download_json(
180 chapter_api_url, 'c' + chapter_id,
181 note='Downloading chapter metadata',
182 errnote='Download of chapter metadata failed')
184 bracket_start = int(doc.find('.//bracket_start').text)
185 bracket_end = int(doc.find('.//bracket_end').text)
187 # TODO determine start (and probably fix up file)
188 # youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
189 #video_url += '?start=' + TODO:start_timestamp
190 # bracket_start is 13290, but we want 51670615
191 self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
192 'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
195 'id': 'c' + chapter_id,
198 'title': chapter_info['title'],
199 'thumbnail': chapter_info['preview'],
200 'description': chapter_info['description'],
201 'uploader': chapter_info['channel']['display_name'],
202 'uploader_id': chapter_info['channel']['name'],
207 return self._extract_media('a', mobj.group('videoid'))
211 limit = self._JUSTIN_PAGE_LIMIT
212 for counter in itertools.count(1):
213 page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
214 page_count, page_info = self._parse_page(
215 page_url, video_id, counter)
216 entries.extend(page_info)
217 if not paged or page_count != limit: