srt_lang = list(srt_lang_list.keys())[0]
if not srt_lang in srt_lang_list:
return (u'WARNING: no closed captions found in the specified language', None)
- request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
+ params = compat_urllib_parse.urlencode({
+ 'lang': srt_lang,
+ 'name': srt_lang_list[srt_lang].encode('utf-8'),
+ 'v': video_id,
+ })
+ url = 'http://www.youtube.com/api/timedtext?' + params
try:
- srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
+ srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
if not srt_xml:
- return (u'WARNING: unable to download video subtitles', None)
+ return (u'WARNING: Did not fetch video subtitles', None)
return (None, self._closed_captions_xml_to_srt(srt_xml))
def _print_formats(self, formats):
if not m:
raise ExtractorError(u'Cannot parse data')
data = dict(json.loads(m.group(1)))
- video_url = compat_urllib_parse.unquote(data['hd_src'])
- video_duration = int(data['video_duration'])
+ params_raw = compat_urllib_parse.unquote(data['params'])
+ params = json.loads(params_raw)
+ video_url = params['hd_src']
+ video_duration = int(params['video_duration'])
m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
if not m:
'url': video_url,
'ext': 'mp4',
'duration': video_duration,
- 'thumbnail': data['thumbnail_src'],
+ 'thumbnail': params['thumbnail_src'],
}
return [info]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
- return
+ raise ExtractorError(u'Invalid URL: %s' % url)
if mobj.group('course') and mobj.group('video'): # A specific video
course = mobj.group('course')
'upload_date': None,
}
- self.report_download_webpage(info['id'])
- try:
- coursepage = compat_urllib_request.urlopen(url).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
- return
+ coursepage = self._download_webpage(url, info['id'],
+ note='Downloading course info page',
+ errnote='Unable to download course info page')
m = re.search('<h1>([^<]+)</h1>', coursepage)
if m:
assert entry['type'] == 'reference'
results += self.extract(entry['url'])
return results
-
else: # Root page
info = {
'id': 'Stanford OpenClassroom',