- links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
- info['list'] = [
- {
- 'type': 'reference',
- 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
- }
- for cpage in links]
-
- results = []
- for entry in info['list']:
- assert entry['type'] == 'reference'
- results += self.extract(entry['url'])
- return results
+ rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
+ rootpage = self._download_webpage(rootURL, info['id'],
+ errnote='Unable to download course info page')
+
+ links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
+ info['entries'] = [self.url_result(
+ 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+ ) for l in links]
+ return info