- for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
- if mobj.group(1) not in ids_in_page:
- ids_in_page.append(mobj.group(1))
- return ids_in_page
+ titles_in_page = []
+ for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
+ video_id = mobj.group('id')
+ video_title = unescapeHTML(mobj.group('title'))
+ try:
+ idx = ids_in_page.index(video_id)
+ if video_title and not titles_in_page[idx]:
+ titles_in_page[idx] = video_title
+ except ValueError:
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+ return zip(ids_in_page, titles_in_page)