url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
+ @staticmethod
+ def _extract_chapters(description, duration):
+ if not description:
+ return None
+ chapter_lines = re.findall(
+ r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
+ description)
+ if not chapter_lines:
+ return None
+ chapters = []
+ for next_num, (chapter_line, time_point) in enumerate(
+ chapter_lines, start=1):
+ start_time = parse_duration(time_point)
+ if start_time is None:
+ continue
+ end_time = (duration if next_num == len(chapter_lines)
+ else parse_duration(chapter_lines[next_num][1]))
+ if end_time is None:
+ continue
+ chapter_title = re.sub(
+ r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
+ chapter_title = re.sub(r'\s+', ' ', chapter_title)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': chapter_title,
+ })
+ return chapters
+
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_title = '_'
# description
- video_description = get_element_by_id("eow-description", video_webpage)
+ description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
- video_description = re.sub(r'''(?x)
+ description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
+ chapters = self._extract_chapters(description_original, video_duration)
+
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
+ [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+ r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
+ 'chapters': chapters,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,