from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
compat_chr,
compat_parse_qs,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
-
+)
+from ..utils import (
clean_html,
- get_element_by_id,
- get_element_by_attribute,
ExtractorError,
+ get_element_by_attribute,
+ get_element_by_id,
int_or_none,
OnDemandPagedList,
+ orderedSet,
unescapeHTML,
unified_strdate,
- orderedSet,
uppercase_escape,
)
'expected_warnings': [
'DASH manifest missing',
]
- }
+ },
+ # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'upload_date': '20120731',
+ 'uploader_id': 'olympic',
+ 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'uploader': 'Olympics',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
]
def __init__(self, *args, **kwargs):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
- def _parse_dash_manifest(self, video_id, dash_manifest_url):
+ def _parse_dash_manifest(
+ self, video_id, dash_manifest_url, player_url, age_gate):
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', fatal=False)
+ video_webpage, 'categories', default=None)
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
'url': video_info['conn'][0],
'player_url': player_url,
}]
- elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+ elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
dash_mpd = video_info.get('dashmpd')
- if not dash_mpd:
- self.report_warning('%s: DASH manifest missing' % video_id)
- else:
+ if dash_mpd:
dash_manifest_url = dash_mpd[0]
try:
dash_formats = self._parse_dash_manifest(
- video_id, dash_manifest_url)
+ video_id, dash_manifest_url, player_url, age_gate)
except (ExtractorError, KeyError) as e:
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
- _MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
IE_NAME = 'youtube:playlist'
_TESTS = [{
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
- _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
- _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = 'youtube:channel'
_TESTS = [{
'note': 'paginated channel',
return self.playlist_result(entries, channel_id)
def _entries():
+ more_widget_html = content_html = channel_page
for pagenum in itertools.count(1):
- url = self._MORE_PAGES_URL % (pagenum, channel_id)
- page = self._download_json(
- url, channel_id, note='Downloading page #%s' % pagenum,
- transform_source=uppercase_escape)
- ids_in_page = self.extract_videos_from_page(page['content_html'])
+ ids_in_page = self.extract_videos_from_page(content_html)
for video_id in ids_in_page:
yield self.url_result(
video_id, 'Youtube', video_id=video_id)
- if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+ mobj = re.search(
+ r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
+ more_widget_html)
+ if not mobj:
break
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), channel_id,
+ 'Downloading page #%s' % (pagenum + 1),
+ transform_source=uppercase_escape)
+ content_html = more['content_html']
+ more_widget_html = more['load_more_widget_html']
+
return self.playlist_result(_entries(), channel_id)