# TODO implement WebVTT downloading
pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+ segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
+ if segment_list:
+ f.update({
+ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+ 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')]
+ })
try:
existing_format = next(
fo for fo in formats
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
else:
- # Hide the formats we found through non-DASH
+ # Remove the formats we found through non-DASH: they
+ # contain less info, and it can be wrong because we use
+ # fixed values (for example, the resolution). See
+ # https://github.com/rg3/youtube-dl/issues/5774 for an
+ # example.
dash_keys = set(df['format_id'] for df in dash_formats)
- for f in formats:
- if f['format_id'] in dash_keys:
- f['format_id'] = 'nondash-%s' % f['format_id']
- f['preference'] = f.get('preference', 0) - 10000
+ formats = [f for f in formats if f['format_id'] not in dash_keys]
formats.extend(dash_formats)
# Check for malformed aspect ratio
channel_id = self._match_id(url)
url = self._TEMPLATE_URL % channel_id
+
+ # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778).
+ # Work around this by extracting as a playlist if we managed to obtain the channel playlist URL;
+ # otherwise fall back on channel by page extraction.
+ channel_page = self._download_webpage(
+ url + '?view=57', channel_id,
+ 'Downloading channel page', fatal=False)
+ channel_playlist_id = self._search_regex(
+ [r'<meta itemprop="channelId" content="([^"]+)">',
+ r'data-channel-external-id="([^"]+)"'],
+ channel_page, 'channel id', default=None)
+ if channel_playlist_id and channel_playlist_id.startswith('UC'):
+ playlist_id = 'UU' + channel_playlist_id[2:]
+ return self.url_result(
+ compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
+
channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
autogenerated = re.search(r'''(?x)
class="[^"]*?(?: