from worst to best quality.
Potential fields:
- * url Mandatory. The URL of the video file or URL of
- the manifest file in case of fragmented media
- (DASH, hls, hds).
+ * url Mandatory. The URL of the video file
+ * manifest_url
+ The URL of the manifest file in case of
+ fragmented media (DASH, hls, hds)
* ext Will be calculated from URL if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
formats.append({
'format_id': format_id,
'url': manifest_url,
+ 'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
'tbr': tbr,
'width': width,
# format_id intact.
if not live:
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
+ manifest_url = format_url(line.strip())
f = {
'format_id': '-'.join(format_id),
- 'url': format_url(line.strip()),
+ 'url': manifest_url,
+ 'manifest_url': manifest_url,
'tbr': tbr,
'ext': ext,
'fps': float_or_none(last_info.get('FRAME-RATE')),
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
return self._parse_mpd_formats(
- compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
+ compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
+ formats_dict=formats_dict, mpd_url=mpd_url)
- def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
+ def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
"""
Parse formats from MPD manifest.
References:
f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'url': base_url,
+ 'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
'height': int_or_none(representation_attrib.get('height')),
if 'total_number' not in representation_ms_info and 'segment_duration':
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
- representation_ms_info['segment_urls'] = [
- media_template % {
- 'Number': segment_number,
- 'Bandwidth': representation_attrib.get('bandwidth'),
- }
- for segment_number in range(
- representation_ms_info['start_number'],
- representation_ms_info['total_number'] + representation_ms_info['start_number'])]
representation_ms_info['fragments'] = [{
'url': media_template % {
'Number': segment_number,
# $Number*$ or $Time$ in media template with S list available
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
- representation_ms_info['segment_urls'] = []
representation_ms_info['fragments'] = []
segment_time = 0
segment_d = None
'Bandwidth': representation_attrib.get('bandwidth'),
'Number': segment_number,
}
- representation_ms_info['segment_urls'].append(segment_url)
representation_ms_info['fragments'].append({
'url': segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
})
representation_ms_info['fragments'] = fragments
- if 'segment_urls' in representation_ms_info:
+ # NB: MPD manifest may contain direct URLs to unfragmented media.
+ # No fragments key is present in this case.
+ if 'fragments' in representation_ms_info:
f.update({
- 'segment_urls': representation_ms_info['segment_urls'],
'fragments': [],
'protocol': 'http_dash_segments',
})
if 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
- f.update({
- 'initialization_url': initialization_url,
- })
if not f.get('url'):
f['url'] = initialization_url
f['fragments'].append({'url': initialization_url})
for track_tag in re.findall(r'<track[^>]+>', media_content):
track_attributes = extract_attributes(track_tag)
kind = track_attributes.get('kind')
- if not kind or kind == 'subtitles':
+ if not kind or kind in ('subtitles', 'captions'):
src = track_attributes.get('src')
if not src:
continue
media_info['subtitles'].setdefault(lang, []).append({
'url': absolute_url(src),
})
- if media_info['formats']:
+ if media_info['formats'] or media_info['subtitles']:
entries.append(media_info)
return entries
def _extract_akamai_formats(self, manifest_url, video_id):
formats = []
+ hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
- formats.extend(self._extract_f4m_formats(
- update_url_query(f4m_url, {'hdcore': '3.7.0'}),
- video_id, f4m_id='hds', fatal=False))
+ if 'hdcore=' not in f4m_url:
+ f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
+ f4m_formats = self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False)
+ for entry in f4m_formats:
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.extend(f4m_formats)
m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',