return self._html_search_meta('twitter:player', html,
'twitter card player')
- def _search_json_ld(self, html, video_id, fatal=True):
+ def _search_json_ld(self, html, video_id, **kwargs):
+ # Search html for a <script type="application/ld+json"> element and pass
+ # its contents to _json_ld. Any extra keyword arguments are forwarded
+ # unchanged to _search_regex.
json_ld = self._search_regex(
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
- html, 'JSON-LD', fatal=fatal, group='json_ld')
+ html, 'JSON-LD', group='json_ld', **kwargs)
+ # No JSON-LD text was obtained: return an empty dict.
if not json_ld:
return {}
- return self._json_ld(json_ld, video_id, fatal=fatal)
+ # Only `fatal` is handed on to _json_ld; it defaults to True when the
+ # caller did not supply it.
+ return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
def _json_ld(self, json_ld, video_id, fatal=True):
if isinstance(json_ld, compat_str):
if not formats:
raise ExtractorError('No video formats found')
+ for f in formats:
+ # Automatically determine tbr when missing based on abr and vbr (improves
+ # formats sorting in some cases)
+ if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
+ f['tbr'] = f['abr'] + f['vbr']
+
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext
return []
m3u8_doc, urlh = res
m3u8_url = urlh.geturl()
+ # A Media Playlist Tag MUST NOT appear in a Master Playlist
+ # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
+ # The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlist
+ # https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
+ if '#EXT-X-TARGETDURATION' in m3u8_doc:
+ return [{
+ 'url': m3u8_url,
+ 'format_id': m3u8_id,
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }]
last_info = None
last_media = None
kv_rex = re.compile(
# TODO: looks like video codec is not always necessarily goes first
va_codecs = codecs.split(',')
if va_codecs[0]:
- f['vcodec'] = va_codecs[0].partition('.')[0]
+ f['vcodec'] = va_codecs[0]
if len(va_codecs) > 1 and va_codecs[1]:
- f['acodec'] = va_codecs[1].partition('.')[0]
+ f['acodec'] = va_codecs[1]
resolution = last_info.get('RESOLUTION')
if resolution:
width_str, height_str = resolution.split('x')
formats = []
rtmp_count = 0
http_count = 0
+ m3u8_count = 0
videos = smil.findall(self._xpath_ns('.//video', namespace))
for video in videos:
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
if proto == 'm3u8' or src_ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False))
+ m3u8_formats = self._extract_m3u8_formats(
+ src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+ if len(m3u8_formats) == 1:
+ m3u8_count += 1
+ m3u8_formats[0].update({
+ 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ })
+ formats.extend(m3u8_formats)
continue
if src_ext == 'f4m':
})
return entries
+ def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True):
+ return self._download_xml(
+ dash_manifest_url, video_id,
+ note='Downloading DASH manifest',
+ errnote='Could not download DASH manifest',
+ fatal=fatal)
+
+ def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}):
+ dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal)
+ if dash_doc is False:
+ return []
+
+ return self._parse_dash_manifest(
+ dash_doc, namespace=namespace, formats_dict=formats_dict)
+
+ def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}):
+ def _add_ns(path):
+ return self._xpath_ns(path, namespace)
+
+ formats = []
+ for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')):
+ mime_type = a.attrib.get('mimeType')
+ for r in a.findall(_add_ns('Representation')):
+ mime_type = r.attrib.get('mimeType') or mime_type
+ url_el = r.find(_add_ns('BaseURL'))
+ if mime_type == 'text/vtt':
+ # TODO implement WebVTT downloading
+ pass
+ elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+ segment_list = r.find(_add_ns('SegmentList'))
+ format_id = r.attrib['id']
+ video_url = url_el.text if url_el is not None else None
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ f = {
+ 'format_id': format_id,
+ 'url': video_url,
+ 'width': int_or_none(r.attrib.get('width')),
+ 'height': int_or_none(r.attrib.get('height')),
+ 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+ 'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+ 'filesize': filesize,
+ 'fps': int_or_none(r.attrib.get('frameRate')),
+ }
+ if segment_list is not None:
+ initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
+ f.update({
+ 'initialization_url': initialization_url,
+ 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
+ 'protocol': 'http_dash_segments',
+ })
+ if not f.get('url'):
+ f['url'] = initialization_url
+ try:
+ existing_format = next(
+ fo for fo in formats
+ if fo['format_id'] == format_id)
+ except StopIteration:
+ full_info = formats_dict.get(format_id, {}).copy()
+ full_info.update(f)
+ codecs = r.attrib.get('codecs')
+ if codecs:
+ if mime_type.startswith('video/'):
+ vcodec, acodec = codecs, 'none'
+ else: # mime_type.startswith('audio/')
+ vcodec, acodec = 'none', codecs
+
+ full_info.update({
+ 'vcodec': vcodec,
+ 'acodec': acodec,
+ })
+ formats.append(full_info)
+ else:
+ existing_format.update(f)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+ return formats
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()