if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False):
- self._downloader.to_stdout(
+ self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country_code.upper()))
video_info['title'] = video_title
return video_info
- def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
- urlrs = orderedSet(
+ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+ urls = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
- urlrs, playlist_id=video_id, playlist_title=video_title)
+ urls, playlist_id=playlist_id, playlist_title=playlist_title)
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
def _family_friendly_search(self, html):
# See http://schema.org/VideoObject
- family_friendly = self._html_search_meta('isFamilyFriendly', html)
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
if not family_friendly:
return None
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
return info
- if item_type == 'TVEpisode':
+ if item_type in ('TVEpisode', 'Episode'):
info.update({
'episode': unescapeHTML(e.get('name')),
'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')),
})
part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article':
info.update({
})
elif item_type == 'VideoObject':
extract_video_object(e)
- elif item_type == 'WebPage':
- video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
- extract_video_object(video)
+ continue
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
break
return dict((k, v) for k, v in info.items() if v is not None)
ms_info['timescale'] = int(timescale)
segment_duration = source.get('duration')
if segment_duration:
- ms_info['segment_duration'] = int(segment_duration)
+ ms_info['segment_duration'] = float(segment_duration)
def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization'))
'Bandwidth': bandwidth,
}
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
- 'url': media_template % {
+ media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
'Number': segment_number,
}
representation_ms_info['fragments'].append({
- 'url': segment_url,
+ media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({
- 'url': representation_ms_info['segment_urls'][segment_index],
+ location_key(segment_uri): segment_uri,
'duration': duration,
})
segment_index += 1
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
+ 'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
})
initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
- f['fragments'].append({'url': initialization_url})
+ f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
- for fragment in f['fragments']:
- fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
return f
return {}
- def _media_formats(src, cur_media_type):
+ def _media_formats(src, cur_media_type, type_info={}):
full_url = absolute_url(src)
- ext = determine_ext(full_url)
+ ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
return is_plain_url, formats
entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+ # so we wll include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
- in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+ in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
- r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+ r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
src = source_attributes.get('src')
if not src:
continue
- is_plain_url, formats = _media_formats(src, media_type)
+ f = parse_content_type(source_attributes.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
- f = parse_content_type(source_attributes.get('type'))
f.update(formats[0])
media_info['formats'].append(f)
else:
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
+ if not isinstance(track, dict):
+ continue
if track.get('kind') != 'captions':
continue
track_url = urljoin(base_url, track.get('file'))
urls = []
formats = []
for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
source_url = self._proto_relative_url(source.get('file'))
if not source_url:
continue