)
from ..utils import (
clean_html,
+ encode_dict,
ExtractorError,
float_or_none,
get_element_by_attribute,
'hl': 'en_US',
}
- # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
- # chokes on unicode
- login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
- login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
+ login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii')
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
login_results = self._download_webpage(
'TrustDevice': 'on',
})
- tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
- tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
+ tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii')
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
tfa_results = self._download_webpage(
if 'ratebypass' not in url:
url += '&ratebypass=yes'
- width = None
- height = None
- size_str = url_data.get('size', [''])[0]
- if size_str.count('x') == 1:
- width, height = [int_or_none(x) for x in size_str.split('x')]
-
- format_url = {
+ # Some itags are not included in DASH manifest thus corresponding formats will
+ # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
+ # Trying to extract metadata from url_encoded_fmt_stream_map entry.
+ mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
+ width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+ dct = {
'format_id': format_id,
'url': url,
'player_url': player_url,
- # As of this writing these are only defined for DASH formats:
'filesize': int_or_none(url_data.get('clen', [None])[0]),
- 'tbr': float_or_none(url_data.get('bitrate', [None])[0], scale=1024),
+ 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'width': width,
'height': height,
'fps': int_or_none(url_data.get('fps', [None])[0]),
+ 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
}
-
- # drop Nones so they do not overwrite the defaults from self._formats
- format_url = dict((k, v) for k, v in format_url.items() if v is not None)
-
- format_full = self._formats.get(format_id, {}).copy()
- format_full.update(format_url)
-
- formats.append(format_full)
-
+ type_ = url_data.get('type', [None])[0]
+ if type_:
+ type_split = type_.split(';')
+ kind_ext = type_split[0].split('/')
+ if len(kind_ext) == 2:
+ kind, ext = kind_ext
+ dct['ext'] = ext
+ if kind in ('audio', 'video'):
+ codecs = None
+ for mobj in re.finditer(
+ r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
+ if mobj.group('key') == 'codecs':
+ codecs = mobj.group('val')
+ break
+ if codecs:
+ codecs = codecs.split(',')
+ if len(codecs) == 2:
+ acodec, vcodec = codecs[0], codecs[1]
+ else:
+ acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
+ dct.update({
+ 'acodec': acodec,
+ 'vcodec': vcodec,
+ })
+ if format_id in self._formats:
+ dct.update(self._formats[format_id])
+ formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
channel_page = self._download_webpage(
url + '?view=57', channel_id,
'Downloading channel page', fatal=False)
- channel_playlist_id = self._html_search_meta(
- 'channelId', channel_page, 'channel id', default=None)
- if not channel_playlist_id:
- channel_playlist_id = self._search_regex(
- r'data-channel-external-id="([^"]+)"',
- channel_page, 'channel id', default=None)
+ if channel_page is False:
+ channel_playlist_id = False
+ else:
+ channel_playlist_id = self._html_search_meta(
+ 'channelId', channel_page, 'channel id', default=None)
+ if not channel_playlist_id:
+ channel_playlist_id = self._search_regex(
+ r'data-channel-external-id="([^"]+)"',
+ channel_page, 'channel id', default=None)
if channel_playlist_id and channel_playlist_id.startswith('UC'):
playlist_id = 'UU' + channel_playlist_id[2:]
return self.url_result(
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
IE_NAME = 'youtube:show'
_TESTS = [{
- 'url': 'http://www.youtube.com/show/airdisasters',
- 'playlist_mincount': 3,
+ 'url': 'https://www.youtube.com/show/airdisasters',
+ 'playlist_mincount': 5,
'info_dict': {
'id': 'airdisasters',
'title': 'Air Disasters',
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(
- url, playlist_id, 'Downloading show webpage')
+ 'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage')
# There's one playlist for each season of the show
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
annotation_id=annotation_[^&]+|
x-yt-cl=[0-9]+|
hl=[^&]*|
+ t=[0-9]+
)?
|
attribution_link\?a=[^&]+
}, {
'url': 'https://www.youtube.com/watch?hl=en-GB',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?t=2372',
+ 'only_matching': True,
}]
def _real_extract(self, url):