X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=07d58afe7bb2a89a70a61dba447723e1518cc873;hb=39e1c4f08c4cfca81943e73523bd66b890f5aff2;hp=67f49f51b52c85c488851903e309c38c217f23dc;hpb=d16b3c6677f1f699635892876f4566962094221d;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 67f49f51b..07d58afe7 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -662,6 +662,24 @@ class InfoExtractor(object):
else:
return res
+ def _get_netrc_login_info(self, netrc_machine=None):  # Return (username, password) looked up via netrc; both stay None when nothing usable is found
+ username = None
+ password = None
+ netrc_machine = netrc_machine or self._NETRC_MACHINE  # a falsy argument falls back to the extractor's own _NETRC_MACHINE
+
+ if self._downloader.params.get('usenetrc', False):  # netrc is consulted only when the 'usenetrc' param is truthy
+ try:
+ info = netrc.netrc().authenticators(netrc_machine)  # stdlib netrc: a (login, account, password) tuple, or None when there is no entry
+ if info is not None:
+ username = info[0]  # login field
+ password = info[2]  # password field; info[1] (account) is not used
+ else:
+ raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)  # raised so a missing entry is reported by the handler below
+ except (IOError, netrc.NetrcParseError) as err:
+ self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))  # reported as a warning; the (None, None) defaults are returned
+
+ return (username, password)
+
def _get_login_info(self):
"""
Get the login info as (username, password)
@@ -679,16 +697,8 @@ class InfoExtractor(object):
if downloader_params.get('username') is not None:
username = downloader_params['username']
password = downloader_params['password']
- elif downloader_params.get('usenetrc', False):
- try:
- info = netrc.netrc().authenticators(self._NETRC_MACHINE)
- if info is not None:
- username = info[0]
- password = info[2]
- else:
- raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
- except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
+ else:
+ username, password = self._get_netrc_login_info()
return (username, password)
@@ -816,11 +826,14 @@ class InfoExtractor(object):
json_ld = self._search_regex(
r'(?s)',
html, 'JSON-LD', group='json_ld', **kwargs)
+ default = kwargs.get('default', NO_DEFAULT)
if not json_ld:
- return {}
- return self._json_ld(
- json_ld, video_id, fatal=kwargs.get('fatal', True),
- expected_type=expected_type)
+ return default if default is not NO_DEFAULT else {}
+ # JSON-LD may be malformed and thus `fatal` should be respected.
+ # At the same time `default` may be passed that assumes `fatal=False`
+ # for _search_regex. Let's simulate the same behavior here as well.
+ fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+ return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
if isinstance(json_ld, compat_str):
@@ -1140,7 +1153,7 @@ class InfoExtractor(object):
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
- 'preference': -100,
+ 'preference': preference - 100 if preference else -100,
'resolution': 'multiple',
'format_note': 'Quality selection URL',
}
@@ -1682,7 +1695,7 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
- def _parse_html5_media_entries(self, base_url, webpage):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
@@ -1697,6 +1710,21 @@ class InfoExtractor(object):
return f
return {}
+ def _media_formats(src, cur_media_type):  # Build the format list for one media URL; returns (is_plain_url, formats)
+ full_url = absolute_url(src)  # resolved against base_url by the enclosing method's absolute_url helper
+ if determine_ext(full_url) == 'm3u8':  # m3u8 URLs are expanded through _extract_m3u8_formats rather than used as a single format
+ is_plain_url = False
+ formats = self._extract_m3u8_formats(
+ full_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id)  # video_id and m3u8_id are the enclosing method's parameters
+ else:
+ is_plain_url = True
+ formats = [{
+ 'url': full_url,
+ 'vcodec': 'none' if cur_media_type == 'audio' else None,  # 'none' only when the media type is 'audio'; otherwise left as None
+ }]
+ return is_plain_url, formats
+
entries = []
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)(?P=tag)>', webpage):
media_info = {
@@ -1706,10 +1734,8 @@ class InfoExtractor(object):
media_attributes = extract_attributes(media_tag)
src = media_attributes.get('src')
if src:
- media_info['formats'].append({
- 'url': absolute_url(src),
- 'vcodec': 'none' if media_type == 'audio' else None,
- })
+ _, formats = _media_formats(src)
+ media_info['formats'].extend(formats)
media_info['thumbnail'] = media_attributes.get('poster')
if media_content:
for source_tag in re.findall(r'