X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=be2b6ff6678d6accf8ddbeafbc1436f3913b1061;hb=4080530624eda994d535e1a01c38ddd6d9aa3805;hp=57793537b07573e691c8d664f0a06f95d99a5fde;hpb=4315f74fa8e97ca1fdd1fe919f777b3942da2028;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 57793537b..be2b6ff66 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -45,6 +45,7 @@ from ..utils import ( unescapeHTML, unified_strdate, url_basename, + xpath_element, xpath_text, xpath_with_ns, determine_protocol, @@ -52,6 +53,7 @@ from ..utils import ( mimetype2ext, update_Request, update_url_query, + parse_m3u8_attributes, ) @@ -747,10 +749,12 @@ class InfoExtractor(object): return self._og_search_property('url', html, **kargs) def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): + if not isinstance(name, (list, tuple)): + name = [name] if display_name is None: - display_name = name + display_name = name[0] return self._html_search_regex( - self._meta_regex(name), + [self._meta_regex(n) for n in name], html, display_name, fatal=fatal, group='content', **kwargs) def _dc_search_uploader(self, html): @@ -874,7 +878,11 @@ class InfoExtractor(object): f['ext'] = determine_ext(f['url']) if isinstance(field_preference, (list, tuple)): - return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) + return tuple( + f.get(field) + if f.get(field) is not None + else ('' if field == 'format_id' else -1) + for field in field_preference) preference = f.get('preference') if preference is None: @@ -1030,7 +1038,7 @@ class InfoExtractor(object): if base_url: base_url = base_url.strip() - bootstrap_info = xpath_text( + bootstrap_info = xpath_element( manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], 'bootstrap info', default=None) @@ -1085,7 +1093,7 @@ class 
InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, - 'ext': 'flv' if bootstrap_info else None, + 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, 'height': height, @@ -1149,23 +1157,11 @@ class InfoExtractor(object): }] last_info = None last_media = None - kv_rex = re.compile( - r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): - last_info = {} - for m in kv_rex.finditer(line): - v = m.group('val') - if v.startswith('"'): - v = v[1:-1] - last_info[m.group('key')] = v + last_info = parse_m3u8_attributes(line) elif line.startswith('#EXT-X-MEDIA:'): - last_media = {} - for m in kv_rex.finditer(line): - v = m.group('val') - if v.startswith('"'): - v = v[1:-1] - last_media[m.group('key')] = v + last_media = parse_m3u8_attributes(line) elif line.startswith('#') or not line.strip(): continue else: @@ -1733,6 +1729,13 @@ class InfoExtractor(object): def _mark_watched(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + def geo_verification_headers(self): + headers = {} + geo_verification_proxy = self._downloader.params.get('geo_verification_proxy') + if geo_verification_proxy: + headers['Ytdl-request-proxy'] = geo_verification_proxy + return headers + class SearchInfoExtractor(InfoExtractor): """