X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=b7437af5aa769c98f629d1d1f458c4a69adc77bb;hb=912e0b7e46d795df3ec1866f9b0ff071cca8d550;hp=16ae4b98ffe09c97f604981bf6c2ce9dc1e44e03;hpb=11bed5827dace09b5483b159476ce9f8c29d6078;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 16ae4b98f..b7437af5a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,6 +15,7 @@ import xml.etree.ElementTree
from ..compat import (
compat_cookiejar,
compat_cookies,
+ compat_getpass,
compat_HTTPError,
compat_http_client,
compat_urllib_error,
@@ -610,7 +611,7 @@ class InfoExtractor(object):
return (username, password)
- def _get_tfa_info(self):
+ def _get_tfa_info(self, note='two-factor verification code'):
"""
Get the two-factor authentication info
TODO - asking the user will be required for sms/phone verify
@@ -624,7 +625,7 @@ class InfoExtractor(object):
if downloader_params.get('twofactor', None) is not None:
return downloader_params['twofactor']
- return None
+ return compat_getpass('Type %s and press [Return]: ' % note)
# Helper functions for extracting OpenGraph info
@staticmethod
@@ -724,16 +725,18 @@ class InfoExtractor(object):
@staticmethod
def _hidden_inputs(html):
- return dict([
- (input.group('name'), input.group('value')) for input in re.finditer(
- r'''(?x)
- ["\'])hidden(?P=q_hidden)\s+
- name=(?P["\'])(?P.+?)(?P=q_name)\s+
- (?:id=(?P["\']).+?(?P=q_id)\s+)?
- value=(?P["\'])(?P.*?)(?P=q_value)
- ''', html)
- ])
+ hidden_inputs = {}
+ for input in re.findall(r']+)>', html):
+ if not re.search(r'type=(["\'])hidden\1', input):
+ continue
+ name = re.search(r'name=(["\'])(?P.+?)\1', input)
+ if not name:
+ continue
+ value = re.search(r'value=(["\'])(?P.*?)\1', input)
+ if not value:
+ continue
+ hidden_inputs[name.group('value')] = value.group('value')
+ return hidden_inputs
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
@@ -1049,7 +1052,7 @@ class InfoExtractor(object):
return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase')
@@ -1088,6 +1091,12 @@ class InfoExtractor(object):
'width': width,
'height': height,
})
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
@@ -1126,7 +1135,7 @@ class InfoExtractor(object):
return formats
- def _parse_smil_subtitles(self, smil, namespace=None):
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
subtitles = {}
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
src = textstream.get('src')
@@ -1135,9 +1144,14 @@ class InfoExtractor(object):
ext = textstream.get('ext') or determine_ext(src)
if not ext:
type_ = textstream.get('type')
- if type_ == 'text/srt':
- ext = 'srt'
- lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
+ SUBTITLES_TYPES = {
+ 'text/vtt': 'vtt',
+ 'text/srt': 'srt',
+ 'application/smptett+xml': 'tt',
+ }
+ if type_ in SUBTITLES_TYPES:
+ ext = SUBTITLES_TYPES[type_]
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
subtitles.setdefault(lang, []).append({
'url': src,
'ext': ext,
@@ -1265,6 +1279,26 @@ class InfoExtractor(object):
def _get_subtitles(self, *args, **kwargs):
    # Extension hook: extractors that support subtitles must override
    # this method; the base implementation always raises.
    raise NotImplementedError("This method must be implemented by subclasses")
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs
+ will be dropped. """
+ list1_urls = set([item['url'] for item in subtitle_list1])
+ ret = list(subtitle_list1)
+ ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+ return ret
+
@classmethod
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
    """ Merge two subtitle dictionaries, language by language.

    For languages present in both dicts the item lists are combined via
    _merge_subtitle_items (items from subtitle_dict1 win on URL clashes).
    Neither input dict is mutated; a new dict is returned.
    """
    # Removed stray debug print() calls that leaked both input dicts and
    # the result to stdout on every merge.
    ret = dict(subtitle_dict1)
    for lang in subtitle_dict2:
        ret[lang] = cls._merge_subtitle_items(
            subtitle_dict1.get(lang, []), subtitle_dict2[lang])
    return ret
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) or
self._downloader.params.get('listsubtitles')):