X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=9c40d56a9d71a3e2c1754e93d1052e93ba98532f;hb=e7d8e98a9ffdec2502bedb21a4f043df6da225a5;hp=65835d257197361a7ea3e5159b37de6f03ec62ad;hpb=ea99110d247d3c27f1cc2e2cb8c6f73c6405c383;p=youtube-dl
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 65835d257..9c40d56a9 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -152,6 +152,7 @@ class InfoExtractor(object):
description: Full video description.
uploader: Full name of the video uploader.
creator: The main artist who created the video.
+ release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
@@ -510,6 +511,18 @@ class InfoExtractor(object):
"""Report attempt to log in."""
self.to_screen('Logging in')
+ @staticmethod
+ def raise_login_required(msg='This video is only available for registered users'):
+ raise ExtractorError(
+ '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+ expected=True)
+
+ @staticmethod
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
+ raise ExtractorError(
+ '%s. You might want to use --proxy to workaround.' % msg,
+ expected=True)
+
# Methods for following #608
@staticmethod
def url_result(url, ie=None, video_id=None, video_title=None):
@@ -725,9 +738,10 @@ class InfoExtractor(object):
@staticmethod
def _hidden_inputs(html):
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {}
- for input in re.findall(r'<input([^>]+)>', html):
- if not re.search(r'type=(["\'])hidden\1', input):
+ for input in re.findall(r'(?i)<input([^>]+)>', html):
+ if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
if not name:
@@ -740,7 +754,7 @@ class InfoExtractor(object):
def _form_hidden_inputs(self, form_id, html):
form = self._search_regex(
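- r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,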
html, '%s form' % form_id, group='form')
return self._hidden_inputs(form)
@@ -1052,7 +1066,7 @@ class InfoExtractor(object):
return self._search_regex(
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
base = smil_url
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
b = meta.get('base') or meta.get('httpBase')
@@ -1070,7 +1084,7 @@ class InfoExtractor(object):
if not src:
continue
- bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
filesize = int_or_none(video.get('size') or video.get('fileSize'))
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
@@ -1091,6 +1105,12 @@ class InfoExtractor(object):
'width': width,
'height': height,
})
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
@@ -1129,7 +1149,7 @@ class InfoExtractor(object):
return formats
- def _parse_smil_subtitles(self, smil, namespace=None):
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
subtitles = {}
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
src = textstream.get('src')
@@ -1138,9 +1158,14 @@ class InfoExtractor(object):
ext = textstream.get('ext') or determine_ext(src)
if not ext:
type_ = textstream.get('type')
- if type_ == 'text/srt':
- ext = 'srt'
- lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
+ SUBTITLES_TYPES = {
+ 'text/vtt': 'vtt',
+ 'text/srt': 'srt',
+ 'application/smptett+xml': 'tt',
+ }
+ if type_ in SUBTITLES_TYPES:
+ ext = SUBTITLES_TYPES[type_]
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
subtitles.setdefault(lang, []).append({
'url': src,
'ext': ext,
@@ -1268,6 +1293,23 @@ class InfoExtractor(object):
def _get_subtitles(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs
+ will be dropped. """
+ list1_urls = set([item['url'] for item in subtitle_list1])
+ ret = list(subtitle_list1)
+ ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+ return ret
+
+ @classmethod
+ def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+ """ Merge two subtitle dictionaries, language by language. """
+ ret = dict(subtitle_dict1)
+ for lang in subtitle_dict2:
+ ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+ return ret
+
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) or
self._downloader.params.get('listsubtitles')):