X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=9c40d56a9d71a3e2c1754e93d1052e93ba98532f;hb=e7d8e98a9ffdec2502bedb21a4f043df6da225a5;hp=65835d257197361a7ea3e5159b37de6f03ec62ad;hpb=ea99110d247d3c27f1cc2e2cb8c6f73c6405c383;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 65835d257..9c40d56a9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -152,6 +152,7 @@ class InfoExtractor(object): description: Full video description. uploader: Full name of the video uploader. creator: The main artist who created the video. + release_date: The date (YYYYMMDD) when the video was released. timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). If not explicitly set, calculated from timestamp. @@ -510,6 +511,18 @@ class InfoExtractor(object): """Report attempt to log in.""" self.to_screen('Logging in') + @staticmethod + def raise_login_required(msg='This video is only available for registered users'): + raise ExtractorError( + '%s. Use --username and --password or --netrc to provide account credentials.' % msg, + expected=True) + + @staticmethod + def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): + raise ExtractorError( + '%s. You might want to use --proxy to workaround.' % msg, + expected=True) + # Methods for following #608 @staticmethod def url_result(url, ie=None, video_id=None, video_title=None): @@ -725,9 +738,10 @@ class InfoExtractor(object): @staticmethod def _hidden_inputs(html): + html = re.sub(r'', '', html) hidden_inputs = {} - for input in re.findall(r']+)>', html): - if not re.search(r'type=(["\'])hidden\1', input): + for input in re.findall(r'(?i)]+)>', html): + if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): continue name = re.search(r'name=(["\'])(?P.+?)\1', input) if not name: @@ -740,7 +754,7 @@ class InfoExtractor(object): def _form_hidden_inputs(self, form_id, html): form = self._search_regex( - r'(?s)]+?id=(["\'])%s\1[^>]*>(?P
.+?)
' % form_id, + r'(?is)]+?id=(["\'])%s\1[^>]*>(?P
.+?)
' % form_id, html, '%s form' % form_id, group='form') return self._hidden_inputs(form) @@ -1052,7 +1066,7 @@ class InfoExtractor(object): return self._search_regex( r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): base = smil_url for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): b = meta.get('base') or meta.get('httpBase') @@ -1070,7 +1084,7 @@ class InfoExtractor(object): if not src: continue - bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) + bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) filesize = int_or_none(video.get('size') or video.get('fileSize')) width = int_or_none(video.get('width')) height = int_or_none(video.get('height')) @@ -1091,6 +1105,12 @@ class InfoExtractor(object): 'width': width, 'height': height, }) + if transform_rtmp_url: + streamer, src = transform_rtmp_url(streamer, src) + formats[-1].update({ + 'url': streamer, + 'play_path': src, + }) continue src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) @@ -1129,7 +1149,7 @@ class InfoExtractor(object): return formats - def _parse_smil_subtitles(self, smil, namespace=None): + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): subtitles = {} for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): src = textstream.get('src') @@ -1138,9 +1158,14 @@ class InfoExtractor(object): ext = textstream.get('ext') or determine_ext(src) if not ext: type_ = textstream.get('type') - if type_ == 'text/srt': - ext = 'srt' - lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') + SUBTITLES_TYPES = { + 'text/vtt': 'vtt', + 'text/srt': 'srt', + 'application/smptett+xml': 'tt', + } + if type_ in SUBTITLES_TYPES: + ext = SUBTITLES_TYPES[type_] + lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang subtitles.setdefault(lang, []).append({ 'url': src, 'ext': ext, @@ -1268,6 +1293,23 @@ class InfoExtractor(object): def _get_subtitles(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") + @staticmethod + def _merge_subtitle_items(subtitle_list1, subtitle_list2): + """ Merge subtitle items for one language. Items with duplicated URLs + will be dropped. """ + list1_urls = set([item['url'] for item in subtitle_list1]) + ret = list(subtitle_list1) + ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls]) + return ret + + @classmethod + def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2): + """ Merge two subtitle dictionaries, language by language. """ + ret = dict(subtitle_dict1) + for lang in subtitle_dict2: + ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) + return ret + def extract_automatic_captions(self, *args, **kwargs): if (self._downloader.params.get('writeautomaticsub', False) or self._downloader.params.get('listsubtitles')):