X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=248b30ffb329d3870e0249e4d775c3f969849ce8;hb=9d11a41fe4f1f70682640e8522565827047dbf89;hp=9424d5e2669a72e791a0ba0a0120de0bfec27fc8;hpb=5f263296eaa72ddca232d734a2625bcd85771908;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9424d5e26..248b30ffb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,7 @@ from ..utils import ( get_element_by_id, get_element_by_attribute, ExtractorError, + RegexNotFoundError, unescapeHTML, unified_strdate, orderedSet, @@ -131,6 +132,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ( (?:https?://|//)? # http(s):// or protocol-independent URL (optional) (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| + (?:www\.)?deturl\.com/www\.youtube\.com/| + (?:www\.)?pwnyoutube\.com| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls @@ -213,6 +216,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50}, '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50}, + + # RTMP (unnamed) + '_rtmp': {'protocol': 'rtmp'}, } IE_NAME = u'youtube' @@ -998,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'lang': lang, 'v': video_id, 'fmt': self._downloader.params.get('subtitlesformat', 'srt'), - 'name': l[0].encode('utf-8'), + 'name': unescapeHTML(l[0]).encode('utf-8'), }) url = u'http://www.youtube.com/api/timedtext?' + params sub_lang_list[lang] = url @@ -1273,7 +1279,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] + video_url_list = [('_rtmp', video_info['conn'][0])] elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] if 'rtmpe%3Dyes' in encoded_url_map: @@ -1443,7 +1449,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if re.search(self._MORE_PAGES_INDICATOR, page) is None: break - playlist_title = self._og_search_title(page) + try: + playlist_title = self._og_search_title(page) + except RegexNotFoundError: + self.report_warning( + u'Playlist page is missing OpenGraph title, falling back ...', + playlist_id) + playlist_title = self._html_search_regex( + r'

(.*?)

', page, u'title') url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, playlist_title) @@ -1759,6 +1772,6 @@ class YoutubeTruncatedURLIE(InfoExtractor): u'Did you forget to quote the URL? Remember that & is a meta ' u'character in most shells, so you want to put the URL in quotes, ' u'like youtube-dl ' - u'\'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\'' - u' (or simply youtube-dl BaW_jenozKc ).', + u'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' + u' or simply youtube-dl BaW_jenozKc .', expected=True)