X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=f420b81482a6b0684cc44b42010b02769d869cc4;hb=6feb2d5e803dee49b2e4a8f3a7f33ca7f01f96b7;hp=7c50881c4453eaff4ac69776fcc2dc94feef8d31;hpb=63adb0cc617f136fb080e1df600360834d09b10c;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7c50881c4..f420b8148 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -224,6 +224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, @@ -440,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'signature=([a-zA-Z]+)', jscode, + r'signature=([$a-zA-Z]+)', jscode, u'Initial JS player signature function name') jsi = JSInterpreter(jscode) @@ -1386,13 +1387,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | p/ ) ( - (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} + (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} # Top tracks, they can also include dots |(?:MC)[\w\.]* ) .* | - ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) + ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _MORE_PAGES_INDICATOR = r'data-link-type="next"' @@ -1697,14 +1698,14 @@ class YoutubeSearchURLIE(InfoExtractor): webpage = self._download_webpage(url, query) result_code = self._search_regex( - r'(?s)
    ', webpage, u'result HTML') + r'(?s)
      ', webpage, u'result HTML') part_codes = re.findall( r'(?s)

      (.*?)

      ', result_code) entries = [] for part_code in part_codes: part_title = self._html_search_regex( - r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) + [r'(?s)title="([^"]+)"', r'>([^<]+)'], part_code, 'item title', fatal=False) part_url_snippet = self._html_search_regex( r'(?s)href="([^"]+)"', part_code, 'item URL') part_url = compat_urlparse.urljoin( @@ -1824,10 +1825,21 @@ class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| + (?:https?://)?[^/]+/watch\?(?: + feature=[a-z_]+| + annotation_id=annotation_[^&]+ + )?$| (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ ''' + _TESTS = [{ + 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'only_matching': True, + }, { + 'url': 'http://www.youtube.com/watch?', + 'only_matching': True, + }] + def _real_extract(self, url): raise ExtractorError( u'Did you forget to quote the URL? Remember that & is a meta '