X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftwitter.py;h=ae1212d19badd383e0f24b2bc1a6b5b5ca0a3a1b;hb=5d6c3d6a665ed5de89bb3b056bb051b043400897;hp=1472f22a71bec6a22e133529c6a31642a9d220d1;hpb=c88aec845a680ef9404b637b3dbcf706dcf00b68;p=youtube-dl diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 1472f22a7..ae1212d19 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -9,6 +9,8 @@ from ..utils import ( float_or_none, xpath_text, remove_end, + int_or_none, + ExtractorError, ) @@ -37,6 +39,19 @@ class TwitterCardIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg', 'duration': 80.155, }, + }, + { + 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', + 'md5': 'b6f35e8b08a0bec6c8af77a2f4b3a814', + 'info_dict': { + 'id': 'dq4Oj5quskI', + 'ext': 'mp4', + 'title': 'Ubuntu 11.10 Overview', + 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/', + 'upload_date': '20111013', + 'uploader': 'OMG! Ubuntu!', + 'uploader_id': 'omgubuntu', + }, } ] @@ -56,6 +71,12 @@ class TwitterCardIE(InfoExtractor): request.add_header('User-Agent', user_agent) webpage = self._download_webpage(request, video_id) + youtube_url = self._html_search_regex( + r']+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', + webpage, 'youtube iframe', default=None) + if youtube_url: + return self.url_result(youtube_url, 'Youtube') + config = self._parse_json(self._html_search_regex( r'data-player-config="([^"]+)"', webpage, 'data player config'), video_id) @@ -101,7 +122,7 @@ class TwitterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P[^/]+)/status/(?P\d+)' _TEMPLATE_URL = 'https://twitter.com/%s/status/%s' - _TEST = { + _TESTS = [{ 'url': 'https://twitter.com/freethenipple/status/643211948184596480', 'md5': '31cd83a116fc41f99ae3d909d4caf6a0', 'info_dict': { @@ -114,7 +135,19 @@ class TwitterIE(InfoExtractor): 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', }, - } + }, { + 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', + 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', + 'info_dict': { + 'id': '657991469417025536', + 'ext': 'mp4', + 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai', + 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"', + 'thumbnail': 're:^https?://.*\.png', + 'uploader': 'Gifs', + 'uploader_id': 'giphz', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -131,17 +164,41 @@ class TwitterIE(InfoExtractor): mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title) title, short_url = mobj.groups() - card_id = self._search_regex( - r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url') - card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id - - return { - '_type': 'url_transparent', - 'ie_key': 'TwitterCard', + info = { 'uploader_id': user_id, 'uploader': username, - 'url': card_url, 'webpage_url': url, 'description': '%s on Twitter: "%s %s"' % (username, title, short_url), 'title': username + ' - ' + title, } + + card_id = self._search_regex( + r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None) + if card_id: + card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id + info.update({ + '_type': 'url_transparent', + 'ie_key': 'TwitterCard', + 'url': card_url, + }) + return info + + mobj = re.search(r'''(?x) + ]+class="animated-gif"[^>]+ + (?:data-height="(?P\d+)")?[^>]+ + (?:data-width="(?P\d+)")?[^>]+ + (?:poster="(?P[^"]+)")?[^>]*>\s* + ]+video-src="(?P[^"]+)" + ''', webpage) + + if mobj: + info.update({ + 'id': twid, + 'url': mobj.group('url'), + 'height': int_or_none(mobj.group('height')), + 'width': int_or_none(mobj.group('width')), + 'thumbnail': mobj.group('poster'), + }) + return info + + raise ExtractorError('There\'s not video in this tweet.')