X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Finstagram.py;h=98f408c18650cf8393869432a861a3486575b533;hb=ec85ded83cbfa652ba94cb080aab52d8b270212a;hp=3cbe77ad80f2fc9a03c738745524d5dac98c9d37;hpb=c23533a100eb0a383b60ed2e5990602e9318fe4b;p=youtube-dl diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 3cbe77ad8..98f408c18 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -8,6 +8,7 @@ from ..utils import ( int_or_none, limit_length, lowercase_escape, + try_get, ) @@ -19,18 +20,32 @@ class InstagramIE(InfoExtractor): 'info_dict': { 'id': 'aye83DjauH', 'ext': 'mp4', - 'uploader_id': 'naomipq', 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', - } + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1371748545, + 'upload_date': '20130620', + 'uploader_id': 'naomipq', + 'uploader': 'Naomi Leonor Phan-Quang', + 'like_count': int, + 'comment_count': int, + 'comments': list, + }, }, { # missing description 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', 'info_dict': { 'id': 'BA-pQFBG8HZ', 'ext': 'mp4', - 'uploader_id': 'britneyspears', 'title': 'Video by britneyspears', + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1453760977, + 'upload_date': '20160125', + 'uploader_id': 'britneyspears', + 'uploader': 'Britney Spears', + 'like_count': int, + 'comment_count': int, + 'comments': list, }, 'params': { 'skip_download': True, @@ -67,21 +82,74 @@ class InstagramIE(InfoExtractor): url = mobj.group('url') webpage = self._download_webpage(url, video_id) - uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', - webpage, 'uploader id', fatal=False) - desc = self._search_regex( - r'"caption":"(.+?)"', webpage, 'description', default=None) - if desc is not None: - desc = lowercase_escape(desc) + + (video_url, description, thumbnail, timestamp, uploader, + uploader_id, like_count, comment_count, height, width) = [None] * 10 + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + webpage, 'shared data', default='{}'), + video_id, fatal=False) + if shared_data: + media = try_get( + shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) + if media: + video_url = media.get('video_url') + height = int_or_none(media.get('dimensions', {}).get('height')) + width = int_or_none(media.get('dimensions', {}).get('width')) + description = media.get('caption') + thumbnail = media.get('display_src') + timestamp = int_or_none(media.get('date')) + uploader = media.get('owner', {}).get('full_name') + uploader_id = media.get('owner', {}).get('username') + like_count = int_or_none(media.get('likes', {}).get('count')) + comment_count = int_or_none(media.get('comments', {}).get('count')) + comments = [{ + 'author': comment.get('user', {}).get('username'), + 'author_id': comment.get('user', {}).get('id'), + 'id': comment.get('id'), + 'text': comment.get('text'), + 'timestamp': int_or_none(comment.get('created_at')), + } for comment in media.get( + 'comments', {}).get('nodes', []) if comment.get('text')] + + if not video_url: + video_url = self._og_search_video_url(webpage, secure=False) + + formats = [{ + 'url': video_url, + 'width': width, + 'height': height, + }] + + if not uploader_id: + uploader_id = self._search_regex( + r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', + webpage, 'uploader id', fatal=False) + + if not description: + description = self._search_regex( + r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) + if description is not None: + description = lowercase_escape(description) + + if not thumbnail: + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, - 'url': self._og_search_video_url(webpage, secure=False), + 'formats': formats, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, 'uploader_id': uploader_id, - 'description': desc, + 'uploader': uploader, + 'like_count': like_count, + 'comment_count': comment_count, + 'comments': comments, } @@ -101,7 +169,7 @@ class InstagramUserIE(InfoExtractor): 'id': '614605558512799803_462752227', 'ext': 'mp4', 'title': '#Porsche Intelligent Performance.', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'Porsche', 'uploader_id': 'porsche', 'timestamp': 1387486713,