X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Finstagram.py;h=196407b063a9393b94c759be6c8080de9a494277;hb=02af6ec70784d168fd27f379a1bf1d0f228f91b3;hp=8f7f232bea720ce0cfbf3c8e6aa9b38bddb93658;hpb=9a700deea47b2514ef07d4ab7a0c21c7942b8b26;p=youtube-dl diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8f7f232be..196407b06 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Naomi Leonor Phan-Quang', 'like_count': int, 'comment_count': int, + 'comments': list, }, }, { # missing description @@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Britney Spears', 'like_count': int, 'comment_count': int, + 'comments': list, }, 'params': { 'skip_download': True, @@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor): webpage = self._download_webpage(url, video_id) (video_url, description, thumbnail, timestamp, uploader, - uploader_id, like_count, comment_count) = [None] * 8 + uploader_id, like_count, comment_count, height, width) = [None] * 10 shared_data = self._parse_json( self._search_regex( @@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor): shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) if media: video_url = media.get('video_url') + height = int_or_none(media.get('dimensions', {}).get('height')) + width = int_or_none(media.get('dimensions', {}).get('width')) description = media.get('caption') thumbnail = media.get('display_src') timestamp = int_or_none(media.get('date')) @@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor): uploader_id = media.get('owner', {}).get('username') like_count = int_or_none(media.get('likes', {}).get('count')) comment_count = int_or_none(media.get('comments', {}).get('count')) + comments = [{ + 'author': comment.get('user', {}).get('username'), + 'author_id': comment.get('user', {}).get('id'), + 'id': comment.get('id'), + 'text': comment.get('text'), + 'timestamp': int_or_none(comment.get('created_at')), + } for comment in media.get( + 'comments', {}).get('nodes', []) if comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) + formats = [{ + 'url': video_url, + 'width': width, + 'height': height, + }] + if not uploader_id: uploader_id = self._search_regex( r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', @@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, 'description': description, @@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor): 'uploader': uploader, 'like_count': like_count, 'comment_count': comment_count, + 'comments': comments, }