X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Finstagram.py;h=4667335e03faa2a9fac1433718f17eb714b79d64;hb=fd545fc6d1a5b489830e80c421f7e09264e0c80f;hp=8f7f232bea720ce0cfbf3c8e6aa9b38bddb93658;hpb=9a700deea47b2514ef07d4ab7a0c21c7942b8b26;p=youtube-dl diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8f7f232be..4667335e0 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( get_element_by_attribute, int_or_none, @@ -22,13 +23,14 @@ class InstagramIE(InfoExtractor): 'ext': 'mp4', 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1371748545, 'upload_date': '20130620', 'uploader_id': 'naomipq', 'uploader': 'Naomi Leonor Phan-Quang', 'like_count': int, 'comment_count': int, + 'comments': list, }, }, { # missing description @@ -37,17 +39,45 @@ class InstagramIE(InfoExtractor): 'id': 'BA-pQFBG8HZ', 'ext': 'mp4', 'title': 'Video by britneyspears', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1453760977, 'upload_date': '20160125', 'uploader_id': 'britneyspears', 'uploader': 'Britney Spears', 'like_count': int, 'comment_count': int, + 'comments': list, }, 'params': { 'skip_download': True, }, + }, { + # multi video post + 'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/', + 'playlist': [{ + 'info_dict': { + 'id': 'BQ0dSaohpPW', + 'ext': 'mp4', + 'title': 'Video 1', + }, + }, { + 'info_dict': { + 'id': 'BQ0dTpOhuHT', + 'ext': 'mp4', + 'title': 'Video 2', + }, + }, { + 'info_dict': { + 'id': 'BQ0dT7RBFeF', + 'ext': 'mp4', + 'title': 'Video 3', + }, + }], + 'info_dict': { + 'id': 'BQ0eAlwhDrw', + 'title': 'Post by instagram', + 'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957', + }, }, { 'url': 'https://instagram.com/p/-Cmh1cukG2/', 'only_matching': True, @@ -82,7 +112,8 @@ class InstagramIE(InfoExtractor): webpage = self._download_webpage(url, video_id) (video_url, description, thumbnail, timestamp, uploader, - uploader_id, like_count, comment_count) = [None] * 8 + uploader_id, like_count, comment_count, comments, height, + width) = [None] * 11 shared_data = self._parse_json( self._search_regex( @@ -91,9 +122,14 @@ class InstagramIE(InfoExtractor): video_id, fatal=False) if shared_data: media = try_get( - shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) + shared_data, + (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], + lambda x: x['entry_data']['PostPage'][0]['media']), + dict) if media: video_url = media.get('video_url') + height = int_or_none(media.get('dimensions', {}).get('height')) + width = int_or_none(media.get('dimensions', {}).get('width')) description = media.get('caption') thumbnail = media.get('display_src') timestamp = int_or_none(media.get('date')) @@ -101,10 +137,50 @@ class InstagramIE(InfoExtractor): uploader_id = media.get('owner', {}).get('username') like_count = int_or_none(media.get('likes', {}).get('count')) comment_count = int_or_none(media.get('comments', {}).get('count')) + comments = [{ + 'author': comment.get('user', {}).get('username'), + 'author_id': comment.get('user', {}).get('id'), + 'id': comment.get('id'), + 'text': comment.get('text'), + 'timestamp': int_or_none(comment.get('created_at')), + } for comment in media.get( + 'comments', {}).get('nodes', []) if comment.get('text')] + if not video_url: + edges = try_get( + media, lambda x: x['edge_sidecar_to_children']['edges'], + list) or [] + if edges: + entries = [] + for edge_num, edge in enumerate(edges, start=1): + node = try_get(edge, lambda x: x['node'], dict) + if not node: + continue + node_video_url = try_get(node, lambda x: x['video_url'], compat_str) + if not node_video_url: + continue + entries.append({ + 'id': node.get('shortcode') or node['id'], + 'title': 'Video %d' % edge_num, + 'url': node_video_url, + 'thumbnail': node.get('display_url'), + 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])), + 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])), + 'view_count': int_or_none(node.get('video_view_count')), + }) + return self.playlist_result( + entries, video_id, + 'Post by %s' % uploader_id if uploader_id else None, + description) if not video_url: video_url = self._og_search_video_url(webpage, secure=False) + formats = [{ + 'url': video_url, + 'width': width, + 'height': height, + }] + if not uploader_id: uploader_id = self._search_regex( r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', @@ -121,7 +197,7 @@ class InstagramIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, 'description': description, @@ -131,6 +207,7 @@ class InstagramIE(InfoExtractor): 'uploader': uploader, 'like_count': like_count, 'comment_count': comment_count, + 'comments': comments, } @@ -150,7 +227,7 @@ class InstagramUserIE(InfoExtractor): 'id': '614605558512799803_462752227', 'ext': 'mp4', 'title': '#Porsche Intelligent Performance.', - 'thumbnail': 're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.jpg', 'uploader': 'Porsche', 'uploader_id': 'porsche', 'timestamp': 1387486713,