X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Finstagram.py;h=4e62098b05fa1ec753f240adc5a27b7da57e2e95;hb=b22ca76204e1a05e1c4b07d24cb6a0dbbc09d18e;hp=e5e16ca3b2aa15b64b253496e129cf7813468abe;hpb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;p=youtube-dl diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index e5e16ca3b..4e62098b0 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -4,8 +4,10 @@ import re from .common import InfoExtractor from ..utils import ( + get_element_by_attribute, int_or_none, limit_length, + lowercase_escape, ) @@ -21,19 +23,45 @@ class InstagramIE(InfoExtractor): 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', } + }, { + # missing description + 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', + 'info_dict': { + 'id': 'BA-pQFBG8HZ', + 'ext': 'mp4', + 'uploader_id': 'britneyspears', + 'title': 'Video by britneyspears', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://instagram.com/p/-Cmh1cukG2/', 'only_matching': True, }] + @staticmethod + def _extract_embed_url(webpage): + blockquote_el = get_element_by_attribute( + 'class', 'instagram-media', webpage) + if blockquote_el is None: + return + + mobj = re.search( + r']+href=([\'"])(?P[^\'"]+)\1', blockquote_el) + if mobj: + return mobj.group('link') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', webpage, 'uploader id', fatal=False) - desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description', - fatal=False) + desc = self._search_regex( + r'"caption":"(.+?)"', webpage, 'description', default=None) + if desc is not None: + desc = lowercase_escape(desc) return { 'id': video_id,