class InstagramIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
}, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
+ }, {
+ 'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
+ 'only_matching': True,
}]
@staticmethod
def _extract_embed_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
blockquote_el = get_element_by_attribute(
'class', 'instagram-media', webpage)
if blockquote_el is None:
return mobj.group('link')
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ url = mobj.group('url')
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',