import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
ExtractorError,
)
+
class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
'info_dict': {
'id': 'A61SaA1',
'ext': 'mp4',
- 'title': 'MRW gifv is up and running without any bugs',
- 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
},
}, {
'url': 'https://imgur.com/A61SaA1',
'info_dict': {
'id': 'A61SaA1',
'ext': 'mp4',
- 'title': 'MRW gifv is up and running without any bugs',
- 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
- if not sources:
- raise ExtractorError(
- 'No sources found for video %s' % video_id, expected=True)
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, video_id), video_id)
width = int_or_none(self._search_regex(
r'<param name="width" value="([0-9]+)"',
r'<param name="height" value="([0-9]+)"',
webpage, 'height', fatal=False))
- formats = []
video_elements = self._search_regex(
r'(?s)<div class="video-elements">(.*?)</div>',
- webpage, 'video elements')
+ webpage, 'video elements', default=None)
+ if not video_elements:
+ raise ExtractorError(
+ 'No sources found for video %s. Maybe an image?' % video_id,
+ expected=True)
+
formats = []
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
formats.append({
'description': self._og_search_description(webpage),
'title': self._og_search_title(webpage),
}
+
+
+class ImgurAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
+
+ _TEST = {
+ 'url': 'http://imgur.com/gallery/Q95ko',
+ 'info_dict': {
+ 'id': 'Q95ko',
+ },
+ 'playlist_count': 25,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ album_images = self._download_json(
+ 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
+ album_id)['data']['images']
+
+ entries = [
+ self.url_result('http://imgur.com/%s' % image['hash'])
+ for image in album_images if image.get('hash')]
+
+ return self.playlist_result(entries, album_id)