[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / gfycat.py
index 6de78c49d18a8bac1b9071360e503cf2e8eb30c8..18a30fe678e2c6f97f44e64bce1d30ab31d877ec 100644 (file)
 # coding: utf-8
-
 from __future__ import unicode_literals
 
-import datetime
-
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    qualities,
+    ExtractorError,
+)
+
 
 class GfycatIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
-    _TESTS = [
-        {
-            'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
-            'info_dict': {
-                'id':          'DeadlyDecisiveGermanpinscher',
-                'title':       'Ghost in the Shell',
-                'ext':         'mp4',
-                'upload_date': '20140913'
-            }
-        },{
-            'url': 'http://gfycat.com/pleasinghilariouskusimanse',
-            'info_dict': {
-                'id':          'pleasinghilariouskusimanse',
-                'title':       'PleasingHilariousKusimanse',
-                'ext':         'webm',
-                'upload_date': '20150412'
-            },
-            'params': {
-                'format': 'webm',
-            },
-        },{
-            'url': 'http://gfycat.com/requiredunkemptbuzzard',
-            'info_dict': {
-                'id':          'requiredunkemptbuzzard',
-                'title':       'Headshot!',
-                'ext':         'gif',
-                'upload_date': '20150129'
-            },
-            'params': {
-                'format': 'gif',
-            },
-        },
-    ]
+    _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
+    _TESTS = [{
+        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
+        'info_dict': {
+            'id': 'DeadlyDecisiveGermanpinscher',
+            'ext': 'mp4',
+            'title': 'Ghost in the Shell',
+            'timestamp': 1410656006,
+            'upload_date': '20140914',
+            'uploader': 'anonymous',
+            'duration': 10.4,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'categories': list,
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
+        'info_dict': {
+            'id': 'JauntyTimelyAmazontreeboa',
+            'ext': 'mp4',
+            'title': 'JauntyTimelyAmazontreeboa',
+            'timestamp': 1411720126,
+            'upload_date': '20140926',
+            'uploader': 'anonymous',
+            'duration': 3.52,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'categories': list,
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
+        'only_matching': True
+    }, {
+        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
+        'only_matching': True
+    }, {
+        'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
+        'only_matching': True
+    }, {
+        'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
+        'only_matching': True
+    }, {
+        'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
+        'only_matching': True
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        json     = self._download_json("http://gfycat.com/cajax/get/" + video_id, video_id, 'Downloading video info')['gfyItem']
-        
-        # Title
-        # Use user title first, else fallback to url formated name
-        if json['title']:
-            video_title = json['title']
-        else:
-            video_title = json['gfyName']
-        
-        # Formats
-        # Pref: mp4, webm, gif
-        formats = [{
-            'format_id':  'mp4',
-            'ext':        'mp4',
-            'url':        json['mp4Url'],
-            'width':      json['width'],
-            'height':     json['height'],
-            'fps':        json['frameRate'],
-            'filesize':   json['mp4Size'],
-            'preference': '-1'
-        }, {
-            'format_id': 'webm',
-            'ext':       'webm',
-            'url':        json['webmUrl'],
-            'width':      json['width'],
-            'height':     json['height'],
-            'fps':        json['frameRate'],
-            'filesize':   json['webmSize'],
-            'preference': 0
-        }, {
-            'format_id':  'gif',
-            'ext':        'gif',
-            'url':        json['gifUrl'],
-            'width':      json['width'],
-            'height':     json['height'],
-            'fps':        json['frameRate'],
-            'filesize':   json['gifSize'],
-            'preference': 1
-        }]
-        
+
+        gfy = self._download_json(
+            'https://api.gfycat.com/v1/gfycats/%s' % video_id,
+            video_id, 'Downloading video info')
+        if 'error' in gfy:
+            raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
+        gfy = gfy['gfyItem']
+
+        title = gfy.get('title') or gfy['gfyName']
+        description = gfy.get('description')
+        timestamp = int_or_none(gfy.get('createDate'))
+        uploader = gfy.get('userName')
+        view_count = int_or_none(gfy.get('views'))
+        like_count = int_or_none(gfy.get('likes'))
+        dislike_count = int_or_none(gfy.get('dislikes'))
+        age_limit = 18 if gfy.get('nsfw') == '1' else 0
+
+        width = int_or_none(gfy.get('width'))
+        height = int_or_none(gfy.get('height'))
+        fps = int_or_none(gfy.get('frameRate'))
+        num_frames = int_or_none(gfy.get('numFrames'))
+
+        duration = float_or_none(num_frames, fps) if num_frames and fps else None
+
+        categories = gfy.get('tags') or gfy.get('extraLemmas') or []
+
+        FORMATS = ('gif', 'webm', 'mp4')
+        quality = qualities(FORMATS)
+
+        formats = []
+        for format_id in FORMATS:
+            video_url = gfy.get('%sUrl' % format_id)
+            if not video_url:
+                continue
+            filesize = int_or_none(gfy.get('%sSize' % format_id))
+            formats.append({
+                'url': video_url,
+                'format_id': format_id,
+                'width': width,
+                'height': height,
+                'fps': fps,
+                'filesize': filesize,
+                'quality': quality(format_id),
+            })
         self._sort_formats(formats)
-        
-        # Date
-        date = datetime.datetime.fromtimestamp(
-            int(json['createDate'])
-        ).strftime('%Y%m%d')
-        
-        # Length
-        duration = json['numFrames'] / json['frameRate']
-        
-        # Age limit
-        # 1 = nsfw / 0 = sfw
-        if json['nsfw'] == 1:
-            age_limit = 18
-        else:
-            age_limit = 0
-        
+
         return {
-            'id':          video_id,
-            'title':       video_title,
-            'formats':     formats,
-            'creator':     json['userName'],
-            'description': json['description'],
-            'upload_date': date,
-            'categories':  json['tags'],
-            'age_limit':   age_limit,
-            'duration':    duration,
-            'view_count':  json['views']
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'duration': duration,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'categories': categories,
+            'age_limit': age_limit,
+            'formats': formats,
         }