_ Git - youtube-dl/blob - youtube_dl/extractor/vine.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4 import json
   5 import itertools
   6
   7 from .common import InfoExtractor
   8 from ..utils import unified_strdate
   9
  10
  11 class VineIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
  13     _TESTS = [{
  14         'url': 'https://vine.co/v/b9KOOWX7HUx',
  15         'md5': '2f36fed6235b16da96ce9b4dc890940d',
  16         'info_dict': {
  17             'id': 'b9KOOWX7HUx',
  18             'ext': 'mp4',
  19             'title': 'Chicken.',
  20             'alt_title': 'Vine by Jack Dorsey',
  21             'description': 'Chicken.',
  22             'upload_date': '20130519',
  23             'uploader': 'Jack Dorsey',
  24             'uploader_id': '76',
  25         },
  26     }, {
  27         'url': 'https://vine.co/v/MYxVapFvz2z',
  28         'md5': '7b9a7cbc76734424ff942eb52c8f1065',
  29         'info_dict': {
  30             'id': 'MYxVapFvz2z',
  31             'ext': 'mp4',
  32             'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
  33             'alt_title': 'Vine by Luna',
  34             'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
  35             'upload_date': '20140815',
  36             'uploader': 'Luna',
  37             'uploader_id': '1102363502380728320',
  38         },
  39     }, {
  40         'url': 'https://vine.co/v/bxVjBbZlPUH',
  41         'md5': 'ea27decea3fa670625aac92771a96b73',
  42         'info_dict': {
  43             'id': 'bxVjBbZlPUH',
  44             'ext': 'mp4',
  45             'title': '#mw3 #ac130 #killcam #angelofdeath',
  46             'alt_title': 'Vine by Z3k3',
  47             'description': '#mw3 #ac130 #killcam #angelofdeath',
  48             'upload_date': '20130430',
  49             'uploader': 'Z3k3',
  50             'uploader_id': '936470460173008896',
  51         },
  52     }]
  53
  54     def _real_extract(self, url):
  55         video_id = self._match_id(url)
  56         webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
  57
  58         data = json.loads(self._html_search_regex(
  59             r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id,
  60             webpage, 'vine data'))
  61
  62         formats = [{
  63             'format_id': '%(format)s-%(rate)s' % f,
  64             'vcodec': f['format'],
  65             'quality': f['rate'],
  66             'url': f['videoUrl'],
  67         } for f in data['videoUrls']]
  68
  69         self._sort_formats(formats)
  70
  71         return {
  72             'id': video_id,
  73             'title': self._og_search_title(webpage),
  74             'alt_title': self._og_search_description(webpage),
  75             'description': data['description'],
  76             'thumbnail': data['thumbnailUrl'],
  77             'upload_date': unified_strdate(data['created']),
  78             'uploader': data['username'],
  79             'uploader_id': data['userIdStr'],
  80             'like_count': data['likes']['count'],
  81             'comment_count': data['comments']['count'],
  82             'repost_count': data['reposts']['count'],
  83             'formats': formats,
  84         }
  85
  86
  87 class VineUserIE(InfoExtractor):
  88     IE_NAME = 'vine:user'
  89     _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
  90     _VINE_BASE_URL = "https://vine.co/"
  91     _TESTS = [
  92         {
  93             'url': 'https://vine.co/Visa',
  94             'info_dict': {
  95                 'id': 'Visa',
  96             },
  97             'playlist_mincount': 46,
  98         },
  99         {
 100             'url': 'https://vine.co/u/941705360593584128',
 101             'only_matching': True,
 102         },
 103     ]
 104
 105     def _real_extract(self, url):
 106         mobj = re.match(self._VALID_URL, url)
 107         user = mobj.group('user')
 108         u = mobj.group('u')
 109
 110         profile_url = "%sapi/users/profiles/%s%s" % (
 111             self._VINE_BASE_URL, 'vanity/' if not u else '', user)
 112         profile_data = self._download_json(
 113             profile_url, user, note='Downloading user profile data')
 114
 115         user_id = profile_data['data']['userId']
 116         timeline_data = []
 117         for pagenum in itertools.count(1):
 118             timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
 119                 self._VINE_BASE_URL, user_id, pagenum)
 120             timeline_page = self._download_json(
 121                 timeline_url, user, note='Downloading page %d' % pagenum)
 122             timeline_data.extend(timeline_page['data']['records'])
 123             if timeline_page['data']['nextPage'] is None:
 124                 break
 125
 126         entries = [
 127             self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data]
 128         return self.playlist_result(entries, user)