_ Git - youtube-dl/blob - youtube_dl/extractor/flipagram.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5
   6 from ..utils import (
   7     int_or_none,
   8     parse_iso8601,
   9     unified_strdate,
  10     unified_timestamp,
  11 )
  12
  13
  14 class FlipagramIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?_]+)'
  16     _TESTS = [{
  17         'url': 'https://flipagram.com/f/myrWjW9RJw',
  18         'md5': '541988fb6c4c7c375215ea22a4a21841',
  19         'info_dict': {
  20             'id': 'myrWjW9RJw',
  21             'title': 'Flipagram by crystaldolce featuring King and Lionheart by Of Monsters and Men',
  22             'description': 'Herbie\'s first bannana🍌🐢🍌.  #animals #pets #reptile #tortoise #sulcata #tort #justatreat #snacktime #bannanas #rescuepets  #ofmonstersandmen  @animals',
  23             'ext': 'mp4',
  24             'uploader': 'Crystal Dolce',
  25             'creator': 'Crystal Dolce',
  26             'uploader_id': 'crystaldolce',
  27         }
  28     }, {
  29         'url': 'https://flipagram.com/f/nyvTSJMKId',
  30         'only_matching': True,
  31     }]
  32
  33     def _real_extract(self, url):
  34         video_id = self._match_id(url)
  35         webpage = self._download_webpage(url, video_id)
  36
  37         self.report_extraction(video_id)
  38         user_data = self._parse_json(self._search_regex(r'window.reactH2O\s*=\s*({.+});', webpage, 'user data'), video_id)
  39         content_data = self._search_json_ld(webpage, video_id)
  40
  41         flipagram = user_data.get('flipagram', {})
  42         counts = flipagram.get('counts', {})
  43         user = flipagram.get('user', {})
  44         video = flipagram.get('video', {})
  45
  46         thumbnails = []
  47         for cover in flipagram.get('covers', []):
  48             if not cover.get('url'):
  49                 continue
  50             thumbnails.append({
  51                 'url': self._proto_relative_url(cover.get('url')),
  52                 'width': int_or_none(cover.get('width')),
  53                 'height': int_or_none(cover.get('height')),
  54             })
  55
  56         # Note that this only retrieves comments that are initally loaded.
  57         # For videos with large amounts of comments, most won't be retrieved.
  58         comments = []
  59         for comment in user_data.get('comments', {}).get(video_id, {}).get('items', []):
  60             text = comment.get('comment', [])
  61             comments.append({
  62                 'author': comment.get('user', {}).get('name'),
  63                 'author_id': comment.get('user', {}).get('username'),
  64                 'id': comment.get('id'),
  65                 'text': text[0] if text else '',
  66                 'timestamp': unified_timestamp(comment.get('created', '')),
  67             })
  68
  69         tags = [tag for item in flipagram['story'][1:] for tag in item]
  70
  71         formats = []
  72         if flipagram.get('music', {}).get('track', {}).get('previewUrl', {}):
  73             formats.append({
  74                 'url': flipagram.get('music').get('track').get('previewUrl'),
  75                 'ext': 'm4a',
  76                 'vcodec': 'none',
  77             })
  78
  79         formats.append({
  80             'url': video.get('url'),
  81             'ext': 'mp4',
  82             'width': int_or_none(video.get('width')),
  83             'height': int_or_none(video.get('height')),
  84             'filesize': int_or_none(video.get('size')),
  85         })
  86
  87         return {
  88             'id': video_id,
  89             'title': content_data['title'],
  90             'formats': formats,
  91             'thumbnails': thumbnails,
  92             'description': content_data.get('description'),
  93             'uploader': user.get('name'),
  94             'creator': user.get('name'),
  95             'timestamp': parse_iso8601(flipagram.get('iso801Created')),
  96             'upload_date': unified_strdate(flipagram.get('created')),
  97             'uploader_id': user.get('username'),
  98             'view_count': int_or_none(counts.get('plays')),
  99             'repost_count': int_or_none(counts.get('reflips')),
 100             'comment_count': int_or_none(counts.get('comments')),
 101             'comments': comments,
 102             'tags': tags,
 103         }