_ Git - youtube-dl/blob - youtube_dl/extractor/iconosquare.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     int_or_none,
   6     get_element_by_id,
   7 )
   8
   9
  10 class IconosquareIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
  12     _TEST = {
  13         'url': 'http://statigr.am/p/522207370455279102_24101272',
  14         'md5': '6eb93b882a3ded7c378ee1d6884b1814',
  15         'info_dict': {
  16             'id': '522207370455279102_24101272',
  17             'ext': 'mp4',
  18             'title': 'A little over a year ago, I posted my first #dailycortado, a drink introduced to...',
  19             'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
  20             'timestamp': 1376471991,
  21             'upload_date': '20130814',
  22             'uploader': 'aguynamedpatrick',
  23             'uploader_id': '24101272',
  24             'comment_count': int,
  25             'like_count': int,
  26         },
  27     }
  28
  29     def _real_extract(self, url):
  30         video_id = self._match_id(url)
  31
  32         webpage = self._download_webpage(url, video_id)
  33
  34         media = self._parse_json(
  35             get_element_by_id('mediaJson', webpage),
  36             video_id)
  37
  38         formats = [{
  39             'url': f['url'],
  40             'format_id': format_id,
  41             'width': int_or_none(f.get('width')),
  42             'height': int_or_none(f.get('height'))
  43         } for format_id, f in media['videos'].items()]
  44         self._sort_formats(formats)
  45
  46         title = self._html_search_regex(
  47             r'<title>(.+?)</title>',
  48             webpage, 'title')
  49
  50         timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
  51         description = media.get('caption', {}).get('text')
  52
  53         uploader = media.get('user', {}).get('username')
  54         uploader_id = media.get('user', {}).get('id')
  55
  56         comment_count = int_or_none(media.get('comments', {}).get('count'))
  57         like_count = int_or_none(media.get('likes', {}).get('count'))
  58
  59         thumbnails = [{
  60             'url': t['url'],
  61             'id': thumbnail_id,
  62             'width': int_or_none(t.get('width')),
  63             'height': int_or_none(t.get('height'))
  64         } for thumbnail_id, t in media.get('images', {}).items()]
  65
  66         return {
  67             'id': video_id,
  68             'title': title,
  69             'description': description,
  70             'thumbnails': thumbnails,
  71             'timestamp': timestamp,
  72             'uploader': uploader,
  73             'uploader_id': uploader_id,
  74             'comment_count': comment_count,
  75             'like_count': like_count,
  76             'formats': formats,
  77         }