_ Git - youtube-dl/blob - youtube_dl/extractor/patreon.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
from .common import InfoExtractor
from ..utils import (
    clean_html,
    determine_ext,
    ExtractorError,
    int_or_none,
    parse_iso8601,
)
  11
  12
  13 class PatreonIE(InfoExtractor):
  14     _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
  15     _TESTS = [{
  16         'url': 'http://www.patreon.com/creation?hid=743933',
  17         'md5': 'e25505eec1053a6e6813b8ed369875cc',
  18         'info_dict': {
  19             'id': '743933',
  20             'ext': 'mp3',
  21             'title': 'Episode 166: David Smalley of Dogma Debate',
  22             'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
  23             'uploader': 'Cognitive Dissonance Podcast',
  24             'thumbnail': 're:^https?://.*$',
  25             'timestamp': 1406473987,
  26             'upload_date': '20140727',
  27         },
  28     }, {
  29         'url': 'http://www.patreon.com/creation?hid=754133',
  30         'md5': '3eb09345bf44bf60451b8b0b81759d0a',
  31         'info_dict': {
  32             'id': '754133',
  33             'ext': 'mp3',
  34             'title': 'CD 167 Extra',
  35             'uploader': 'Cognitive Dissonance Podcast',
  36             'thumbnail': 're:^https?://.*$',
  37         },
  38         'skip': 'Patron-only content',
  39     }, {
  40         'url': 'https://www.patreon.com/creation?hid=1682498',
  41         'info_dict': {
  42             'id': 'SU4fj_aEMVw',
  43             'ext': 'mp4',
  44             'title': 'I\'m on Patreon!',
  45             'uploader': 'TraciJHines',
  46             'thumbnail': 're:^https?://.*$',
  47             'upload_date': '20150211',
  48             'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
  49             'uploader_id': 'TraciJHines',
  50         },
  51         'params': {
  52             'noplaylist': True,
  53             'skip_download': True,
  54         }
  55     }, {
  56         'url': 'https://www.patreon.com/posts/episode-166-of-743933',
  57         'only_matching': True,
  58     }, {
  59         'url': 'https://www.patreon.com/posts/743933',
  60         'only_matching': True,
  61     }]
  62
  63     # Currently Patreon exposes download URL via hidden CSS, so login is not
  64     # needed. Keeping this commented for when this inevitably changes.
  65     '''
  66     def _login(self):
  67         username, password = self._get_login_info()
  68         if username is None:
  69             return
  70
  71         login_form = {
  72             'redirectUrl': 'http://www.patreon.com/',
  73             'email': username,
  74             'password': password,
  75         }
  76
  77         request = sanitized_Request(
  78             'https://www.patreon.com/processLogin',
  79             compat_urllib_parse_urlencode(login_form).encode('utf-8')
  80         )
  81         login_page = self._download_webpage(request, None, note='Logging in')
  82
  83         if re.search(r'onLoginFailed', login_page):
  84             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
  85
  86     def _real_initialize(self):
  87         self._login()
  88     '''
  89
  90     def _real_extract(self, url):
  91         video_id = self._match_id(url)
  92         post = self._download_json(
  93             'https://www.patreon.com/api/posts/' + video_id, video_id)
  94         attributes = post['data']['attributes']
  95         title = attributes['title'].strip()
  96         image = attributes.get('image') or {}
  97         info = {
  98             'id': video_id,
  99             'title': title,
 100             'description': clean_html(attributes.get('content')),
 101             'thumbnail': image.get('large_url') or image.get('url'),
 102             'timestamp': parse_iso8601(attributes.get('published_at')),
 103             'like_count': int_or_none(attributes.get('like_count')),
 104             'comment_count': int_or_none(attributes.get('comment_count')),
 105         }
 106
 107         def add_file(file_data):
 108             file_url = file_data.get('url')
 109             if file_url:
 110                 info.update({
 111                     'url': file_url,
 112                     'ext': determine_ext(file_data.get('name'), 'mp3'),
 113                 })
 114
 115         for i in post.get('included', []):
 116             i_type = i.get('type')
 117             if i_type == 'attachment':
 118                 add_file(i.get('attributes') or {})
 119             elif i_type == 'user':
 120                 user_attributes = i.get('attributes')
 121                 if user_attributes:
 122                     info.update({
 123                         'uploader': user_attributes.get('full_name'),
 124                         'uploader_url': user_attributes.get('url'),
 125                     })
 126
 127         if not info.get('url'):
 128             add_file(attributes.get('post_file') or {})
 129
 130         if not info.get('url'):
 131             info.update({
 132                 '_type': 'url',
 133                 'url': attributes['embed']['url'],
 134             })
 135
 136         return info