_ Git - youtube-dl/blob - youtube_dl/extractor/cammodels.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_str
   6 from ..utils import (
   7     ExtractorError,
   8     int_or_none,
   9 )
  10
  11
  12 class CamModelsIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
  14     _TESTS = [{
  15         'url': 'https://www.cammodels.com/cam/AutumnKnight/',
  16         'only_matching': True,
  17     }]
  18
  19     def _real_extract(self, url):
  20         user_id = self._match_id(url)
  21
  22         webpage = self._download_webpage(
  23             url, user_id, headers=self.geo_verification_headers())
  24
  25         manifest_root = self._html_search_regex(
  26             r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
  27
  28         if not manifest_root:
  29             ERRORS = (
  30                 ("I'm offline, but let's stay connected", 'This user is currently offline'),
  31                 ('in a private show', 'This user is in a private show'),
  32                 ('is currently performing LIVE', 'This model is currently performing live'),
  33             )
  34             for pattern, message in ERRORS:
  35                 if pattern in webpage:
  36                     error = message
  37                     expected = True
  38                     break
  39             else:
  40                 error = 'Unable to find manifest URL root'
  41                 expected = False
  42             raise ExtractorError(error, expected=expected)
  43
  44         manifest = self._download_json(
  45             '%s%s.json' % (manifest_root, user_id), user_id)
  46
  47         formats = []
  48         for format_id, format_dict in manifest['formats'].items():
  49             if not isinstance(format_dict, dict):
  50                 continue
  51             encodings = format_dict.get('encodings')
  52             if not isinstance(encodings, list):
  53                 continue
  54             vcodec = format_dict.get('videoCodec')
  55             acodec = format_dict.get('audioCodec')
  56             for media in encodings:
  57                 if not isinstance(media, dict):
  58                     continue
  59                 media_url = media.get('location')
  60                 if not media_url or not isinstance(media_url, compat_str):
  61                     continue
  62
  63                 format_id_list = [format_id]
  64                 height = int_or_none(media.get('videoHeight'))
  65                 if height is not None:
  66                     format_id_list.append('%dp' % height)
  67                 f = {
  68                     'url': media_url,
  69                     'format_id': '-'.join(format_id_list),
  70                     'width': int_or_none(media.get('videoWidth')),
  71                     'height': height,
  72                     'vbr': int_or_none(media.get('videoKbps')),
  73                     'abr': int_or_none(media.get('audioKbps')),
  74                     'fps': int_or_none(media.get('fps')),
  75                     'vcodec': vcodec,
  76                     'acodec': acodec,
  77                 }
  78                 if 'rtmp' in format_id:
  79                     f['ext'] = 'flv'
  80                 elif 'hls' in format_id:
  81                     f.update({
  82                         'ext': 'mp4',
  83                         # hls skips fragments, preferring rtmp
  84                         'preference': -1,
  85                     })
  86                 else:
  87                     continue
  88                 formats.append(f)
  89         self._sort_formats(formats)
  90
  91         return {
  92             'id': user_id,
  93             'title': self._live_title(user_id),
  94             'is_live': True,
  95             'formats': formats,
  96         }