_ Git - youtube-dl/blob - youtube_dl/extractor/cammodels.py

   1 from __future__ import unicode_literals
   2 from .common import InfoExtractor
   3 from .common import ExtractorError
   4 import json
   5 import re
   6 from ..utils import int_or_none
   7
   8
   9 class CamModelsIE(InfoExtractor):
  10     _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
  11     _HEADERS = {
  12         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
  13         # Needed because server doesn't return links to video URLs if a browser-like User-Agent is not used
  14     }
  15
  16     def _real_extract(self, url):
  17         video_id = self._match_id(url)
  18         webpage = self._download_webpage(
  19             url,
  20             video_id,
  21             headers=self._HEADERS)
  22         manifest_url_root = self._html_search_regex(
  23             r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
  24             webpage,
  25             'manifest',
  26             None,
  27             False)
  28         if not manifest_url_root:
  29             offline = self._html_search_regex(
  30                 r'(?P<id>I\'m offline, but let\'s stay connected!)',
  31                 webpage,
  32                 'offline indicator',
  33                 None,
  34                 False)
  35             private = self._html_search_regex(
  36                 r'(?P<id>I’m in a private show right now)',
  37                 webpage,
  38                 'private show indicator',
  39                 None,
  40                 False)
  41             err = 'This user is currently offline, so nothing can be downloaded.' if offline \
  42                 else 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' if private \
  43                 else 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'
  44             raise ExtractorError(
  45                 err,
  46                 expected=True if offline or private else False,
  47                 video_id=video_id
  48             )
  49         manifest_url = manifest_url_root + video_id + '.json'
  50         manifest = self._download_json(
  51             manifest_url,
  52             video_id,
  53             'Downloading links to streams.',
  54             'Link to stream URLs was found, but we couldn\'t access it.',
  55             headers=self._HEADERS)
  56         try:
  57             formats = []
  58             for fmtName in ['mp4-rtmp', 'mp4-hls']:
  59                 for encoding in manifest['formats'][fmtName]['encodings']:
  60                     formats.append({
  61                         'ext': 'mp4',
  62                         'url': encoding['location'],
  63                         'width': int_or_none(encoding.get('videoWidth')),
  64                         'height': int_or_none(encoding.get('videoHeight')),
  65                         'vbr': int_or_none(encoding.get('videoKbps')),
  66                         'abr': int_or_none(encoding.get('audioKbps')),
  67                         'format_id': fmtName + str(encoding.get('videoWidth'))
  68                     })
  69         # If they change the JSON format, then fallback to parsing out RTMP links via regex.
  70         except KeyError:
  71             manifest_json = json.dumps(manifest)
  72             manifest_links = re.finditer(
  73                 r'(?P<id>rtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))',
  74                 manifest_json)
  75             if not manifest_links:
  76                 raise ExtractorError(
  77                     'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
  78                     expected=False,
  79                     video_id=video_id)
  80             formats = []
  81             for manifest_link in manifest_links:
  82                 url = manifest_link.group('id')
  83                 formats.append({
  84                     'ext': 'mp4',
  85                     'url': url,
  86                     'format_id': url.split(sep='/')[-1]
  87                 })
  88         self._sort_formats(formats)
  89         return {
  90             'id': video_id,
  91             'title': self._live_title(video_id),
  92             'formats': formats
  93         }