[cammodels] Add extractor
[youtube-dl] / youtube_dl / extractor / cammodels.py
1 from __future__ import unicode_literals
2 from .common import InfoExtractor
3 from .common import ExtractorError
4 import json
5 import re
6 from ..utils import int_or_none
7
8
class CamModelsIE(InfoExtractor):
    """Extractor for cam rooms on cammodels.com.

    The room page embeds a ``manifestUrlRoot`` value; appending
    ``<room id>.json`` to it gives a JSON manifest that lists the stream
    encodings.  When the page carries no manifest link the room is reported
    as offline, in a private show, or broken, depending on which marker text
    is found on the page.
    """

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
    # The server does not return links to the video URLs unless a
    # browser-like User-Agent is used.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
    }
    _MANIFEST_ROOT_RE = r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))'
    # "rtmps?" rather than the former "rtmp?": the latter made the "p"
    # optional (matching "rtm://") instead of allowing a secure variant.
    _RTMP_URL_RE = r'(?P<url>rtmps?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))'
    # Manifest format groups that are read, in this order.
    _FORMAT_NAMES = ('mp4-rtmp', 'mp4-hls')
    # Marker texts looked up verbatim in the room page.
    _OFFLINE_MARKER = 'I\'m offline, but let\'s stay connected!'
    # \u2019 is the typographic apostrophe used by the page; written as an
    # escape so that the source file stays pure ASCII (a raw non-ASCII
    # character needs a coding declaration on Python 2).
    _PRIVATE_MARKER = 'I\u2019m in a private show right now'

    def _real_extract(self, url):
        """Return the info dict for the room addressed by *url*.

        Raises ExtractorError when the room is offline or private (expected),
        when no manifest link is present on the page, or when the manifest
        contains no usable stream URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id, headers=self._HEADERS)

        manifest_url_root = self._html_search_regex(
            self._MANIFEST_ROOT_RE, webpage, 'manifest',
            default=None, group='id')
        if not manifest_url_root:
            self._raise_unavailable(webpage, video_id)

        manifest = self._download_json(
            manifest_url_root + video_id + '.json',
            video_id,
            'Downloading links to streams.',
            'Link to stream URLs was found, but we couldn\'t access it.',
            headers=self._HEADERS)

        # Prefer the structured layout; if the JSON layout changed and
        # nothing could be read from it, fall back to scraping RTMP links.
        formats = (
            self._manifest_formats(manifest)
            or self._fallback_formats(manifest))
        if not formats:
            raise ExtractorError(
                'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
                expected=False,
                video_id=video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._live_title(video_id),
            # NOTE(review): rooms are presumed to be live broadcasts (the
            # title is already built with _live_title) - confirm.
            'is_live': True,
            'formats': formats,
        }

    def _raise_unavailable(self, webpage, video_id):
        """Raise the ExtractorError explaining why no manifest link exists.

        Offline and private rooms are expected conditions; anything else is
        reported as an unexpected failure.
        """
        if self._OFFLINE_MARKER in webpage:
            message = 'This user is currently offline, so nothing can be downloaded.'
            expected = True
        elif self._PRIVATE_MARKER in webpage:
            message = 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.'
            expected = True
        else:
            message = 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'
            expected = False
        raise ExtractorError(message, expected=expected, video_id=video_id)

    def _manifest_formats(self, manifest):
        """Collect formats from ``manifest['formats'][name]['encodings']``.

        Entries that are missing or not shaped as expected are skipped
        individually, so one absent format group does not discard the
        others.  Returns an empty list when nothing usable is found.
        """
        groups = manifest.get('formats') if isinstance(manifest, dict) else None
        if not isinstance(groups, dict):
            return []

        formats = []
        for format_name in self._FORMAT_NAMES:
            group = groups.get(format_name)
            encodings = group.get('encodings') if isinstance(group, dict) else None
            if not isinstance(encodings, list):
                continue
            for encoding in encodings:
                if not isinstance(encoding, dict):
                    continue
                location = encoding.get('location')
                if not location:
                    continue
                width = encoding.get('videoWidth')
                formats.append({
                    'ext': 'mp4',
                    'url': location,
                    'width': int_or_none(width),
                    'height': int_or_none(encoding.get('videoHeight')),
                    'vbr': int_or_none(encoding.get('videoKbps')),
                    'abr': int_or_none(encoding.get('audioKbps')),
                    # Avoid ids such as "mp4-rtmpNone" when no width is given.
                    'format_id': (
                        format_name if width is None
                        else '%s%s' % (format_name, width)),
                })
        return formats

    def _fallback_formats(self, manifest):
        """Scrape RTMP URLs out of the serialized manifest.

        Used only when the structured layout yielded nothing.  The last path
        component of each URL serves as its format id.
        """
        formats = []
        for mobj in re.finditer(self._RTMP_URL_RE, json.dumps(manifest)):
            stream_url = mobj.group('url')
            formats.append({
                'ext': 'mp4',
                'url': stream_url,
                # Positional argument: str.split() takes no keyword
                # arguments on Python 2.
                'format_id': stream_url.split('/')[-1],
            })
        return formats