git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/cammodels.py
[picarto] Extract more metadata (closes #16518)
[youtube-dl] / youtube_dl / extractor / cammodels.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     int_or_none,
8     url_or_none,
9 )
10
11
class CamModelsIE(InfoExtractor):
    """Extract the live stream formats of a cammodels.com performer page."""

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cammodels.com/cam/AutumnKnight/',
        'only_matching': True,
    }]

    # (page snippet, user-facing message) pairs. When the page carries no
    # manifest URL root, the first snippet found in the webpage explains why
    # and the failure is reported as an expected one.
    _ERRORS = (
        ("I'm offline, but let's stay connected", 'This user is currently offline'),
        ('in a private show', 'This user is in a private show'),
        ('is currently performing LIVE', 'This model is currently performing live'),
    )

    def _raise_manifest_error(self, webpage):
        """Raise the ExtractorError describing why no manifest root was found.

        A known status snippet in ``webpage`` yields an expected error with a
        friendly message; otherwise the failure is flagged as unexpected.
        This method never returns normally.
        """
        for pattern, message in self._ERRORS:
            if pattern in webpage:
                raise ExtractorError(message, expected=True)
        raise ExtractorError('Unable to find manifest URL root', expected=False)

    def _extract_formats(self, manifest):
        """Build the (unsorted) list of format dicts from the stream manifest.

        Only entries whose format id mentions ``rtmp`` or ``hls`` are kept;
        malformed entries and encodings without a valid URL are skipped.

        Raises ExtractorError when the manifest has no usable ``formats``
        mapping, instead of leaking a KeyError/AttributeError to the caller.
        """
        manifest_formats = (
            manifest.get('formats') if isinstance(manifest, dict) else None)
        if not isinstance(manifest_formats, dict):
            raise ExtractorError('Unable to find formats in manifest')

        formats = []
        for format_id, format_dict in manifest_formats.items():
            if not isinstance(format_dict, dict):
                continue

            # The protocol depends on the format id only, so decide it once
            # per manifest entry rather than once per encoding.
            if 'rtmp' in format_id:
                protocol_fields = {'ext': 'flv'}
            elif 'hls' in format_id:
                protocol_fields = {
                    'ext': 'mp4',
                    # hls skips fragments, preferring rtmp
                    'preference': -1,
                }
            else:
                continue

            encodings = format_dict.get('encodings')
            if not isinstance(encodings, list):
                continue

            vcodec = format_dict.get('videoCodec')
            acodec = format_dict.get('audioCodec')

            for media in encodings:
                if not isinstance(media, dict):
                    continue
                media_url = url_or_none(media.get('location'))
                if not media_url:
                    continue

                # Format ids look like "<manifest key>" or
                # "<manifest key>-<height>p" when the height is known.
                height = int_or_none(media.get('videoHeight'))
                format_id_parts = [format_id]
                if height is not None:
                    format_id_parts.append('%dp' % height)

                f = {
                    'url': media_url,
                    'format_id': '-'.join(format_id_parts),
                    'width': int_or_none(media.get('videoWidth')),
                    'height': height,
                    'vbr': int_or_none(media.get('videoKbps')),
                    'abr': int_or_none(media.get('audioKbps')),
                    'fps': int_or_none(media.get('fps')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                }
                f.update(protocol_fields)
                formats.append(f)
        return formats

    def _real_extract(self, url):
        """Return the info dict for the live stream at ``url``.

        Downloads the performer page, reads the manifest URL root from it,
        fetches ``<root><user_id>.json`` and converts its encodings into
        formats. Raises ExtractorError when the page exposes no manifest
        root or the manifest lacks a ``formats`` mapping.
        """
        user_id = self._match_id(url)

        webpage = self._download_webpage(
            url, user_id, headers=self.geo_verification_headers())

        manifest_root = self._html_search_regex(
            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
        if not manifest_root:
            self._raise_manifest_error(webpage)

        manifest = self._download_json(
            '%s%s.json' % (manifest_root, user_id), user_id)

        formats = self._extract_formats(manifest)
        self._sort_formats(formats)

        return {
            'id': user_id,
            'title': self._live_title(user_id),
            'is_live': True,
            'formats': formats,
        }