- def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
-
- webpage = self._download_webpage(url, display_id)
-
- if ">Sorry, the Film you're looking for is not available.<" in webpage:
- raise ExtractorError(
- 'Film %s is not available.' % display_id, expected=True)
-
- initial_store_state = self._search_regex(
- r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
- webpage, 'Initial Store State', default=None)
- if initial_store_state:
- modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
- initial_store_state).decode()), display_id)['page']['data']['modules']
- content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
- gist = content_data['gist']
- film_id = gist['id']
- title = gist['title']
- video_assets = try_get(
- content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
- if not video_assets:
- token = self._download_json(
- 'https://prod-api.viewlift.com/identity/anonymous-token',
- film_id, 'Downloading authorization token',
- query={'site': 'snagfilms'})['authorizationToken']
- video_assets = self._download_json(
- 'https://prod-api.viewlift.com/entitlement/video/status',
- film_id, headers={
- 'Authorization': token,
- 'Referer': url,
- }, query={
- 'id': film_id
- })['video']['streamingInfo']['videoAssets']
-
- formats = []
- mpeg_video_assets = video_assets.get('mpeg') or []
- for video_asset in mpeg_video_assets:
- video_asset_url = video_asset.get('url')
- if not video_asset:
- continue
- bitrate = int_or_none(video_asset.get('bitrate'))
- height = int_or_none(self._search_regex(
- r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
- 'height', default=None))
- formats.append({
- 'url': video_asset_url,
- 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
- 'tbr': bitrate,
- 'height': height,
- 'vcodec': video_asset.get('codec'),
- })
-
- hls_url = video_assets.get('hls')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('height', 'tbr', 'format_id'))
-
- info = {
- 'id': film_id,
- 'display_id': display_id,
- 'title': title,
- 'description': gist.get('description'),
- 'thumbnail': gist.get('videoImageUrl'),
- 'duration': int_or_none(gist.get('runtime')),
- 'age_limit': parse_age_limit(content_data.get('parentalRating')),
- 'timestamp': int_or_none(gist.get('publishDate'), 1000),
- 'formats': formats,
- }
- for k in ('categories', 'tags'):
- info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
- return info
- else:
- film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')