[amcnetworks] fix extraction (closes #12127)
[youtube-dl] / youtube_dl / extractor / amcnetworks.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .theplatform import ThePlatformIE
5 from ..utils import (
6     update_url_query,
7     parse_age_limit,
8     int_or_none,
9 )
10
11
class AMCNetworksIE(ThePlatformIE):
    """Extractor for episode and movie pages on amc.com, bbcamerica.com,
    ifc.com and wetv.com.

    The page embeds a thePlatform link URL (``window.platformLinkURL``);
    metadata, formats and subtitles are obtained through the helpers
    inherited from ThePlatformIE. When the page sets
    ``window.authRequired`` to ``true`` an MVPD (TV provider) auth token is
    requested and appended to the media URL query.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
        'md5': '',
        'info_dict': {
            'id': 's3MX01Nl4vPH',
            'ext': 'mp4',
            'title': 'Maron - Season 4 - Step 1',
            'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
            'age_limit': 17,
            'upload_date': '20160505',
            'timestamp': 1462468831,
            'uploader': 'AMCN',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Requires TV provider accounts',
    }, {
        'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
        'only_matching': True,
    }, {
        'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
        'only_matching': True,
    }, {
        'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
        'only_matching': True,
    }, {
        'url': 'http://www.ifc.com/movies/chaos',
        'only_matching': True,
    }, {
        'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        Returns the dict produced by ``_parse_theplatform_metadata`` updated
        with id, formats, subtitles and age limit, plus series/season/episode
        fields when the metadata declares a custom ``$xmlns`` namespace.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        media_url = self._search_regex(
            r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
            webpage, 'media url')
        # The metadata endpoint is addressed by the path component that
        # follows "link.theplatform.com/s/" in the media URL.
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'link\.theplatform\.com/s/([^?]+)',
            media_url, 'theplatform_path'), display_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        video_id = theplatform_metadata['pid']
        title = theplatform_metadata['title']
        # 'ratings' may be absent or empty; treat that as "no rating"
        # rather than failing the whole extraction.
        try:
            rating = theplatform_metadata['ratings'][0]['rating']
        except (KeyError, IndexError, TypeError):
            rating = None
        auth_required = self._search_regex(
            r'window\.authRequired\s*=\s*(true|false);',
            webpage, 'auth required')
        if auth_required == 'true':
            requestor_id = self._search_regex(
                r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
                webpage, 'requestor id')
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
        media_url = update_url_query(media_url, query)
        formats, subtitles = self._extract_theplatform_smil(
            media_url, video_id)
        self._sort_formats(formats)
        info.update({
            'id': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'age_limit': parse_age_limit(rating),
        })
        # Custom fields are keyed as "<namespace>$<field>"; the namespace
        # prefix is taken from the first key of the '$xmlns' mapping.
        xmlns = theplatform_metadata.get('$xmlns')
        if xmlns:
            ns = next(iter(xmlns))
            series = theplatform_metadata.get(ns + '$show')
            season_number = int_or_none(
                theplatform_metadata.get(ns + '$season'))
            episode = theplatform_metadata.get(ns + '$episodeTitle')
            episode_number = int_or_none(
                theplatform_metadata.get(ns + '$episode'))
            # Compose "<series> - Season <n> - <title>" from whichever
            # parts are available.
            if season_number:
                title = 'Season %d - %s' % (season_number, title)
            if series:
                title = '%s - %s' % (series, title)
            info.update({
                'title': title,
                'series': series,
                'season_number': season_number,
                'episode': episode,
                'episode_number': episode_number,
            })
        return info