1 from __future__ import unicode_literals
6 from .common import InfoExtractor
class PacktPubBaseIE(InfoExtractor):
    """Common base class for the Packt extractors defined in this file.

    On top of InfoExtractor it only contributes shared URL constants.
    """

    # Disabled main-site base URL; kept for reference, not defined at runtime.
    # _PACKT_BASE = 'https://www.packtpub.com'

    # URL prefix of the static product endpoints; subclasses append a
    # product-specific path to it.
    _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/'
27 class PacktPubIE(PacktPubBaseIE):
28 _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)(?:/(?P<display_id>[^/?&#]+))?'
31 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
32 'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
36 'title': 'Project Intro',
37 'thumbnail': r're:(?i)^https?://.*\.jpg',
38 'timestamp': 1490918400,
39 'upload_date': '20170331',
42 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
43 'only_matching': True,
45 _NETRC_MACHINE = 'packtpub'
48 def _real_initialize(self):
49 username, password = self._get_login_info()
53 self._TOKEN = self._download_json(
54 'https://services.packtpub.com/auth-v1/users/tokens', None,
55 'Downloading Authorization Token', data=json.dumps({
58 }).encode())['data']['access']
59 except ExtractorError as e:
60 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
61 message = self._parse_json(e.cause.read().decode(), None)['message']
62 raise ExtractorError(message, expected=True)
65 def _real_extract(self, url):
66 course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups()
70 headers['Authorization'] = 'Bearer ' + self._TOKEN
72 video_url = self._download_json(
73 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id,
74 'Downloading JSON video', headers=headers)['data']
75 except ExtractorError as e:
76 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
77 self.raise_login_required('This video is locked')
80 # TODO: find a better way to avoid duplicating course requests
81 # metadata = self._download_json(
82 # '%s/products/%s/chapters/%s/sections/%s/metadata'
83 # % (self._MAPT_REST, course_id, chapter_id, video_id),
86 # title = metadata['pageTitle']
87 # course_title = metadata.get('title')
89 # title = remove_end(title, ' - %s' % course_title)
90 # timestamp = unified_timestamp(metadata.get('publicationDate'))
91 # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
96 'title': display_id or video_id, # title,
97 # 'thumbnail': thumbnail,
98 # 'timestamp': timestamp,
102 class PacktPubCourseIE(PacktPubBaseIE):
103 _VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
105 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
107 'id': '9781787122215',
108 'title': 'Learn Nodejs by building 12 projects [Video]',
109 'description': 'md5:489da8d953f416e51927b60a1c7db0aa',
111 'playlist_count': 90,
113 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
114 'only_matching': True,
def suitable(cls, url):
    """Tell whether the course extractor should handle *url*.

    Returns False for every URL that PacktPubIE accepts; any other URL is
    judged by the parent class's ``suitable``.
    """
    # NOTE(review): presumably needed because the course pattern is a prefix
    # of the single-video pattern - confirm against the base implementation.
    if PacktPubIE.suitable(url):
        return False
    return super(PacktPubCourseIE, cls).suitable(url)
122 def _real_extract(self, url):
123 mobj = re.match(self._VALID_URL, url)
124 url, course_id = mobj.group('url', 'id')
126 course = self._download_json(
127 self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id)
128 metadata = self._download_json(
129 self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id,
130 course_id, fatal=False) or {}
133 for chapter_num, chapter in enumerate(course['chapters'], 1):
134 chapter_id = str_or_none(chapter.get('id'))
135 sections = chapter.get('sections')
136 if not chapter_id or not isinstance(sections, list):
139 'chapter': chapter.get('title'),
140 'chapter_number': chapter_num,
141 'chapter_id': chapter_id,
143 for section in sections:
144 section_id = str_or_none(section.get('id'))
145 if not section_id or section.get('contentType') != 'video':
148 '_type': 'url_transparent',
149 'url': '/'.join([url, chapter_id, section_id]),
150 'title': strip_or_none(section.get('title')),
151 'description': clean_html(section.get('summary')),
152 'thumbnail': metadata.get('coverImage'),
153 'timestamp': unified_timestamp(metadata.get('publicationDate')),
154 'ie_key': PacktPubIE.ie_key(),
156 entry.update(chapter_info)
157 entries.append(entry)
159 return self.playlist_result(
160 entries, course_id, metadata.get('title'),
161 clean_html(metadata.get('about')))