[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / hgtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class HGTVComShowIE(InfoExtractor):
    """Playlist extractor for hgtv.com show pages.

    Matches URLs of the form ``hgtv.com/shows/<show>/<page>`` and uses the
    last path component as the display id. The page is expected to embed a
    JSON configuration inside a ``<script type="text/x-config">`` tag that
    follows an element marked ``data-module="video"`` or
    ``data-deferred-module="video"``.
    """

    IE_NAME = 'hgtv.com:show'
    _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # data-module="video"
        'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
        'info_dict': {
            'id': 'flip-or-flop-full-episodes-season-4-videos',
            'title': 'Flip or Flop Full Episodes',
        },
        'playlist_mincount': 15,
    }, {
        # data-deferred-module="video"
        'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the first channel of the embedded config.

        Only videos that carry a non-empty ``releaseUrl`` become playlist
        entries; the playlist title and description come from the channel
        and may be ``None`` when the channel does not provide them.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Locate the raw JSON blob that sits in the x-config <script> tag
        # following the video module element.
        raw_config = self._search_regex(
            r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
            webpage, 'video config')

        # Only the first channel of the configuration is used.
        channel = self._parse_json(raw_config, display_id)['channels'][0]

        entries = []
        for video in channel['videos']:
            release_url = video.get('releaseUrl')
            if not release_url:
                # Skip items without a usable release URL.
                continue
            entries.append(self.url_result(release_url))

        playlist_title = channel.get('title')
        playlist_description = channel.get('description')
        return self.playlist_result(
            entries, display_id, playlist_title, playlist_description)