[hgtvcom:show] Add extractor (Closes #10365)
author Sergey M․ <dstftw@gmail.com>
Wed, 17 Aug 2016 21:37:14 +0000 (04:37 +0700)
committer Sergey M․ <dstftw@gmail.com>
Wed, 17 Aug 2016 21:37:14 +0000 (04:37 +0700)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/hgtv.py

index 55c639158dda61b53f290d880a47602aa0bb4616..e61bb11c3be354c446938d81a5a9e65823d1ccc9 100644 (file)
@@ -324,7 +324,10 @@ from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
-from .hgtv import HGTVIE
+from .hgtv import (
+    HGTVIE,
+    HGTVComShowIE,
+)
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
index c3f0733cf7708287918d92e95a3e4179f733f4ef..69543bff2cb3c844b8ebe82d38cd33672e168bbe 100644 (file)
@@ -46,3 +46,34 @@ class HGTVIE(InfoExtractor):
             'episode_number': int_or_none(embed_vars.get('episode')),
             'ie_key': 'ThePlatform',
         }
+
+
+class HGTVComShowIE(InfoExtractor):
+    # Extractor for show pages under hgtv.com/shows/<show>/<id>; the result
+    # of _real_extract is whatever self.playlist_result() builds from the
+    # videos listed in the page's embedded config.
+    IE_NAME = 'hgtv.com:show'
+    _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
+        'info_dict': {
+            'id': 'flip-or-flop-full-episodes-videos',
+            'title': 'Flip or Flop Full Episodes',
+        },
+        'playlist_mincount': 15,
+    }
+
+    def _real_extract(self, url):
+        # The id obtained from the URL doubles as the playlist id and as the
+        # id passed to the download/parse helpers.
+        show_id = self._match_id(url)
+        page = self._download_webpage(url, show_id)
+
+        # The config is the body of a text/x-config <script> that follows a
+        # data-module="video" element in the page markup.
+        raw_config = self._search_regex(
+            r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
+            page, 'video config')
+
+        # Only the first entry of "channels" is used.
+        channel = self._parse_json(raw_config, show_id)['channels'][0]
+
+        entries = []
+        for item in channel['videos']:
+            release_url = item.get('releaseUrl')
+            # Videos without a truthy releaseUrl are skipped.
+            if release_url:
+                entries.append(self.url_result(release_url))
+
+        return self.playlist_result(
+            entries, show_id,
+            channel.get('title'), channel.get('description'))