[thisoldhouse] Add new extractor (closes #10837)
author Remita Amine <remitamine@gmail.com>
Mon, 3 Oct 2016 14:27:09 +0000 (15:27 +0100)
committer Remita Amine <remitamine@gmail.com>
Mon, 3 Oct 2016 14:27:09 +0000 (15:27 +0100)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/thisoldhouse.py [new file with mode: 0644]

index e8928307c122e17e365651fa1f5ba5a34f6f79f4..dca4973d4462690a3b57236fff0bfff6c1f401d5 100644 (file)
@@ -892,6 +892,7 @@ from .thesixtyone import TheSixtyOneIE
 from .thestar import TheStarIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
 from .threeqsdn import ThreeQSDNIE
 from .tinypic import TinyPicIE
 from .tlc import TlcDeIE
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
new file mode 100644 (file)
index 0000000..7629f0d
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisOldHouseIE(InfoExtractor):
+    """Extractor for thisoldhouse.com ``/watch/`` and ``/how-to/`` pages.
+
+    It does not build formats itself: it reads a JW Platform video id from
+    the page's Drupal settings and returns a ``url_result`` for
+    ``jwplatform:<video_id>`` with the ``'JWPlatform'`` extractor key.
+    """
+
+    # The last path segment (up to the next '/', '?' or '#') is captured as
+    # the ``id`` group.
+    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
+    # The first entry lists the expected metadata for a how-to page; the
+    # second is flagged 'only_matching' (presumably it is only checked against
+    # _VALID_URL -- the test harness is not part of this file).
+    _TESTS = [{
+        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+        'md5': '568acf9ca25a639f0c4ff905826b662f',
+        'info_dict': {
+            'id': '2REGtUDQ',
+            'ext': 'mp4',
+            'title': 'How to Build a Storage Bench',
+            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+            'timestamp': 1442548800,
+            'upload_date': '20150918',
+        }
+    }, {
+        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Resolve a page URL to the JW Platform video embedded in it.
+
+        :param url: page URL; expected to match ``_VALID_URL``.
+        :return: the value of ``self.url_result('jwplatform:' + video_id,
+            'JWPlatform', video_id)``.
+        :raises KeyError: if the parsed settings lack ``jwplatform`` or
+            ``video_id`` (assuming ``_parse_json`` yields a dict here).
+            The inherited helpers may raise as well; they are defined
+            outside this file.
+        """
+        # Presumably the ``id`` group of _VALID_URL (``_match_id`` is
+        # inherited, not shown here). It is passed to the download and parse
+        # helpers below and not otherwise used.
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # The page hands its settings to jQuery.extend(Drupal.settings, {...});
+        # as an object literal. The non-greedy ``.+?`` ends the capture at the
+        # first ``}`` that is followed by ``);``, and that text is then parsed
+        # as JSON.
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+        # Plain indexing: there is no fallback if either key is absent.
+        video_id = drupal_settings['jwplatform']['video_id']
+        # Delegate the actual media extraction to the 'JWPlatform' extractor
+        # key via the ``jwplatform:`` pseudo-URL.
+        return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)