[thisoldhouse] Improve video id extraction (closes #24549)
[youtube-dl] / youtube_dl / extractor / thisoldhouse.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class ThisOldHouseIE(InfoExtractor):
    # Matches thisoldhouse.com pages under /watch/, /how-to/, /tv-episode/,
    # or a numeric path segment (optionally preceded by one section segment);
    # the trailing slug is captured as the ``id`` group.
    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'

    # Zype player embed URL; ``%s`` is filled with the 24-hex-digit id that
    # _real_extract reads from the page's iframe.
    _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'

    _TESTS = [{
        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
        'info_dict': {
            'id': '5dcdddf673c3f956ef5db202',
            'ext': 'mp4',
            'title': 'How to Build a Storage Bench',
            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
            'timestamp': 1442548800,
            'upload_date': '20150918',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
        'only_matching': True,
    }, {
        'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
        'only_matching': True,
    }, {
        # section prefix followed by a numeric segment
        'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
        'only_matching': True,
    }, {
        # numeric segment directly under the site root
        'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
        'only_matching': True,
    }, {
        # iframe www.thisoldhouse.com
        'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the Zype video id on the page and hand off to the Zype embed URL.

        The page for ``url`` is downloaded, the 24-hex-digit id is taken from
        the ``src`` of an iframe pointing at ``/videos/zype/<id>`` on either
        thisoldhouse.com or thisoldhouse.chorus.build, and the result of
        ``url_result`` for the corresponding player embed URL is returned.
        """
        # The slug from the page URL is only used to label the download.
        slug = self._match_id(url)
        page_html = self._download_webpage(url, slug)

        # The scheme is optional so protocol-relative iframe sources match too.
        iframe_pattern = r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})'
        zype_id = self._search_regex(iframe_pattern, page_html, 'video id')

        embed_url = self._ZYPE_TMPL % zype_id
        return self.url_result(embed_url, 'Zype', zype_id)
46             webpage, 'video id')
47         return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)