X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fthisoldhouse.py;h=6ab147ad726306ba9250599d34491a50e64e82d0;hb=56667d622c3f6e7594a04f8cd5f4371875940725;hp=7629f0d10e4ebc40bf25b0f02f52b2524ab9e303;hpb=fb27d0ce5e91216296e3406d461fe5b7af78c477;p=youtube-dl diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 7629f0d10..6ab147ad7 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -2,10 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str +from ..utils import try_get class ThisOldHouseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', 'md5': '568acf9ca25a639f0c4ff905826b662f', @@ -20,13 +22,23 @@ class ThisOldHouseIE(InfoExtractor): }, { 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'only_matching': True, + }, { + 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric', + 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - drupal_settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', - webpage, 'drupal settings'), display_id) - video_id = drupal_settings['jwplatform']['video_id'] + video_id = self._search_regex( + (r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', + r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'), + webpage, 'video id', default=None, group='id') + if not video_id: + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = try_get( 
+ drupal_settings, lambda x: x['jwplatform']['video_id'], + compat_str) or list(drupal_settings['comScore'])[0] return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)