[thisoldhouse] Improve video id extraction (closes #24549)
author: Sergey M․ <dstftw@gmail.com>
Sat, 11 Apr 2020 13:07:12 +0000 (20:07 +0700)
committer: Sergey M․ <dstftw@gmail.com>
Sat, 11 Apr 2020 13:07:37 +0000 (20:07 +0700)
youtube_dl/extractor/thisoldhouse.py

index 33269705f48fd0f458699840bc711d7de017b6df..a3d9b4017b93b1d9381b896967fa9e68da59eaec 100644 (file)
@@ -19,20 +19,6 @@ class ThisOldHouseIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
-    }, {
-        'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
-        'note': 'test for updated video URL',
-        'info_dict': {
-            'id': '5e2b70e95216cc0001615120',
-            'ext': 'mp4',
-            'title': 'E12 | The Westerly Project | Seaside Transformation',
-            'description': 'Kevin and Tommy take the tour with the homeowners and Jeff. Norm presents his pine coffee table. Jenn gives Tommy the garden tour. Everyone meets at the flagpole to raise the flags.',
-            'timestamp': 1579755600,
-            'upload_date': '20200123',
-        },
-        'params': {
-            'skip_download': True,
-        },
     }, {
         'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
         'only_matching': True,
@@ -45,6 +31,10 @@ class ThisOldHouseIE(InfoExtractor):
     }, {
         'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
         'only_matching': True,
+    }, {
+        # iframe www.thisoldhouse.com
+        'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+        'only_matching': True,
     }]
     _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
 
@@ -52,6 +42,6 @@ class ThisOldHouseIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         video_id = self._search_regex(
-            r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.|)thisoldhouse(?:\.chorus\.build|\.com)/videos/zype/([0-9a-f]{24})',
+            r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
             webpage, 'video id')
         return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)