[thisoldhouse] Fix video id extraction (closes #24548)
author AndrewMBL <62922222+AndrewMBL@users.noreply.github.com>
Tue, 31 Mar 2020 04:25:04 +0000 (15:25 +1100)
committer Sergey M․ <dstftw@gmail.com>
Sat, 11 Apr 2020 13:07:32 +0000 (20:07 +0700)
Added support for iframe URLs:
with or without "www."
and with either ".chorus.build" or ".com"

It now validates correctly on older URLs
```
<iframe src="https://thisoldhouse.chorus.build/videos/zype/5e33baec27d2e50001d5f52f
```
and newer ones
```
<iframe src="https://www.thisoldhouse.com/videos/zype/5e2b70e95216cc0001615120
```

youtube_dl/extractor/thisoldhouse.py

index 387f955eee5752ac8797c85375070ba77a897075..33269705f48fd0f458699840bc711d7de017b6df 100644 (file)
@@ -19,6 +19,20 @@ class ThisOldHouseIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+        'note': 'test for updated video URL',
+        'info_dict': {
+            'id': '5e2b70e95216cc0001615120',
+            'ext': 'mp4',
+            'title': 'E12 | The Westerly Project | Seaside Transformation',
+            'description': 'Kevin and Tommy take the tour with the homeowners and Jeff. Norm presents his pine coffee table. Jenn gives Tommy the garden tour. Everyone meets at the flagpole to raise the flags.',
+            'timestamp': 1579755600,
+            'upload_date': '20200123',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
         'only_matching': True,
@@ -38,6 +52,6 @@ class ThisOldHouseIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         video_id = self._search_regex(
-            r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})',
+            r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.|)thisoldhouse(?:\.chorus\.build|\.com)/videos/zype/([0-9a-f]{24})',
             webpage, 'video id')
         return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)