[youtube] Relax TFA regexes
[youtube-dl] / youtube_dl / extractor / nytimes.py
index 266d8d4006377fc68e48c4329c0b985694367852..681683e86f54e796f1c954de2c0cb374016fe303 100644 (file)
@@ -18,8 +18,9 @@ class NYTimesBaseIE(InfoExtractor):
         description = video_data.get('summary')
         duration = float_or_none(video_data.get('duration'), 1000)
 
-        uploader = video_data['byline']
-        timestamp = parse_iso8601(video_data['publication_date'][:-8])
+        uploader = video_data.get('byline')
+        publication_date = video_data.get('publication_date')
+        timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
 
         def get_file_size(file_size):
             if isinstance(file_size, int):
@@ -37,7 +38,7 @@ class NYTimesBaseIE(InfoExtractor):
                 'width': int_or_none(video.get('width')),
                 'height': int_or_none(video.get('height')),
                 'filesize': get_file_size(video.get('fileSize')),
-            } for video in video_data['renditions']
+            } for video in video_data['renditions'] if video.get('url')
         ]
         self._sort_formats(formats)
 
@@ -46,7 +47,7 @@ class NYTimesBaseIE(InfoExtractor):
                 'url': 'http://www.nytimes.com/%s' % image['url'],
                 'width': int_or_none(image.get('width')),
                 'height': int_or_none(image.get('height')),
-            } for image in video_data['images']
+            } for image in video_data.get('images', []) if image.get('url')
         ]
 
         return {
@@ -89,8 +90,8 @@ class NYTimesIE(NYTimesBaseIE):
 
 
 class NYTimesArticleIE(NYTimesBaseIE):
-    _VALID_URL = r'https?://(?:www)?\.nytimes\.com/\d{4}/\d{2}/\d{2}/(?:[^/]+/)*(?P<id>[^.]+)\.html'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
+    _TESTS = [{
         'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
         'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
         'info_dict': {
@@ -102,7 +103,10 @@ class NYTimesArticleIE(NYTimesBaseIE):
             'upload_date': '20150414',
             'uploader': 'Matthew Williams',
         }
-    }
+    }, {
+        'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)