[extractor/generic] Fix typo (closes #14902)
[youtube-dl] / youtube_dl / extractor / generic.py
index 39630b6f691f6e9f05dd6229e4d6292d1a23e41a..c8b7c2e63fb709329ef4da6044e1b89a9d9cf6b4 100644 (file)
@@ -102,6 +102,7 @@ from .joj import JojIE
 from .megaphone import MegaphoneIE
 from .vzaar import VzaarIE
 from .channel9 import Channel9IE
+from .vshare import VShareIE
 
 
 class GenericIE(InfoExtractor):
@@ -1091,23 +1092,24 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'upload_date': '20150212',
                 'uploader': 'The National Archives UK',
-                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+                'description': 'md5:8078af856dca76edc42910b61273dbbf',
                 'uploader_id': 'NationalArchives08',
                 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
             },
         },
         # jwplayer rtmp
         {
-            'url': 'http://www.suffolk.edu/sjc/',
+            'url': 'http://www.suffolk.edu/sjc/live.php',
             'info_dict': {
-                'id': 'sjclive',
+                'id': 'live',
                 'ext': 'flv',
                 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
                 'uploader': 'www.suffolk.edu',
             },
             'params': {
                 'skip_download': True,
-            }
+            },
+            'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
         },
         # Complex jwplayer
         {
@@ -1116,6 +1118,7 @@ class GenericIE(InfoExtractor):
                 'id': 'videos',
                 'ext': 'mp4',
                 'title': 'king machine trailer 1',
+                'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
                 'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
@@ -1133,6 +1136,19 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        {
+            # JWPlatform iframe
+            'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+            'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+            'info_dict': {
+                'id': 'O0c5JcKT',
+                'ext': 'mp4',
+                'upload_date': '20171122',
+                'timestamp': 1511366290,
+                'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+            },
+            'add_ie': [JWPlatformIE.ie_key()],
+        },
         {
             # Video.js embed, multiple formats
             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
@@ -1168,7 +1184,7 @@ class GenericIE(InfoExtractor):
             'playlist_mincount': 5,
             'info_dict': {
                 'id': 'aanslagen-kopenhagen',
-                'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+                'title': 'Aanslagen Kopenhagen',
             }
         },
         # Zapiks embed
@@ -1300,6 +1316,7 @@ class GenericIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'This video is unavailable.',
         },
         # Pladform embed
         {
@@ -1313,6 +1330,7 @@ class GenericIE(InfoExtractor):
                 'duration': 694,
                 'age_limit': 0,
             },
+            'skip': 'HTTP Error 404: Not Found',
         },
         # Playwire embed
         {
@@ -1333,6 +1351,14 @@ class GenericIE(InfoExtractor):
                 'id': '518726732',
                 'ext': 'mp4',
                 'title': 'Facebook Creates "On This Day" | Crunch Report',
+                'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+                'timestamp': 1427237531,
+                'uploader': 'Crunch Report',
+                'upload_date': '20150324',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
             },
         },
         # SVT embed
@@ -1384,16 +1410,20 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20140107',
                 'timestamp': 1389118457,
             },
+            'skip': 'Invalid Page URL',
         },
         # NBC News embed
         {
             'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
             'md5': '1aa589c675898ae6d37a17913cf68d66',
             'info_dict': {
-                'id': '701714499682',
+                'id': 'x_dtl_oa_LettermanliftPR_160608',
                 'ext': 'mp4',
-                'title': 'PREVIEW: On Assignment: David Letterman',
+                'title': 'David Letterman: A Preview',
                 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+                'upload_date': '20160609',
+                'timestamp': 1465431544,
+                'uploader': 'NBCU-NEWS',
             },
         },
         # UDN embed
@@ -1410,6 +1440,7 @@ class GenericIE(InfoExtractor):
                 # m3u8 download
                 'skip_download': True,
             },
+            'expected_warnings': ['Failed to parse JSON Expecting value'],
         },
         # Ooyala embed
         {
@@ -1417,7 +1448,7 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
                 'ext': 'mp4',
-                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
+                'description': 'Index/Match versus VLOOKUP.',
                 'title': 'This is what separates the Excel masters from the wannabes',
                 'duration': 191.933,
             },
@@ -1455,7 +1486,8 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20150622',
                 'uploader': 'Public Sénat',
                 'uploader_id': 'xa9gza',
-            }
+            },
+            'skip': 'File not found.',
         },
         # OnionStudios embed
         {
@@ -1903,6 +1935,16 @@ class GenericIE(InfoExtractor):
                 'title': 'Rescue Kit 14 Free Edition - Getting started',
             },
             'playlist_count': 4,
+        },
+        {
+            # vshare embed
+            'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
+            'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+            'info_dict': {
+                'id': '0f64ce6',
+                'title': 'vl14062007715967',
+                'ext': 'mp4',
+            }
         }
         # {
         #     # TODO: find another test
@@ -2153,7 +2195,7 @@ class GenericIE(InfoExtractor):
                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                 info_dict['formats'] = self._parse_mpd_formats(
-                    doc, video_id,
+                    doc,
                     mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
                     mpd_url=url)
                 self._sort_formats(info_dict['formats'])
@@ -2253,7 +2295,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded rtl.nl player
         matches = re.findall(
-            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
+            r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
             webpage)
         if matches:
             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
@@ -2652,7 +2694,7 @@ class GenericIE(InfoExtractor):
 
         # Look for UDN embeds
         mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+            r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
         if mobj is not None:
             return self.url_result(
                 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -2861,6 +2903,11 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
 
+        vshare_urls = VShareIE._extract_urls(webpage)
+        if vshare_urls:
+            return self.playlist_from_matches(
+                vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():