[README.md] Make download URLs consistent
[youtube-dl] / youtube_dl / extractor / generic.py
index 31527d1c6cd6f6aebb468c2d6e9e3af7bcf96076..6d346cb1c1b26b41af3e4cd6613d1fc63c5e86e6 100644 (file)
@@ -1249,6 +1249,20 @@ class GenericIE(InfoExtractor):
                 'uploader': 'www.hudl.com',
             },
         },
+        # twitter:player:stream embed
+        {
+            'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+            'info_dict': {
+                'id': 'master',
+                'ext': 'mp4',
+                'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+                'uploader': 'www.rtl.be',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            },
+        },
         # twitter:player embed
         {
             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
@@ -1313,6 +1327,38 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # Non-standard Vimeo embed
+            'url': 'https://openclassrooms.com/courses/understanding-the-web',
+            'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+            'info_dict': {
+                'id': '148867247',
+                'ext': 'mp4',
+                'title': 'Understanding the web - Teaser',
+                'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+                'upload_date': '20151214',
+                'uploader': 'OpenClassrooms',
+                'uploader_id': 'openclassrooms',
+            },
+            'add_ie': ['Vimeo'],
+        },
+        # {
+        #     # TODO: find another test
+        #     # http://schema.org/VideoObject
+        #     'url': 'https://flipagram.com/f/nyvTSJMKId',
+        #     'md5': '888dcf08b7ea671381f00fab74692755',
+        #     'info_dict': {
+        #         'id': 'nyvTSJMKId',
+        #         'ext': 'mp4',
+        #         'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+        #         'description': '#love for cats.',
+        #         'timestamp': 1461244995,
+        #         'upload_date': '20160421',
+        #     },
+        #     'params': {
+        #         'force_generic_extractor': True,
+        #     },
+        # }
     ]
 
     def report_following_redirect(self, new_url):
@@ -2152,10 +2198,18 @@ class GenericIE(InfoExtractor):
                 'uploader': video_uploader,
             }
 
-        # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
-        embed_url = self._html_search_meta('twitter:player', webpage, default=None)
-        if embed_url:
-            return self.url_result(embed_url)
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default=None, expected_type='VideoObject')
+        if json_ld and json_ld.get('url'):
+            info_dict.update({
+                'title': video_title or info_dict['title'],
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'age_limit': age_limit
+            })
+            info_dict.update(json_ld)
+            return info_dict
 
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
@@ -2200,6 +2254,9 @@ class GenericIE(InfoExtractor):
                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
         if not found:
             # Try to find twitter cards info
+            # twitter:player:stream should be checked before twitter:player since
+            # it is expected to contain a raw stream (see
+            # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
             found = filter_video(re.findall(
                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
         if not found:
@@ -2233,6 +2290,15 @@ class GenericIE(InfoExtractor):
                     '_type': 'url',
                     'url': new_url,
                 }
+
+        if not found:
+            # twitter:player is a https URL to iframe player that may or may not
+            # be supported by youtube-dl thus this is checked the very last (see
+            # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+            embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+            if embed_url:
+                return self.url_result(embed_url)
+
         if not found:
             raise UnsupportedError(url)