git.bitcoin.ninja Git - youtube-dl/commitdiff
Merge pull request #2342 from MikeCol/tube8
author Filippo Valsorda <filosottile.wiki@gmail.com>
Sat, 8 Feb 2014 03:00:50 +0000 (04:00 +0100)
committer Filippo Valsorda <filosottile.wiki@gmail.com>
Sat, 8 Feb 2014 03:00:50 +0000 (04:00 +0100)
[Tube8] Extended valid urls schema

youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/channel9.py
youtube_dl/extractor/chilloutzone.py [new file with mode: 0644]
youtube_dl/extractor/ivi.py
youtube_dl/extractor/nfb.py

diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index fed2d91dc450009a4cd3bcc3f07cbf50de1cd52e..e81366851fec4e495e571e2bad1ed4ab06d3fcc6 100644 (file)
@@ -41,6 +41,7 @@ __authors__  = (
     'Chris Gahan',
     'Saimadhav Heblikar',
     'Mike Col',
+    'Andreas Schmitz',
 )
 
 __license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a13b5cfb8ea957e2f50b9c2216b537580e391ee4..c0a57c73d860d2320ed60126ef69d8a5e1660fec 100644 (file)
@@ -25,6 +25,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .channel9 import Channel9IE
+from .chilloutzone import ChilloutzoneIE
 from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 3867d785051dbf41608ae578b945a2c4e7add235..4f000292b7c4273c40df11252852986df08f5e01 100644 (file)
@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
     '''
     IE_DESC = 'Channel 9'
     IE_NAME = 'channel9'
-    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
 
     _TESTS = [
         {
             'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-            'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
             'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
             'info_dict': {
+                'id': 'Events/TechEd/Australia/2013/KOS002',
+                'ext': 'mp4',
                 'title': 'Developer Kick-Off Session: Stuff We Love',
                 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                 'duration': 4576,
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
         },
         {
             'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
             'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
             'info_dict': {
+                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
+                'ext': 'mp4',
                 'title': 'Self-service BI with Power BI - nuclear testing',
                 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                 'duration': 1540,
diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py
new file mode 100644 (file)
index 0000000..524f06d
--- /dev/null
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+        'md5': 'a76f3457e813ea0037e5244f509e66d1',
+        'info_dict': {
+            'id': 'enemene-meck-alle-katzen-weg',
+            'ext': 'mp4',
+            'title': 'Enemene Meck - Alle Katzen weg',
+            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+        },
+    }, {
+        'note': 'Video hosted at YouTube',
+        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+        'info_dict': {
+            'id': '1YVQaAgHyRU',
+            'ext': 'mp4',
+            'title': '16 Photos Taken 1 Second Before Disaster',
+            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+            'uploader': 'BuzzFeedVideo',
+            'uploader_id': 'BuzzFeedVideo',
+            'upload_date': '20131105',
+        },
+    }, {
+        'note': 'Video hosted at Vimeo',
+        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
+        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
+        'info_dict': {
+            'id': '85523671',
+            'ext': 'mp4',
+            'title': 'The Sunday Times - Icons',
+            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
+            'uploader': 'Us',
+            'uploader_id': 'usfilms',
+            'upload_date': '20140131'
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        base64_video_info = self._html_search_regex(
+            r'var cozVidData = "(.+?)";', webpage, 'video data')
+        decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+        video_info_dict = json.loads(decoded_video_info)
+
+        # get video information from dict
+        video_url = video_info_dict['mediaUrl']
+        description = clean_html(video_info_dict.get('description'))
+        title = video_info_dict['title']
+        native_platform = video_info_dict['nativePlatform']
+        native_video_id = video_info_dict['nativeVideoId']
+        source_priority = video_info_dict['sourcePriority']
+
+        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
+        if native_platform is None:
+            youtube_url = self._html_search_regex(
+                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+                webpage, 'fallback video URL', default=None)
+            if youtube_url is not None:
+                return self.url_result(youtube_url, ie='Youtube')
+
+        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
+        # the own CDN
+        if source_priority == 'native':
+            if native_platform == 'youtube':
+                return self.url_result(native_video_id, ie='Youtube')
+            if native_platform == 'vimeo':
+                return self.url_result(
+                    'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+        if not video_url:
+            raise ExtractorError('No video found')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+        }
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 18dd9cb1e467a7503b4d095a797b3e7d73850502..1ba4966c724ee15637dc0f2d08d3029dec16f4e3 100644 (file)
@@ -14,15 +14,16 @@ from ..utils import (
 class IviIE(InfoExtractor):
     IE_DESC = 'ivi.ru'
     IE_NAME = 'ivi'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
 
     _TESTS = [
         # Single movie
         {
             'url': 'http://www.ivi.ru/watch/53141',
-            'file': '53141.mp4',
             'md5': '6ff5be2254e796ed346251d117196cf4',
             'info_dict': {
+                'id': '53141',
+                'ext': 'mp4',
                 'title': 'Иван Васильевич меняет профессию',
                 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                 'duration': 5498,
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
         # Serial's serie
         {
             'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'file': '74791.mp4',
             'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
             'info_dict': {
+                'id': '74791',
+                'ext': 'mp4',
                 'title': 'Дежурный ангел - 1 серия',
                 'duration': 2490,
                 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
 class IviCompilationIE(InfoExtractor):
     IE_DESC = 'ivi.ru compilations'
     IE_NAME = 'ivi:compilation'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
 
     def _extract_entries(self, html, compilation_id):
         return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py
index 92b4bb8df9e8b310ae5b044deb6d523099b3d8bb..a8c514f53d96a016e3147b21f9e6f438b9450904 100644 (file)
@@ -49,20 +49,37 @@ class NFBIE(InfoExtractor):
 
         config = self._download_xml(request, video_id, 'Downloading player config XML')
 
-        thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
-        video = config.find("./player/stream/media[@type='video']")
-        duration = int(video.get('duration'))
-        title = video.find('title').text
-        description = video.find('description').text
+        title = None
+        description = None
+        thumbnail = None
+        duration = None
+        formats = []
 
-        # It seems assets always go from lower to better quality, so no need to sort
-        formats = [{
-            'url': x.find('default/streamerURI').text + '/',
-            'play_path': x.find('default/url').text,
-            'rtmp_live': False,
-            'ext': 'mp4',
-            'format_id': x.get('quality'),
-        } for x in video.findall('assets/asset')]
+        def extract_thumbnail(media):
+            thumbnails = {}
+            for asset in media.findall('assets/asset'):
+                thumbnails[asset.get('quality')] = asset.find('default/url').text
+            if not thumbnails:
+                return None
+            if 'high' in thumbnails:
+                return thumbnails['high']
+            return list(thumbnails.values())[0]
+
+        for media in config.findall('./player/stream/media'):
+            if media.get('type') == 'posterImage':
+                thumbnail = extract_thumbnail(media)
+            elif media.get('type') == 'video':
+                duration = int(media.get('duration'))
+                title = media.find('title').text
+                description = media.find('description').text
+                # It seems assets always go from lower to better quality, so no need to sort
+                formats = [{
+                    'url': x.find('default/streamerURI').text + '/',
+                    'play_path': x.find('default/url').text,
+                    'rtmp_live': False,
+                    'ext': 'mp4',
+                    'format_id': x.get('quality'),
+                } for x in media.findall('assets/asset')]
 
         return {
             'id': video_id,