Merge pull request #2342 from MikeCol/tube8

author Filippo Valsorda <filosottile.wiki@gmail.com>
Sat, 8 Feb 2014 03:00:50 +0000 (04:00 +0100)
committer Filippo Valsorda <filosottile.wiki@gmail.com>
Sat, 8 Feb 2014 03:00:50 +0000 (04:00 +0100)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index fed2d91dc450009a4cd3bcc3f07cbf50de1cd52e..e81366851fec4e495e571e2bad1ed4ab06d3fcc6 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -41,6 +41,7 @@ __authors__  = (
      'Chris Gahan',
      'Saimadhav Heblikar',
      'Mike Col',
+    'Andreas Schmitz',
  )
  
  __license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index a13b5cfb8ea957e2f50b9c2216b537580e391ee4..c0a57c73d860d2320ed60126ef69d8a5e1660fec 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -25,6 +25,7 @@ from .canalplus import CanalplusIE
  from .canalc2 import Canalc2IE
  from .cbs import CBSIE
  from .channel9 import Channel9IE
+from .chilloutzone import ChilloutzoneIE
  from .cinemassacre import CinemassacreIE
  from .clipfish import ClipfishIE
  from .cliphunter import CliphunterIE
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py

index 3867d785051dbf41608ae578b945a2c4e7add235..4f000292b7c4273c40df11252852986df08f5e01 100644 (file)
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
      '''
      IE_DESC = 'Channel 9'
      IE_NAME = 'channel9'
-    _VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
+    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
  
      _TESTS = [
          {
              'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
-            'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
              'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
              'info_dict': {
+                'id': 'Events/TechEd/Australia/2013/KOS002',
+                'ext': 'mp4',
                  'title': 'Developer Kick-Off Session: Stuff We Love',
                  'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                  'duration': 4576,
@@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
          },
          {
              'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
-            'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
              'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
              'info_dict': {
+                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
+                'ext': 'mp4',
                  'title': 'Self-service BI with Power BI - nuclear testing',
                  'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                  'duration': 1540,
diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py

new file mode 100644 (file)

index 0000000..524f06d
--- /dev/null
+++ b/youtube_dl/extractor/chilloutzone.py
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError
+)
+
+
+class ChilloutzoneIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
+        'md5': 'a76f3457e813ea0037e5244f509e66d1',
+        'info_dict': {
+            'id': 'enemene-meck-alle-katzen-weg',
+            'ext': 'mp4',
+            'title': 'Enemene Meck - Alle Katzen weg',
+            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
+        },
+    }, {
+        'note': 'Video hosted at YouTube',
+        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
+        'info_dict': {
+            'id': '1YVQaAgHyRU',
+            'ext': 'mp4',
+            'title': '16 Photos Taken 1 Second Before Disaster',
+            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
+            'uploader': 'BuzzFeedVideo',
+            'uploader_id': 'BuzzFeedVideo',
+            'upload_date': '20131105',
+        },
+    }, {
+        'note': 'Video hosted at Vimeo',
+        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
+        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
+        'info_dict': {
+            'id': '85523671',
+            'ext': 'mp4',
+            'title': 'The Sunday Times - Icons',
+            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
+            'uploader': 'Us',
+            'uploader_id': 'usfilms',
+            'upload_date': '20140131'
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        base64_video_info = self._html_search_regex(
+            r'var cozVidData = "(.+?)";', webpage, 'video data')
+        decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+        video_info_dict = json.loads(decoded_video_info)
+
+        # Pull the fields used below out of the decoded cozVidData JSON object
+        video_url = video_info_dict['mediaUrl']
+        description = clean_html(video_info_dict.get('description'))
+        title = video_info_dict['title']
+        native_platform = video_info_dict['nativePlatform']
+        native_video_id = video_info_dict['nativeVideoId']
+        source_priority = video_info_dict['sourcePriority']
+
+        # nativePlatform is None: fall back to a YouTube <iframe> found in the page, if any
+        if native_platform is None:
+            youtube_url = self._html_search_regex(
+                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
+                webpage, 'fallback video URL', default=None)
+            if youtube_url is not None:
+                return self.url_result(youtube_url, ie='Youtube')
+
+        # Otherwise, when sourcePriority is 'native', delegate to the Youtube or
+        # Vimeo extractor; every other case falls through to mediaUrl below
+        if source_priority == 'native':
+            if native_platform == 'youtube':
+                return self.url_result(native_video_id, ie='Youtube')
+            if native_platform == 'vimeo':
+                return self.url_result(
+                    'http://vimeo.com/' + native_video_id, ie='Vimeo')
+
+        if not video_url:
+            raise ExtractorError('No video found')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+        }
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py

index 18dd9cb1e467a7503b4d095a797b3e7d73850502..1ba4966c724ee15637dc0f2d08d3029dec16f4e3 100644 (file)
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -14,15 +14,16 @@ from ..utils import (
  class IviIE(InfoExtractor):
      IE_DESC = 'ivi.ru'
      IE_NAME = 'ivi'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
  
      _TESTS = [
          # Single movie
          {
              'url': 'http://www.ivi.ru/watch/53141',
-            'file': '53141.mp4',
              'md5': '6ff5be2254e796ed346251d117196cf4',
              'info_dict': {
+                'id': '53141',
+                'ext': 'mp4',
                  'title': 'Иван Васильевич меняет профессию',
                  'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
                  'duration': 5498,
@@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
          # Serial's serie
          {
              'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
-            'file': '74791.mp4',
              'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
              'info_dict': {
+                'id': '74791',
+                'ext': 'mp4',
                  'title': 'Дежурный ангел - 1 серия',
                  'duration': 2490,
                  'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
  class IviCompilationIE(InfoExtractor):
      IE_DESC = 'ivi.ru compilations'
      IE_NAME = 'ivi:compilation'
-    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
  
      def _extract_entries(self, html, compilation_id):
          return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py

index 92b4bb8df9e8b310ae5b044deb6d523099b3d8bb..a8c514f53d96a016e3147b21f9e6f438b9450904 100644 (file)
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@@ -49,20 +49,37 @@ class NFBIE(InfoExtractor):
  
          config = self._download_xml(request, video_id, 'Downloading player config XML')
  
-        thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
-        video = config.find("./player/stream/media[@type='video']")
-        duration = int(video.get('duration'))
-        title = video.find('title').text
-        description = video.find('description').text
+        title = None
+        description = None
+        thumbnail = None
+        duration = None
+        formats = []
  
-        # It seems assets always go from lower to better quality, so no need to sort
-        formats = [{
-            'url': x.find('default/streamerURI').text + '/',
-            'play_path': x.find('default/url').text,
-            'rtmp_live': False,
-            'ext': 'mp4',
-            'format_id': x.get('quality'),
-        } for x in video.findall('assets/asset')]
+        def extract_thumbnail(media):
+            thumbnails = {}
+            for asset in media.findall('assets/asset'):
+                thumbnails[asset.get('quality')] = asset.find('default/url').text
+            if not thumbnails:
+                return None
+            if 'high' in thumbnails:
+                return thumbnails['high']
+            return list(thumbnails.values())[0]
+
+        for media in config.findall('./player/stream/media'):
+            if media.get('type') == 'posterImage':
+                thumbnail = extract_thumbnail(media)
+            elif media.get('type') == 'video':
+                duration = int(media.get('duration'))
+                title = media.find('title').text
+                description = media.find('description').text
+                # It seems assets always go from lower to better quality, so no need to sort
+                formats = [{
+                    'url': x.find('default/streamerURI').text + '/',
+                    'play_path': x.find('default/url').text,
+                    'rtmp_live': False,
+                    'ext': 'mp4',
+                    'format_id': x.get('quality'),
+                } for x in media.findall('assets/asset')]
  
          return {
              'id': video_id,
author	Filippo Valsorda <filosottile.wiki@gmail.com>
	Sat, 8 Feb 2014 03:00:50 +0000 (04:00 +0100)
committer	Filippo Valsorda <filosottile.wiki@gmail.com>
	Sat, 8 Feb 2014 03:00:50 +0000 (04:00 +0100)
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/channel9.py		patch \| blob \| history
youtube_dl/extractor/chilloutzone.py	[new file with mode: 0644]	patch \| blob
youtube_dl/extractor/ivi.py		patch \| blob \| history
youtube_dl/extractor/nfb.py		patch \| blob \| history