Merge branch 'patch-8239' of https://github.com/ping/youtube-dl into ping-patch-8239
authorYen Chi Hsuan <yan12125@gmail.com>
Tue, 19 Jan 2016 20:15:46 +0000 (04:15 +0800)
committerYen Chi Hsuan <yan12125@gmail.com>
Tue, 19 Jan 2016 20:15:46 +0000 (04:15 +0800)
19 files changed:
README.md
youtube_dl/YoutubeDL.py
youtube_dl/compat.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/ae.py [deleted file]
youtube_dl/extractor/aenetworks.py [new file with mode: 0644]
youtube_dl/extractor/atresplayer.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/digiteka.py [new file with mode: 0644]
youtube_dl/extractor/generic.py
youtube_dl/extractor/lemonde.py [new file with mode: 0644]
youtube_dl/extractor/letv.py
youtube_dl/extractor/ultimedia.py [deleted file]
youtube_dl/extractor/ustream.py
youtube_dl/extractor/viewster.py
youtube_dl/extractor/weiqitv.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py

index efb51027bdf55ba7e0714bc380f458347f8078fb..9dbeae1bcf8cbb47a66b56493b8bd9ef609cce6b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -529,6 +529,9 @@ $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
 
 # Download best video only format but no bigger that 50 MB
 $ youtube-dl -f 'best[filesize<50M]'
+
+# Download best format available via direct link over HTTP/HTTPS protocol
+$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
 ```
 
 
index 18b1443b887ca323e87348636c0febbb878705cc..09d2b18f262690023598dcc0aacf0628729a9c79 100755 (executable)
@@ -46,6 +46,7 @@ from .utils import (
     DateRange,
     DEFAULT_OUTTMPL,
     determine_ext,
+    determine_protocol,
     DownloadError,
     encode_compat_str,
     encodeFilename,
@@ -1309,6 +1310,10 @@ class YoutubeDL(object):
             # Automatically determine file extension if missing
             if 'ext' not in format:
                 format['ext'] = determine_ext(format['url']).lower()
+            # Automatically determine protocol if missing (useful for format
+            # selection purposes)
+            if 'protocol' not in format:
+                format['protocol'] = determine_protocol(format)
             # Add HTTP headers, so that external programs can use them from the
             # json output
             full_format_info = info_dict.copy()
index a3e85264acda8dbefe0883ea50c901302d01d29f..8ab6880011f6084cc1b28d928dec904740c0c6ff 100644 (file)
@@ -433,7 +433,7 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
     compat_getpass = getpass.getpass
 
-# Old 2.6 and 2.7 releases require kwargs to be bytes
+# Python < 2.6.5 require kwargs to be bytes
 try:
     def _testfunc(x):
         pass
index 2c56797a50a833bb6783c946e2aa776cf9c87e07..bab3d7b46d4fc5efe3c1bd40421b2325fd740376 100644 (file)
@@ -15,7 +15,7 @@ from .adobetv import (
     AdobeTVVideoIE,
 )
 from .adultswim import AdultSwimIE
-from .ae import AEIE
+from .aenetworks import AENetworksIE
 from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
@@ -333,10 +333,12 @@ from .kuwo import (
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
 from .lecture2go import Lecture2GoIE
+from .lemonde import LemondeIE
 from .letv import (
     LetvIE,
     LetvTvIE,
-    LetvPlaylistIE
+    LetvPlaylistIE,
+    LetvCloudIE,
 )
 from .libsyn import LibsynIE
 from .lifenews import (
@@ -777,7 +779,7 @@ from .udemy import (
     UdemyCourseIE
 )
 from .udn import UDNEmbedIE
-from .ultimedia import UltimediaIE
+from .digiteka import DigitekaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
@@ -856,6 +858,7 @@ from .webofstories import (
     WebOfStoriesPlaylistIE,
 )
 from .weibo import WeiboIE
+from .weiqitv import WeiqiTVIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
diff --git a/youtube_dl/extractor/ae.py b/youtube_dl/extractor/ae.py
deleted file mode 100644 (file)
index 3bc7c12..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import smuggle_url
-
-
-class AEIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
-
-    _TESTS = [{
-        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
-        'info_dict': {
-            'id': 'g12m5Gyt3fdR',
-            'ext': 'mp4',
-            'title': "Bet You Didn't Know: Valentine's Day",
-            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'add_ie': ['ThePlatform'],
-    }, {
-        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
-        'info_dict': {
-            'id': 'eg47EERs_JsZ',
-            'ext': 'mp4',
-            'title': "Winter Is Coming",
-            'description': 'md5:a40e370925074260b1c8a633c632c63a',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'add_ie': ['ThePlatform'],
-    }, {
-        'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
-        'only_matching': True
-    }, {
-        'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
-        'only_matching': True
-    }, {
-        'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
-        'only_matching': True
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-
-        video_url_re = [
-            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
-            r"media_url\s*=\s*'([^']+)'"
-        ]
-        video_url = self._search_regex(video_url_re, webpage, 'video url')
-
-        info = self._search_json_ld(webpage, video_id, fatal=False)
-        info.update({
-            '_type': 'url_transparent',
-            'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
-        })
-        return info
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
new file mode 100644 (file)
index 0000000..43d7b05
--- /dev/null
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class AENetworksIE(InfoExtractor):
+    IE_NAME = 'aenetworks'
+    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+
+    _TESTS = [{
+        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+        'info_dict': {
+            'id': 'g12m5Gyt3fdR',
+            'ext': 'mp4',
+            'title': "Bet You Didn't Know: Valentine's Day",
+            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['ThePlatform'],
+        'expected_warnings': ['JSON-LD'],
+    }, {
+        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
+        'info_dict': {
+            'id': 'eg47EERs_JsZ',
+            'ext': 'mp4',
+            'title': "Winter Is Coming",
+            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
+        'only_matching': True
+    }, {
+        'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
+        'only_matching': True
+    }, {
+        'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url_re = [
+            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
+            r"media_url\s*=\s*'([^']+)'"
+        ]
+        video_url = self._search_regex(video_url_re, webpage, 'video url')
+
+        info = self._search_json_ld(webpage, video_id, fatal=False)
+        info.update({
+            '_type': 'url_transparent',
+            'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
+        })
+        return info
index 3fb042cea4bfa68f0624cadf9e65e2d9e27c3c4a..b8f9ae005fbb0ab41589258886b8342eba0ac288 100644 (file)
@@ -132,11 +132,6 @@ class AtresPlayerIE(InfoExtractor):
                 })
             formats.append(format_info)
 
-        m3u8_url = player.get('urlVideoHls')
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
-
         timestamp = int_or_none(self._download_webpage(
             self._TIME_API_URL,
             video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
index ce99a34abf109a8d8daef6c23643281d079afa9c..1c493b72d9d4a08accaea8f8f7dc68fe693f3fe9 100644 (file)
@@ -718,19 +718,10 @@ class BBCIE(BBCCoUkIE):
 
         webpage = self._download_webpage(url, playlist_id)
 
-        timestamp = None
-        playlist_title = None
-        playlist_description = None
-
-        ld = self._parse_json(
-            self._search_regex(
-                r'(?s)<script type="application/ld\+json">(.+?)</script>',
-                webpage, 'ld json', default='{}'),
-            playlist_id, fatal=False)
-        if ld:
-            timestamp = parse_iso8601(ld.get('datePublished'))
-            playlist_title = ld.get('headline')
-            playlist_description = ld.get('articleBody')
+        json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
+        timestamp = json_ld_info.get('timestamp')
+        playlist_title = json_ld_info.get('title')
+        playlist_description = json_ld_info.get('description')
 
         if not timestamp:
             timestamp = parse_iso8601(self._search_regex(
index 92e2e4f438e319d18db283d2493b52d9fa94a7e9..8da70ae14409e4868c9ab72766c49c2b10454b9d 100644 (file)
@@ -763,13 +763,13 @@ class InfoExtractor(object):
         return self._html_search_meta('twitter:player', html,
                                       'twitter card player')
 
-    def _search_json_ld(self, html, video_id, fatal=True):
+    def _search_json_ld(self, html, video_id, **kwargs):
         json_ld = self._search_regex(
             r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
-            html, 'JSON-LD', fatal=fatal, group='json_ld')
+            html, 'JSON-LD', group='json_ld', **kwargs)
         if not json_ld:
             return {}
-        return self._json_ld(json_ld, video_id, fatal=fatal)
+        return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
 
     def _json_ld(self, json_ld, video_id, fatal=True):
         if isinstance(json_ld, compat_str):
index b687ec4d664579f64bc0dd8b484d3f837475fad2..6e462af6904ab5ee0915f3224f21b210d58ab931 100644 (file)
@@ -37,7 +37,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
 
 
 class DailymotionIE(DailymotionBaseInfoExtractor):
-    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
+    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P<id>[^/?_]+)'
     IE_NAME = 'dailymotion'
 
     _FORMATS = [
@@ -104,6 +104,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         {
             'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
             'only_matching': True,
+        },
+        {
+            'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
+            'only_matching': True,
         }
     ]
 
@@ -336,7 +340,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
 class DailymotionUserIE(DailymotionPlaylistIE):
     IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
     _TESTS = [{
         'url': 'https://www.dailymotion.com/user/nqtv',
diff --git a/youtube_dl/extractor/digiteka.py b/youtube_dl/extractor/digiteka.py
new file mode 100644 (file)
index 0000000..7bb79ff
--- /dev/null
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DigitekaIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
+        (?:
+            deliver/
+            (?P<embed_type>
+                generic|
+                musique
+            )
+            (?:/[^/]+)*/
+            (?:
+                src|
+                article
+            )|
+            default/index/video
+            (?P<site_type>
+                generic|
+                music
+            )
+            /id
+        )/(?P<id>[\d+a-z]+)'''
+    _TESTS = [{
+        # news
+        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+        'md5': '276a0e49de58c7e85d32b057837952a2',
+        'info_dict': {
+            'id': 's8uk0r',
+            'ext': 'mp4',
+            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 74,
+            'upload_date': '20150317',
+            'timestamp': 1426604939,
+            'uploader_id': '3fszv',
+        },
+    }, {
+        # music
+        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+        'info_dict': {
+            'id': 'xvpfp8',
+            'ext': 'mp4',
+            'title': 'Two - C\'est La Vie (clip)',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 233,
+            'upload_date': '20150224',
+            'timestamp': 1424760500,
+            'uploader_id': '3rfzk',
+        },
+    }, {
+        'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('embed_type') or mobj.group('site_type')
+        if video_type == 'music':
+            video_type = 'musique'
+
+        deliver_info = self._download_json(
+            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
+            video_id)
+
+        yt_id = deliver_info.get('yt_id')
+        if yt_id:
+            return self.url_result(yt_id, 'Youtube')
+
+        jwconf = deliver_info['jwconf']
+
+        formats = []
+        for source in jwconf['playlist'][0]['sources']:
+            formats.append({
+                'url': source['file'],
+                'format_id': source.get('label'),
+            })
+
+        self._sort_formats(formats)
+
+        title = deliver_info['title']
+        thumbnail = jwconf.get('image')
+        duration = int_or_none(deliver_info.get('duration'))
+        timestamp = int_or_none(deliver_info.get('release_time'))
+        uploader_id = deliver_info.get('owner_id')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
index b3f8efc80ba5f6ecaf99f528832f5ad4db8b11c3..26d3698c8b1e99a6899037a0a514e0a6480b2293 100644 (file)
@@ -57,7 +57,7 @@ from .pladform import PladformIE
 from .videomore import VideomoreIE
 from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
-from .ultimedia import UltimediaIE
+from .digiteka import DigitekaIE
 
 
 class GenericIE(InfoExtractor):
@@ -1402,7 +1402,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Dailymotion player
         matches = re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
+            r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
         if matches:
             return _playlist_from_matches(
                 matches, lambda m: unescapeHTML(m[1]))
@@ -1814,10 +1814,10 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
 
-        # Look for Ulltimedia embeds
-        ultimedia_url = UltimediaIE._extract_url(webpage)
-        if ultimedia_url:
-            return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
+        # Look for Digiteka embeds
+        digiteka_url = DigitekaIE._extract_url(webpage)
+        if digiteka_url:
+            return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
 
         # Look for AdobeTVVideo embeds
         mobj = re.search(
diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py
new file mode 100644 (file)
index 0000000..be66fff
--- /dev/null
@@ -0,0 +1,34 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LemondeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
+        'md5': '01fb3c92de4c12c573343d63e163d302',
+        'info_dict': {
+            'id': 'lqm3kl',
+            'ext': 'mp4',
+            'title': "Comprendre l'affaire Bygmalion en 5 minutes",
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 320,
+            'upload_date': '20160119',
+            'timestamp': 1453194778,
+            'uploader_id': '3pmkp',
+        },
+    }, {
+        'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        digiteka_url = self._proto_relative_url(self._search_regex(
+            r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
+            webpage, 'digiteka url', group='url'))
+        return self.url_result(digiteka_url, 'Digiteka')
index be648000ea1bfec896c7228159170e16aa8338ee..08bdae8a2182be28d2f7c5b3bc6d3fdcb66e1a49 100644 (file)
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import datetime
 import re
 import time
+import base64
 
 from .common import InfoExtractor
 from ..compat import (
@@ -16,7 +17,9 @@ from ..utils import (
     parse_iso8601,
     sanitized_Request,
     int_or_none,
+    str_or_none,
     encode_data_uri,
+    url_basename,
 )
 
 
@@ -239,3 +242,80 @@ class LetvPlaylistIE(LetvTvIE):
         },
         'playlist_mincount': 7
     }]
+
+
+class LetvCloudIE(InfoExtractor):
+    IE_DESC = '乐视云'
+    _VALID_URL = r'https?://yuntv\.letv\.com/bcloud.html\?.+'
+
+    _TESTS = [{
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
+        'md5': '26450599afd64c513bc77030ad15db44',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_467623dedf',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_467623dedf',
+        },
+    }, {
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_ec93197892',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_ec93197892',
+        },
+    }, {
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_187060b6fd',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_187060b6fd',
+        },
+    }]
+
+    def _real_extract(self, url):
+        uu_mobj = re.search('uu=([\w]+)', url)
+        vu_mobj = re.search('vu=([\w]+)', url)
+
+        if not uu_mobj or not vu_mobj:
+            raise ExtractorError('Invalid URL: %s' % url, expected=True)
+
+        uu = uu_mobj.group(1)
+        vu = vu_mobj.group(1)
+        media_id = uu + '_' + vu
+
+        play_json_req = sanitized_Request(
+            'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' +
+            'uu=' + uu + '&vu=' + vu)
+        play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data')
+
+        if not play_json.get('data'):
+            if play_json.get('message'):
+                raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
+            elif play_json.get('code'):
+                raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
+            else:
+                raise ExtractorError('Letv cloud returned an unknwon error')
+
+        def b64decode(s):
+            return base64.b64decode(s.encode('utf-8')).decode('utf-8')
+
+        formats = []
+        for media in play_json['data']['video_info']['media'].values():
+            play_url = media['play_url']
+            url = b64decode(play_url['main_url'])
+            decoded_url = b64decode(url_basename(url))
+            formats.append({
+                'url': url,
+                'ext': determine_ext(decoded_url),
+                'format_id': int_or_none(play_url.get('vtype')),
+                'format_note': str_or_none(play_url.get('definition')),
+                'width': int_or_none(play_url.get('vwidth')),
+                'height': int_or_none(play_url.get('vheight')),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': media_id,
+            'title': 'Video %s' % media_id,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
deleted file mode 100644 (file)
index 6032812..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class UltimediaIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?ultimedia\.com/
-        (?:
-            deliver/
-            (?P<embed_type>
-                generic|
-                musique
-            )
-            (?:/[^/]+)*/
-            (?:
-                src|
-                article
-            )|
-            default/index/video
-            (?P<site_type>
-                generic|
-                music
-            )
-            /id
-        )/(?P<id>[\d+a-z]+)'''
-    _TESTS = [{
-        # news
-        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
-        'md5': '276a0e49de58c7e85d32b057837952a2',
-        'info_dict': {
-            'id': 's8uk0r',
-            'ext': 'mp4',
-            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
-            'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 74,
-            'upload_date': '20150317',
-            'timestamp': 1426604939,
-            'uploader_id': '3fszv',
-        },
-    }, {
-        # music
-        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
-        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
-        'info_dict': {
-            'id': 'xvpfp8',
-            'ext': 'mp4',
-            'title': 'Two - C\'est La Vie (clip)',
-            'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 233,
-            'upload_date': '20150224',
-            'timestamp': 1424760500,
-            'uploader_id': '3rfzk',
-        },
-    }]
-
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
-            webpage)
-        if mobj:
-            return mobj.group('url')
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        video_type = mobj.group('embed_type') or mobj.group('site_type')
-        if video_type == 'music':
-            video_type = 'musique'
-
-        deliver_info = self._download_json(
-            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
-            video_id)
-
-        yt_id = deliver_info.get('yt_id')
-        if yt_id:
-            return self.url_result(yt_id, 'Youtube')
-
-        jwconf = deliver_info['jwconf']
-
-        formats = []
-        for source in jwconf['playlist'][0]['sources']:
-            formats.append({
-                'url': source['file'],
-                'format_id': source.get('label'),
-            })
-
-        self._sort_formats(formats)
-
-        title = deliver_info['title']
-        thumbnail = jwconf.get('image')
-        duration = int_or_none(deliver_info.get('duration'))
-        timestamp = int_or_none(deliver_info.get('release_time'))
-        uploader_id = deliver_info.get('owner_id')
-
-        return {
-            'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'uploader_id': uploader_id,
-            'formats': formats,
-        }
index 73b05ecab82a10a6c80360b0b980f285dfbb9c45..b5fe753d7115923d16ed6cf7de34c8723368f82f 100644 (file)
@@ -47,7 +47,7 @@ class UstreamIE(InfoExtractor):
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
 
-        # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+        # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
         if m.group('type') == 'embed/recorded':
             video_id = m.group('id')
             desktop_url = 'http://www.ustream.tv/recorded/' + video_id
index 185b1c1194f72122c99b26abd7ffb32d41934388..fe94a479339035dfd9a70386e903e5c3770fdfea 100644 (file)
@@ -155,10 +155,10 @@ class ViewsterIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        synopsis = info.get('Synopsis', {})
+        synopsis = info.get('Synopsis') or {}
         # Prefer title outside synopsis since it's less messy
         title = (info.get('Title') or synopsis['Title']).strip()
-        description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
+        description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
         duration = int_or_none(info.get('Duration'))
         timestamp = parse_iso8601(info.get('ReleaseDate'))
 
diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py
new file mode 100644 (file)
index 0000000..e333ae3
--- /dev/null
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class WeiqiTVIE(InfoExtractor):
+    IE_DESC = 'WQTV'
+    _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
+        'md5': '26450599afd64c513bc77030ad15db44',
+        'info_dict': {
+            'id': '53c744f09874f0e76a8b46f3',
+            'ext': 'mp4',
+            'title': '2013年度盘点',
+        },
+    }, {
+        'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569',
+        'info_dict': {
+            'id': '567379a2d4c36cca518b4569',
+            'ext': 'mp4',
+            'title': '民国围棋史',
+        },
+    }, {
+        'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567',
+        'info_dict': {
+            'id': '5430220a9874f088658b4567',
+            'ext': 'mp4',
+            'title': '二路托过的手段和运用',
+        },
+    }]
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        page = self._download_webpage(url, media_id)
+
+        info_json_str = self._search_regex(
+            'var\s+video\s*=\s*(.+});', page, 'info json str')
+        info_json = self._parse_json(info_json_str, media_id)
+
+        letvcloud_url = self._search_regex(
+            'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url')
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'LetvCloud',
+            'url': letvcloud_url,
+            'title': info_json['name'],
+            'id': media_id,
+        }
index e4f227f199d10ae3923acbc97d09263a9531470a..d31161d21c7279d33885fb7849b82e7513c96645 100644 (file)
@@ -613,7 +613,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
             'params': {
                 'skip_download': 'requires avconv',
-            }
+            },
+            'skip': 'This live event has ended.',
         },
         # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
         {
@@ -706,6 +707,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         },
         {
             # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+            # Also tests cut-off URL expansion in video description (see
+            # https://github.com/rg3/youtube-dl/issues/1892,
+            # https://github.com/rg3/youtube-dl/issues/8164)
             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
             'info_dict': {
                 'id': 'lsguqyKfVQg',
@@ -1237,7 +1241,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
                     (?:title|href)="([^"]+)"\s+
                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)".*?>
+                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
                 [^<]+\.{3}\s*
                 </a>
             ''', r'\1', video_description)
@@ -1505,6 +1509,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             for a_format in formats:
                 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
         else:
+            unavailable_message = self._html_search_regex(
+                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+                video_webpage, 'unavailable message', default=None)
+            if unavailable_message:
+                raise ExtractorError(unavailable_message, expected=True)
             raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
 
         # Look for the DASH manifest