# Download best video only format but no bigger than 50 MB
$ youtube-dl -f 'best[filesize<50M]'
+
+# Download best format available via direct link over HTTP/HTTPS protocol
+$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
```
DateRange,
DEFAULT_OUTTMPL,
determine_ext,
+ determine_protocol,
DownloadError,
encode_compat_str,
encodeFilename,
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url']).lower()
+ # Automatically determine protocol if missing (useful for format
+ # selection purposes)
+ if 'protocol' not in format:
+ format['protocol'] = determine_protocol(format)
# Add HTTP headers, so that external programs can use them from the
# json output
full_format_info = info_dict.copy()
else:
compat_getpass = getpass.getpass
-# Old 2.6 and 2.7 releases require kwargs to be bytes
+# Python < 2.6.5 requires kwargs to be bytes
try:
def _testfunc(x):
pass
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
-from .ae import AEIE
+from .aenetworks import AENetworksIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .lecture2go import Lecture2GoIE
+from .lemonde import LemondeIE
from .letv import (
LetvIE,
LetvTvIE,
- LetvPlaylistIE
+ LetvPlaylistIE,
+ LetvCloudIE,
)
from .libsyn import LibsynIE
from .lifenews import (
UdemyCourseIE
)
from .udn import UDNEmbedIE
-from .ultimedia import UltimediaIE
+from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
WebOfStoriesPlaylistIE,
)
from .weibo import WeiboIE
+from .weiqitv import WeiqiTVIE
from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
+++ /dev/null
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import smuggle_url
-
-
-class AEIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
-
- _TESTS = [{
- 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
- 'info_dict': {
- 'id': 'g12m5Gyt3fdR',
- 'ext': 'mp4',
- 'title': "Bet You Didn't Know: Valentine's Day",
- 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['ThePlatform'],
- }, {
- 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
- 'info_dict': {
- 'id': 'eg47EERs_JsZ',
- 'ext': 'mp4',
- 'title': "Winter Is Coming",
- 'description': 'md5:a40e370925074260b1c8a633c632c63a',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['ThePlatform'],
- }, {
- 'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
- 'only_matching': True
- }, {
- 'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
- 'only_matching': True
- }, {
- 'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
- 'only_matching': True
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_url_re = [
- r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
- r"media_url\s*=\s*'([^']+)'"
- ]
- video_url = self._search_regex(video_url_re, webpage, 'video url')
-
- info = self._search_json_ld(webpage, video_id, fatal=False)
- info.update({
- '_type': 'url_transparent',
- 'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
- })
- return info
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class AENetworksIE(InfoExtractor):
+    """Extractor for video pages on history.com, aetv.com, mylifetime.com
+    and fyi.tv.
+
+    The page is scanned for a release URL, which is returned as a
+    ``url_transparent`` result enriched with any JSON-LD metadata found.
+    """
+
+    IE_NAME = 'aenetworks'
+    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
+    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+
+    _TESTS = [
+        {
+            'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+            'info_dict': {
+                'id': 'g12m5Gyt3fdR',
+                'ext': 'mp4',
+                'title': "Bet You Didn't Know: Valentine's Day",
+                'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'add_ie': ['ThePlatform'],
+            'expected_warnings': ['JSON-LD'],
+        },
+        {
+            'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
+            'info_dict': {
+                'id': 'eg47EERs_JsZ',
+                'ext': 'mp4',
+                'title': "Winter Is Coming",
+                'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
+            'add_ie': ['ThePlatform'],
+        },
+        {
+            'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
+            'only_matching': True
+        },
+        {
+            'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
+            'only_matching': True
+        },
+        {
+            'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
+            'only_matching': True
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at the page's release URL."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Two page layouts are tried in order: a data-release-url attribute
+        # tied to this video id, then an inline media_url assignment.
+        release_url_patterns = [
+            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
+            r"media_url\s*=\s*'([^']+)'"
+        ]
+        release_url = self._search_regex(
+            release_url_patterns, webpage, 'video url')
+        signing_params = {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}
+
+        # JSON-LD metadata is optional here (fatal=False); whatever it yields
+        # is used as the base of the result.
+        result = self._search_json_ld(webpage, video_id, fatal=False)
+        result['_type'] = 'url_transparent'
+        result['url'] = smuggle_url(release_url, signing_params)
+        return result
})
formats.append(format_info)
- m3u8_url = player.get('urlVideoHls')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
-
timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
webpage = self._download_webpage(url, playlist_id)
- timestamp = None
- playlist_title = None
- playlist_description = None
-
- ld = self._parse_json(
- self._search_regex(
- r'(?s)<script type="application/ld\+json">(.+?)</script>',
- webpage, 'ld json', default='{}'),
- playlist_id, fatal=False)
- if ld:
- timestamp = parse_iso8601(ld.get('datePublished'))
- playlist_title = ld.get('headline')
- playlist_description = ld.get('articleBody')
+ json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
+ timestamp = json_ld_info.get('timestamp')
+ playlist_title = json_ld_info.get('title')
+ playlist_description = json_ld_info.get('description')
if not timestamp:
timestamp = parse_iso8601(self._search_regex(
return self._html_search_meta('twitter:player', html,
'twitter card player')
-    def _search_json_ld(self, html, video_id, fatal=True):
+    def _search_json_ld(self, html, video_id, **kwargs):
+        """Find the first JSON-LD <script> block in html and parse it.
+
+        Extra keyword arguments (e.g. ``fatal``, ``default``) are forwarded
+        to _search_regex. An empty dict is returned when that search yields
+        nothing; otherwise the result of _json_ld is returned.
+        """
         json_ld = self._search_regex(
             r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
-            html, 'JSON-LD', fatal=fatal, group='json_ld')
+            html, 'JSON-LD', group='json_ld', **kwargs)
         if not json_ld:
             return {}
-        return self._json_ld(json_ld, video_id, fatal=fatal)
+        # A caller that supplies `default` wants a best-effort lookup, so a
+        # malformed JSON-LD payload must not abort extraction either. Only
+        # stay fatal when no default was given and fatal was not disabled.
+        fatal = kwargs.get('fatal', True) and 'default' not in kwargs
+        return self._json_ld(json_ld, video_id, fatal=fatal)
def _json_ld(self, json_ld, video_id, fatal=True):
if isinstance(json_ld, compat_str):
class DailymotionIE(DailymotionBaseInfoExtractor):
- _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
+ _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = 'dailymotion'
_FORMATS = [
{
'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
'only_matching': True,
+ },
+ {
+ 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
+ 'only_matching': True,
}
]
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DigitekaIE(InfoExtractor):
+    """Extractor for videos served from digiteka.net and ultimedia.com.
+
+    Accepts both embed ("deliver/...") URLs and site
+    ("default/index/video...") URLs.
+    """
+
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
+        (?:
+            deliver/
+            (?P<embed_type>
+                generic|
+                musique
+            )
+            (?:/[^/]+)*/
+            (?:
+                src|
+                article
+            )|
+            default/index/video
+            (?P<site_type>
+                generic|
+                music
+            )
+            /id
+        )/(?P<id>[\d+a-z]+)'''
+    _TESTS = [{
+        # news
+        'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+        'md5': '276a0e49de58c7e85d32b057837952a2',
+        'info_dict': {
+            'id': 's8uk0r',
+            'ext': 'mp4',
+            'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 74,
+            'upload_date': '20150317',
+            'timestamp': 1426604939,
+            'uploader_id': '3fszv',
+        },
+    }, {
+        # music
+        'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+        'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+        'info_dict': {
+            'id': 'xvpfp8',
+            'ext': 'mp4',
+            'title': 'Two - C\'est La Vie (clip)',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 233,
+            'upload_date': '20150224',
+            'timestamp': 1424760500,
+            'uploader_id': '3rfzk',
+        },
+    }, {
+        'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_url(webpage):
+        """Return the first embedded player URL found in webpage, or None.
+
+        Looks at the src attribute of <iframe> and <script> tags. Both hosts
+        accepted by _VALID_URL are recognised, so an embed served from
+        digiteka.net is found as well as one served from ultimedia.com.
+        The returned URL may be protocol-relative.
+        """
+        mobj = re.search(
+            r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        """Fetch the delivery JSON for the video and build its info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        video_type = mobj.group('embed_type') or mobj.group('site_type')
+        # Site URLs say "music" where the delivery endpoint below is queried
+        # with "musique" (the spelling embed URLs use).
+        if video_type == 'music':
+            video_type = 'musique'
+
+        deliver_info = self._download_json(
+            'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
+            video_id)
+
+        # When the delivery data carries a yt_id, hand over to the Youtube
+        # extractor instead of reading the jwplayer configuration.
+        yt_id = deliver_info.get('yt_id')
+        if yt_id:
+            return self.url_result(yt_id, 'Youtube')
+
+        jwconf = deliver_info['jwconf']
+
+        formats = []
+        for source in jwconf['playlist'][0]['sources']:
+            formats.append({
+                'url': source['file'],
+                'format_id': source.get('label'),
+            })
+
+        self._sort_formats(formats)
+
+        title = deliver_info['title']
+        thumbnail = jwconf.get('image')
+        duration = int_or_none(deliver_info.get('duration'))
+        timestamp = int_or_none(deliver_info.get('release_time'))
+        uploader_id = deliver_info.get('owner_id')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'timestamp': timestamp,
+            'uploader_id': uploader_id,
+            'formats': formats,
+        }
from .videomore import VideomoreIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
-from .ultimedia import UltimediaIE
+from .digiteka import DigitekaIE
class GenericIE(InfoExtractor):
# Look for embedded Dailymotion player
matches = re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
+ r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
if matches:
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
if mobj is not None:
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
- # Look for Ulltimedia embeds
- ultimedia_url = UltimediaIE._extract_url(webpage)
- if ultimedia_url:
- return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
+ # Look for Digiteka embeds
+ digiteka_url = DigitekaIE._extract_url(webpage)
+ if digiteka_url:
+ return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
# Look for AdobeTVVideo embeds
mobj = re.search(
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class LemondeIE(InfoExtractor):
+    """Extractor for lemonde.fr article pages that embed a Digiteka player.
+
+    The page is searched for the player URL and extraction is delegated to
+    the Digiteka extractor.
+    """
+
+    _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
+        'md5': '01fb3c92de4c12c573343d63e163d302',
+        'info_dict': {
+            'id': 'lqm3kl',
+            'ext': 'mp4',
+            'title': "Comprendre l'affaire Bygmalion en 5 minutes",
+            'thumbnail': 're:^https?://.*\.jpg',
+            'duration': 320,
+            'upload_date': '20160119',
+            'timestamp': 1453194778,
+            'uploader_id': '3pmkp',
+        },
+    }, {
+        'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Find the embedded Digiteka URL and return it as a url result."""
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        # The scheme is optional so that both absolute ("http://host/...")
+        # and protocol-relative ("//host/...") player URLs match. The
+        # previous "(?:https?://)?//" form required four slashes after the
+        # scheme and therefore only ever matched protocol-relative URLs.
+        digiteka_url = self._proto_relative_url(self._search_regex(
+            r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
+            webpage, 'digiteka url', group='url'))
+        return self.url_result(digiteka_url, 'Digiteka')
import datetime
import re
import time
+import base64
from .common import InfoExtractor
from ..compat import (
parse_iso8601,
sanitized_Request,
int_or_none,
+ str_or_none,
encode_data_uri,
+ url_basename,
)
},
'playlist_mincount': 7
}]
+
+
+class LetvCloudIE(InfoExtractor):
+    """Extractor for Letv cloud player pages (yuntv.letv.com/bcloud.html).
+
+    The video is identified by the ``uu`` and ``vu`` query parameters of the
+    page URL; formats are read from the letvcloud "gpc" JSON API.
+    """
+
+    IE_DESC = '乐视云'
+    _VALID_URL = r'https?://yuntv\.letv\.com/bcloud\.html\?.+'
+
+    _TESTS = [{
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
+        'md5': '26450599afd64c513bc77030ad15db44',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_467623dedf',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_467623dedf',
+        },
+    }, {
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_ec93197892',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_ec93197892',
+        },
+    }, {
+        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
+        'info_dict': {
+            'id': 'p7jnfw5hw9_187060b6fd',
+            'ext': 'mp4',
+            'title': 'Video p7jnfw5hw9_187060b6fd',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Query the play JSON for the uu/vu pair in url and list its formats.
+
+        Raises ExtractorError when the URL lacks ``uu`` or ``vu``, or when
+        the API response carries no ``data``.
+        """
+        uu_mobj = re.search(r'uu=(\w+)', url)
+        vu_mobj = re.search(r'vu=(\w+)', url)
+
+        if not uu_mobj or not vu_mobj:
+            raise ExtractorError('Invalid URL: %s' % url, expected=True)
+
+        uu = uu_mobj.group(1)
+        vu = vu_mobj.group(1)
+        media_id = uu + '_' + vu
+
+        play_json_req = sanitized_Request(
+            'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' +
+            'uu=' + uu + '&vu=' + vu)
+        play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data')
+
+        if not play_json.get('data'):
+            if play_json.get('message'):
+                raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
+            elif play_json.get('code'):
+                # %s rather than %d: the type of `code` is not known here and
+                # %d would raise TypeError on a non-numeric value.
+                raise ExtractorError('Letv cloud returned error %s' % play_json['code'], expected=True)
+            else:
+                raise ExtractorError('Letv cloud returned an unknown error')
+
+        def b64decode(s):
+            # Text in, text out: base64-decode s and interpret it as UTF-8.
+            return base64.b64decode(s.encode('utf-8')).decode('utf-8')
+
+        formats = []
+        for media in play_json['data']['video_info']['media'].values():
+            play_url = media['play_url']
+            media_url = b64decode(play_url['main_url'])
+            # The last path component of the media URL is itself base64; its
+            # decoded form is what the file extension is derived from.
+            decoded_url = b64decode(url_basename(media_url))
+            formats.append({
+                'url': media_url,
+                'ext': determine_ext(decoded_url),
+                # format_id is a textual identifier, so keep it a string
+                # instead of coercing the vtype to an int.
+                'format_id': str_or_none(play_url.get('vtype')),
+                'format_note': str_or_none(play_url.get('definition')),
+                'width': int_or_none(play_url.get('vwidth')),
+                'height': int_or_none(play_url.get('vheight')),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': media_id,
+            'title': 'Video %s' % media_id,
+            'formats': formats,
+        }
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class UltimediaIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://(?:www\.)?ultimedia\.com/
- (?:
- deliver/
- (?P<embed_type>
- generic|
- musique
- )
- (?:/[^/]+)*/
- (?:
- src|
- article
- )|
- default/index/video
- (?P<site_type>
- generic|
- music
- )
- /id
- )/(?P<id>[\d+a-z]+)'''
- _TESTS = [{
- # news
- 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
- 'md5': '276a0e49de58c7e85d32b057837952a2',
- 'info_dict': {
- 'id': 's8uk0r',
- 'ext': 'mp4',
- 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
- 'thumbnail': 're:^https?://.*\.jpg',
- 'duration': 74,
- 'upload_date': '20150317',
- 'timestamp': 1426604939,
- 'uploader_id': '3fszv',
- },
- }, {
- # music
- 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
- 'md5': '2ea3513813cf230605c7e2ffe7eca61c',
- 'info_dict': {
- 'id': 'xvpfp8',
- 'ext': 'mp4',
- 'title': 'Two - C\'est La Vie (clip)',
- 'thumbnail': 're:^https?://.*\.jpg',
- 'duration': 233,
- 'upload_date': '20150224',
- 'timestamp': 1424760500,
- 'uploader_id': '3rfzk',
- },
- }]
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
- webpage)
- if mobj:
- return mobj.group('url')
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- video_type = mobj.group('embed_type') or mobj.group('site_type')
- if video_type == 'music':
- video_type = 'musique'
-
- deliver_info = self._download_json(
- 'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
- video_id)
-
- yt_id = deliver_info.get('yt_id')
- if yt_id:
- return self.url_result(yt_id, 'Youtube')
-
- jwconf = deliver_info['jwconf']
-
- formats = []
- for source in jwconf['playlist'][0]['sources']:
- formats.append({
- 'url': source['file'],
- 'format_id': source.get('label'),
- })
-
- self._sort_formats(formats)
-
- title = deliver_info['title']
- thumbnail = jwconf.get('image')
- duration = int_or_none(deliver_info.get('duration'))
- timestamp = int_or_none(deliver_info.get('release_time'))
- uploader_id = deliver_info.get('owner_id')
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'uploader_id': uploader_id,
- 'formats': formats,
- }
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
- # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
+ # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
if m.group('type') == 'embed/recorded':
video_id = m.group('id')
desktop_url = 'http://www.ustream.tv/recorded/' + video_id
self._sort_formats(formats)
- synopsis = info.get('Synopsis', {})
+ synopsis = info.get('Synopsis') or {}
# Prefer title outside synopsis since it's less messy
title = (info.get('Title') or synopsis['Title']).strip()
- description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
+ description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
duration = int_or_none(info.get('Duration'))
timestamp = parse_iso8601(info.get('ReleaseDate'))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class WeiqiTVIE(InfoExtractor):
+    """Extractor for weiqitv.com video pages.
+
+    The page supplies the title and a Letv cloud URL; the actual media
+    extraction is delegated to the LetvCloud extractor.
+    """
+
+    IE_DESC = 'WQTV'
+    _VALID_URL = r'http://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3',
+        'md5': '26450599afd64c513bc77030ad15db44',
+        'info_dict': {
+            'id': '53c744f09874f0e76a8b46f3',
+            'ext': 'mp4',
+            'title': '2013年度盘点',
+        },
+    }, {
+        'url': 'http://www.weiqitv.com/index/video_play?videoId=567379a2d4c36cca518b4569',
+        'info_dict': {
+            'id': '567379a2d4c36cca518b4569',
+            'ext': 'mp4',
+            'title': '民国围棋史',
+        },
+    }, {
+        'url': 'http://www.weiqitv.com/index/video_play?videoId=5430220a9874f088658b4567',
+        'info_dict': {
+            'id': '5430220a9874f088658b4567',
+            'ext': 'mp4',
+            'title': '二路托过的手段和运用',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Read title and Letv cloud URL from the page; return a
+        url_transparent result handled by LetvCloud."""
+        media_id = self._match_id(url)
+        page = self._download_webpage(url, media_id)
+
+        # Raw strings: "\s" inside a normal string literal is an invalid
+        # escape sequence. The resulting patterns are unchanged.
+        info_json_str = self._search_regex(
+            r'var\s+video\s*=\s*(.+});', page, 'info json str')
+        info_json = self._parse_json(info_json_str, media_id)
+
+        letvcloud_url = self._search_regex(
+            r'var\s+letvurl\s*=\s*"([^"]+)', page, 'letvcloud url')
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'LetvCloud',
+            'url': letvcloud_url,
+            'title': info_json['name'],
+            'id': media_id,
+        }
},
'params': {
'skip_download': 'requires avconv',
- }
+ },
+ 'skip': 'This live event has ended.',
},
# Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
{
},
{
# Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/rg3/youtube-dl/issues/1892,
+ # https://github.com/rg3/youtube-dl/issues/8164)
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
(?:[a-zA-Z-]+="[^"]+"\s+)*?
(?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
- class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)".*?>
+ class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
[^<]+\.{3}\s*
</a>
''', r'\1', video_description)
for a_format in formats:
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
else:
+ unavailable_message = self._html_search_regex(
+ r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+ video_webpage, 'unavailable message', default=None)
+ if unavailable_message:
+ raise ExtractorError(unavailable_message, expected=True)
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest