To run the exe you need to first install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
-# BUILD INSTRUCTIONS
+# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
* zip
* nosetests
+### Adding support for a new site
+
+If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
+
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
--- /dev/null
+# This allows the youtube-dl command to be installed in ZSH using antigen.
+# Antigen is a bundle manager. It allows you to enhance the functionality of
+# your zsh session by installing bundles and themes easily.
+
+# Antigen documentation:
+# http://antigen.sharats.me/
+# https://github.com/zsh-users/antigen
+
+# Install youtube-dl:
+# antigen bundle rg3/youtube-dl
+# Bundles installed by antigen are available for use immediately.
+
+# Update youtube-dl (and all other antigen bundles):
+# antigen update
+
+# The antigen command will download the git repository to a folder and then
+# execute an enabling script (this file). The complete process for loading the
+# code is documented here:
+# https://github.com/zsh-users/antigen#notes-on-writing-plugins
+
+# This specific script just aliases youtube-dl to the python script that this
+# library provides. This requires updating the PYTHONPATH to ensure that the
+# full set of code can be located.
+alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .la7 import LA7IE
+from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import (
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
- ooyala_code = self._search_regex(r'<source src="http://player.ooyala.com/player/[^/]+/([^".]+)', webpage, u'ooyala url')
- return OoyalaIE._build_url_result(ooyala_code)
+ ooyala_url = self._twitter_search_player(webpage)
+ return self.url_result(ooyala_url, OoyalaIE.ie_key())
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
'age_limit': 10,
},
- }]
+ },
+ # embedded youtube video
+ {
+ 'url': 'http://www.collegehumor.com/embed/6950457',
+ 'info_dict': {
+ 'id': 'W5gMp3ZjYg4',
+ 'ext': 'mp4',
+ 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
+ 'uploader': 'Funnyplox TV',
+ 'uploader_id': 'funnyploxtv',
+ 'description': 'md5:7e8899d3f749db50fa089eb243cba17f',
+ 'upload_date': '20140128',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Youtube'],
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
data = json.loads(self._download_webpage(
jsonUrl, video_id, 'Downloading info JSON'))
vdata = data['video']
+ if vdata.get('youtubeId') is not None:
+ return {
+ '_type': 'url',
+ 'url': vdata['youtubeId'],
+ 'ie_key': 'Youtube',
+ }
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
rating = vdata.get('rating')
PREFS = {'high_quality': 2, 'low_quality': 0}
formats = []
for format_key in ('mp4', 'webm'):
- for qname, qurl in vdata[format_key].items():
+ for qname, qurl in vdata.get(format_key, {}).items():
formats.append({
'format_id': format_key + '_' + qname,
'url': qurl,
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+ _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
- m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+ m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError('Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
# Helper functions for extracting OpenGraph info
@staticmethod
def _og_regexes(prop):
- content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
+ content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
}
return RATING_TABLE.get(rating.lower(), None)
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')
# encoding: utf-8
+from __future__ import unicode_literals
+
import re, base64, zlib
from hashlib import sha1
from math import pow, sqrt, floor
)
class CrunchyrollIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
- u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
- u'file': u'645513.flv',
- #u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
- u'info_dict': {
- u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
- u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
- u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
- u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
- u'upload_date': u'20131013',
+ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+ 'file': '645513.flv',
+ #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
+ 'info_dict': {
+ 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+ 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
+ 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+ 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
+ 'upload_date': '20131013',
},
- u'params': {
+ 'params': {
# rtmp
- u'skip_download': True,
+ 'skip_download': True,
},
}]
_FORMAT_IDS = {
- u'360': (u'60', u'106'),
- u'480': (u'61', u'106'),
- u'720': (u'62', u'106'),
- u'1080': (u'80', u'108'),
+ '360': ('60', '106'),
+ '480': ('61', '106'),
+ '720': ('62', '106'),
+ '1080': ('80', '108'),
}
def _decrypt_subtitles(self, data, iv, id):
num3 = key ^ num1
num4 = num3 ^ (num3 >> 3) ^ num2
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
- shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
+ shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
# Extend 160 Bit hash to 256 Bit
return shaHash + [0] * 12
def _convert_subtitles_to_srt(self, subtitles):
i=1
- output = u''
+ output = ''
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
- start = start.replace(u'.', u',')
- end = end.replace(u'.', u',')
+ start = start.replace('.', ',')
+ end = end.replace('.', ',')
text = clean_html(text)
- text = text.replace(u'\\N', u'\n')
+ text = text.replace('\\N', '\n')
if not text:
continue
- output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
+ output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
i+=1
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+
+ if mobj.group('prefix') == 'm':
+ mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
+ webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
+ else:
+ webpage_url = 'http://www.' + mobj.group('url')
- webpage_url = u'http://www.' + mobj.group('url')
- video_id = mobj.group(u'video_id')
- webpage = self._download_webpage(webpage_url, video_id)
- note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
+ webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
+ note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
if note_m:
raise ExtractorError(note_m)
- video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
- video_title = re.sub(r' {2,}', u' ', video_title)
- video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
+ video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
+ video_title = re.sub(r' {2,}', ' ', video_title)
+ video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
if not video_description:
video_description = None
- video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
+ video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
- video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
+ video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
- playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
+ playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
- playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
- playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
- playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
+ playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
+ playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
- stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
- video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
+ stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
+ video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
- video_format = fmt+u'p'
- streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
+ video_format = fmt+'p'
+ streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
- streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
- streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
- streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
- streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
- video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
- video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
+ streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
+ streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
+ streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format)
+ video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
+ video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
formats.append({
- u'url': video_url,
- u'play_path': video_play_path,
- u'ext': 'flv',
- u'format': video_format,
- u'format_id': video_format,
+ 'url': video_url,
+ 'play_path': video_play_path,
+ 'ext': 'flv',
+ 'format': video_format,
+ 'format_id': video_format,
})
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
- video_id, note=u'Downloading subtitles for '+sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
- iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
- data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
+ sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
+ video_id, note='Downloading subtitles for '+sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
- subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
- lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
+ subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
- u'id': video_id,
- u'title': video_title,
- u'description': video_description,
- u'thumbnail': video_thumbnail,
- u'uploader': video_uploader,
- u'upload_date': video_upload_date,
- u'subtitles': subtitles,
- u'formats': formats,
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ 'upload_date': video_upload_date,
+ 'subtitles': subtitles,
+ 'formats': formats,
}
# encoding: utf-8
+
+from __future__ import unicode_literals
+
import re
import json
class PluzzIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'pluzz.francetv.fr'
+ IE_NAME = 'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
# Can't use tests, videos expire in 7 days
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'francetvinfo.fr'
+ IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
_TEST = {
- u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
- u'file': u'84981923.mp4',
- u'info_dict': {
- u'title': u'Soir 3',
+ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+ 'file': '84981923.mp4',
+ 'info_dict': {
+ 'title': 'Soir 3',
},
- u'params': {
- u'skip_download': True,
+ 'params': {
+ 'skip_download': True,
},
}
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
- video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
+ video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
return self._extract_video(video_id)
class FranceTVIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'francetv'
- IE_DESC = u'France 2, 3, 4, 5 and Ô'
+ IE_NAME = 'francetv'
+ IE_DESC = 'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
(?:
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
_TESTS = [
# france2
{
- u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
- u'file': u'75540104.mp4',
- u'info_dict': {
- u'title': u'13h15, le samedi...',
- u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+ 'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+ 'file': '75540104.mp4',
+ 'info_dict': {
+ 'title': '13h15, le samedi...',
+ 'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france3
{
- u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
- u'info_dict': {
- u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
- u'ext': u'flv',
- u'title': u'Le scandale du prix des médicaments',
- u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
+ 'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
+ 'info_dict': {
+ 'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
+ 'ext': 'flv',
+ 'title': 'Le scandale du prix des médicaments',
+ 'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france4
{
- u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
- u'info_dict': {
- u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
- u'ext': u'flv',
- u'title': u'Hero Corp Making of - Extrait 1',
- u'description': u'md5:c87d54871b1790679aec1197e73d650a',
+ 'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ 'info_dict': {
+ 'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
+ 'ext': 'flv',
+ 'title': 'Hero Corp Making of - Extrait 1',
+ 'description': 'md5:c87d54871b1790679aec1197e73d650a',
},
- u'params': {
+ 'params': {
# rtmp download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# france5
{
- u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
- u'info_dict': {
- u'id': u'92837968',
- u'ext': u'mp4',
- u'title': u'C à dire ?!',
- u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
+ 'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
+ 'info_dict': {
+ 'id': '92837968',
+ 'ext': 'mp4',
+ 'title': 'C à dire ?!',
+ 'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
},
# franceo
{
- u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
- u'info_dict': {
- u'id': u'92327925',
- u'ext': u'mp4',
- u'title': u'Infô-Afrique',
- u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
+ 'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
+ 'info_dict': {
+ 'id': '92327925',
+ 'ext': 'mp4',
+ 'title': 'Infô-Afrique',
+ 'description': 'md5:ebf346da789428841bee0fd2a935ea55',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
- u'skip': u'The id changes frequently',
+ 'skip': 'The id changes frequently',
},
]
'\.fr/\?id-video=([^"/&]+)'),
(r'<a class="video" id="ftv_player_(.+?)"'),
]
- video_id = self._html_search_regex(id_res, webpage, u'video ID')
+ video_id = self._html_search_regex(id_res, webpage, 'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)
class GenerationQuoiIE(InfoExtractor):
- IE_NAME = u'france2.fr:generation-quoi'
+ IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
_TEST = {
- u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
- u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
- u'info_dict': {
- u'title': u'Génération Quoi - Garde à Vous',
- u'uploader': u'Génération Quoi',
+ 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+ 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
+ 'info_dict': {
+ 'title': 'Génération Quoi - Garde à Vous',
+ 'uploader': 'Génération Quoi',
},
- u'params': {
+ 'params': {
# It uses Dailymotion
- u'skip_download': True,
+ 'skip_download': True,
},
}
class CultureboxIE(FranceTVBaseInfoExtractor):
- IE_NAME = u'culturebox.francetvinfo.fr'
+ IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
- u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
- u'info_dict': {
- u'id': u'EV_6785',
- u'ext': u'mp4',
- u'title': u'Einstein on the beach au Théâtre du Châtelet',
- u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
+ 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
+ 'info_dict': {
+ 'id': 'EV_6785',
+ 'ext': 'mp4',
+ 'title': 'Einstein on the beach au Théâtre du Châtelet',
+ 'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
},
- u'params': {
+ 'params': {
# m3u8 download
- u'skip_download': True,
+ 'skip_download': True,
},
}
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
- video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
+ video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
return self._extract_video(video_id)
'title': 'R\u00e9gis plante sa Jeep',
}
},
- # embedded vimeo video
- {
- 'add_ie': ['Vimeo'],
- 'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
- 'file': '22444065.mp4',
- 'md5': '2903896e23df39722c33f015af0666e2',
- 'info_dict': {
- 'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
- 'uploader_id': 'skillsmatter',
- 'uploader': 'Skills Matter',
- }
- },
# bandcamp page with custom domain
{
'add_ie': ['Bandcamp'],
# Look for embedded (iframe) Vimeo player
mobj = re.search(
- r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
+ r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
if mobj:
player_url = unescapeHTML(mobj.group(1))
surl = smuggle_url(player_url, {'Referer': url})
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
- r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+ r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
return self.url_result(mobj.group(1), 'Vimeo')
return self.url_result(mobj.group(1), 'Aparat')
# Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded Huffington Post player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'HuffPost')
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
# Look for gorilla-vid style embedding
- mobj = re.search(r'(?s)jw_plugins.*?file:\s*["\'](.*?)["\']', webpage)
+ mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+from __future__ import unicode_literals
+
import base64
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
-
- ExtractorError,
)
class InfoQIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
_TEST = {
- u"name": u"InfoQ",
- u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
- u"file": u"12-jan-pythonthings.mp4",
- u"info_dict": {
- u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
- u"title": u"A Few of My Favorite [Python] Things"
+ "name": "InfoQ",
+ "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
+ "file": "12-jan-pythonthings.mp4",
+ "info_dict": {
+ "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
+ "title": "A Few of My Favorite [Python] Things",
+ },
+ "params": {
+ "skip_download": True,
},
- u"params": {
- u"skip_download": True
- }
}
def _real_extract(self, url):
self.report_extraction(url)
# Extract video URL
- mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
- if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
- real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
+ encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
+ real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
# Extract title
video_title = self._search_regex(r'contentTitle = "(.*?)";',
- webpage, u'title')
+ webpage, 'title')
# Extract description
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
- webpage, u'description', fatal=False)
+ webpage, 'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.')
- info = {
+ return {
'id': video_id,
'url': video_url,
- 'uploader': None,
- 'upload_date': None,
'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
- 'thumbnail': None,
'description': video_description,
}
-
- return [info]
\ No newline at end of file
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LifeNewsIE(InfoExtractor):
+ IE_NAME = 'lifenews'
+ IE_DESC = 'LIFE | NEWS'
+ _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://lifenews.ru/news/126342',
+ 'file': '126342.mp4',
+ 'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
+ 'info_dict': {
+ 'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
+ 'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
+ 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
+ 'upload_date': '20140130',
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
+
+ video_url = self._html_search_regex(
+ r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
+
+ thumbnail = self._html_search_regex(
+ r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
+
+ title = self._og_search_title(webpage)
+ TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
+ if title.endswith(TITLE_SUFFIX):
+ title = title[:-len(TITLE_SUFFIX)]
+
+ description = self._og_search_description(webpage)
+
+ view_count = self._html_search_regex(
+ r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
+ comment_count = self._html_search_regex(
+ r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
+
+ upload_date = self._html_search_regex(
+ r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'description': description,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'upload_date': unified_strdate(upload_date),
+ }
\ No newline at end of file
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'file': '757_1364311680.mp4',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
}
- }
+ },
+ {
+ 'url': 'http://www.liveleak.com/view?i=f93_1390833151',
+ 'file': 'f93_1390833151.mp4',
+ 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
+ 'info_dict': {
+ 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
+ 'uploader': 'ARD_Stinkt',
+ 'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
+ }
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(
- r'file: "(.*?)",', webpage, 'video URL')
+ sources_raw = self._search_regex(
+ r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
+ if sources_raw is None:
+ sources_raw = '[{ %s}]' % (
+ self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
+
+ sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
+ sources = json.loads(sources_json)
+
+ formats = [{
+ 'format_note': s.get('label'),
+ 'url': s['file'],
+ } for s in sources]
+ self._sort_formats(formats)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
return {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
'title': video_title,
'description': video_description,
- 'uploader': video_uploader
+ 'uploader': video_uploader,
+ 'formats': formats,
}
'info_dict': {
"title": "Bien dur",
"age_limit": 18,
- }
+ },
+ 'skip': 'This video has been deleted.'
}
def _real_extract(self, url):
if mgid.endswith('.swf'):
mgid = mgid[:-4]
except RegexNotFoundError:
- mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
+ mgid = self._search_regex(
+ [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
+ webpage, u'mgid')
return self._get_videos_info(mgid)
+from __future__ import unicode_literals
import os.path
from .common import InfoExtractor
class MySpassIE(InfoExtractor):
_VALID_URL = r'http://www\.myspass\.de/.*'
_TEST = {
- u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
- u'file': u'11741.mp4',
- u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
- u'info_dict': {
- u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
- u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
- }
+ 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+ 'file': '11741.mp4',
+ 'md5': '0b49f4844a068f8b33f4b7c88405862b',
+ 'info_dict': {
+ 'description': "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
+ 'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
+ },
}
def _real_extract(self, url):
# extract values from metadata
url_flv_el = metadata.find('url_flv')
if url_flv_el is None:
- raise ExtractorError(u'Unable to extract download url')
+ raise ExtractorError('Unable to extract download url')
video_url = url_flv_el.text
- extension = os.path.splitext(video_url)[1][1:]
title_el = metadata.find('title')
if title_el is None:
- raise ExtractorError(u'Unable to extract title')
+ raise ExtractorError('Unable to extract title')
title = title_el.text
format_id_el = metadata.find('format_id')
if format_id_el is None:
thumbnail = imagePreview_el.text
else:
thumbnail = None
- info = {
+
+ return {
'id': video_id,
'url': video_url,
'title': title,
- 'ext': extension,
'format': format,
'thumbnail': thumbnail,
- 'description': description
+ 'description': description,
}
- return [info]
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
-)
class NBAIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
- u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
- u'file': u'0021200253-okc-bkn-recap.nba.mp4',
- u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
- u'info_dict': {
- u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
- u"title": u"Thunder vs. Nets"
- }
+ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
+ 'file': '0021200253-okc-bkn-recap.nba.mp4',
+ 'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
+ 'info_dict': {
+ 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
+ 'title': 'Thunder vs. Nets',
+ },
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
-
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
- video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+ video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
shortened_video_id = video_id.rpartition('/')[2]
title = self._og_search_title(webpage, default=shortened_video_id).replace('NBA.com: ', '')
- # It isn't there in the HTML it returns to us
- # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
-
description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
- info = {
+ return {
'id': shortened_video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
- # 'uploader_date': uploader_date,
'description': description,
}
- return [info]
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
- _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
+ _VALID_URL = r'https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+# encoding: utf-8
+from __future__ import unicode_literals
+
import json
import re
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
_TEST = {
- u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
- u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
- u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
- u'info_dict': {
- u"uploader_id": u"ford-lopatin",
- u"location": u"Spain",
- u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
- u"uploader": u"Ford & Lopatin",
- u"title": u"Live at Primavera Sound 2011"
- }
+ 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
+ 'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
+ 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
+ 'info_dict': {
+ 'uploader_id': 'ford-lopatin',
+ 'location': 'Spain',
+ 'description': 'Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.',
+ 'uploader': 'Ford & Lopatin',
+ 'title': 'Live at Primavera Sound 2011',
+ },
}
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
- webpage, u'json data', flags=re.MULTILINE)
+ webpage, 'json data', flags=re.MULTILINE)
try:
data = json.loads(json_data)
except ValueError as e:
- raise ExtractorError(u'Invalid JSON: ' + str(e))
+ raise ExtractorError('Invalid JSON: ' + str(e))
video_url = data['akamai_url'] + '&cbr=256'
url_parts = compat_urllib_parse_urlparse(video_url)
- video_ext = url_parts.path.rpartition('.')[2]
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': video_ext,
- 'title': data['title'],
- 'description': data.get('teaser_text'),
- 'location': data.get('country_of_origin'),
- 'uploader': data.get('host', {}).get('name'),
- 'uploader_id': data.get('host', {}).get('slug'),
- 'thumbnail': data.get('image', {}).get('large_url_2x'),
- 'duration': data.get('duration'),
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': data['title'],
+ 'description': data.get('teaser_text'),
+ 'location': data.get('country_of_origin'),
+ 'uploader': data.get('host', {}).get('name'),
+ 'uploader_id': data.get('host', {}).get('slug'),
+ 'thumbnail': data.get('image', {}).get('large_url_2x'),
+ 'duration': data.get('duration'),
}
- return [info]
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
IE_NAME = '220.ro'
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
- u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
- u'file': u'LYV6doKo7f.mp4',
- u'md5': u'03af18b73a07b4088753930db7a34add',
- u'info_dict': {
- u"title": u"Luati-le Banii sez 4 ep 1",
- u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+ 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/',
+ 'file': 'LYV6doKo7f.mp4',
+ 'md5': '03af18b73a07b4088753930db7a34add',
+ 'info_dict': {
+ 'title': 'Luati-le Banii sez 4 ep 1',
+ 'description': 'Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.',
}
}
webpage = self._download_webpage(url, video_id)
flashVars_str = self._search_regex(
r'<param name="flashVars" value="([^"]+)"',
- webpage, u'flashVars')
+ webpage, 'flashVars')
flashVars = compat_parse_qs(flashVars_str)
- info = {
+ return {
'_type': 'video',
'id': video_id,
'ext': 'mp4',
'description': clean_html(flashVars['desc'][0]),
'thumbnail': flashVars['preview'][0],
}
- return info
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
_TESTS = [{
- u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
- u'file': u'1259285.mp4',
- u'md5': u'2c2754212136f35fb4b19767d242f66e',
- u'info_dict': {
- u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
- }
+ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
+ 'file': '1259285.mp4',
+ 'md5': '2c2754212136f35fb4b19767d242f66e',
+ 'info_dict': {
+ 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
+ },
},
{
- u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
- u'file': u'1309159.mp4',
- u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
- u'info_dict': {
- u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
- }
+ 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
+ 'file': '1309159.mp4',
+ 'md5': 'f2cdf638d7aa47654e251e1aee360af1',
+ 'info_dict': {
+ 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
+ },
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
- r'<div class="module-title">(.*?)</div>', webpage, u'title')
+ r'<div class="module-title">(.*?)</div>', webpage, 'title')
- xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+ xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
idoc = self._download_xml(
xml_url, video_id,
- note=u'Downloading XML', errnote=u'Failed to download XML')
+ note='Downloading XML', errnote='Failed to download XML')
formats = [
{
'format_id': n.tag.rpartition('type')[2],
- 'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
+ 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
self._sort_formats(formats)
- info = {
+ return {
'id': video_id,
'title': video_title,
'duration': duration,
'formats': formats,
}
- return info
+from __future__ import unicode_literals
import base64
import re
compat_parse_qs,
)
+
class TutvIE(InfoExtractor):
- _VALID_URL=r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
_TEST = {
- u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
- u'file': u'2742556.flv',
- u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
- u'info_dict': {
- u"title": u"Noah en pabellon cuahutemoc"
- }
+ 'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
+ 'file': '2742556.flv',
+ 'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+ 'info_dict': {
+ 'title': 'Noah en pabellon cuahutemoc',
+ },
}
def _real_extract(self, url):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, u'internal video ID')
+ internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
- data_url = u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
- data_content = self._download_webpage(data_url, video_id, note=u'Downloading video info')
+ data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
+ data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
data = compat_parse_qs(data_content)
video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
- ext = video_url.partition(u'?')[0].rpartition(u'.')[2]
- info = {
+ return {
'id': internal_id,
'url': video_url,
- 'ext': ext,
'title': self._og_search_title(webpage),
}
- return [info]
+from __future__ import unicode_literals
+
import json
import re
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
- IE_NAME = u'ustream'
+ IE_NAME = 'ustream'
_TEST = {
- u'url': u'http://www.ustream.tv/recorded/20274954',
- u'file': u'20274954.flv',
- u'md5': u'088f151799e8f572f84eb62f17d73e5c',
- u'info_dict': {
- u"uploader": u"Young Americans for Liberty",
- u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
- }
+ 'url': 'http://www.ustream.tv/recorded/20274954',
+ 'file': '20274954.flv',
+ 'md5': '088f151799e8f572f84eb62f17d73e5c',
+ 'info_dict': {
+ 'uploader': 'Young Americans for Liberty',
+ 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
+ },
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('videoID')
- video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
+ video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
- webpage, u'title')
+ webpage, 'title')
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
- webpage, u'uploader', fatal=False, flags=re.DOTALL)
+ webpage, 'uploader', fatal=False, flags=re.DOTALL)
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
- webpage, u'thumbnail', fatal=False)
-
- info = {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'flv',
- 'title': video_title,
- 'uploader': uploader,
- 'thumbnail': thumbnail,
- }
- return info
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': video_title,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ }
+
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
- IE_NAME = u'ustream:channel'
+ IE_NAME = 'ustream:channel'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
+from __future__ import unicode_literals
+
import re
import json
import xml.etree.ElementTree
vevo:)
(?P<id>[^&?#]+)'''
_TESTS = [{
- u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
- u'file': u'GB1101300280.mp4',
- u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
- u'info_dict': {
- u"upload_date": u"20130624",
- u"uploader": u"Hurts",
- u"title": u"Somebody to Die For",
- u"duration": 230.12,
- u"width": 1920,
- u"height": 1080,
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'file': 'GB1101300280.mp4',
+ 'md5': '06bea460acb744eab74a9d7dcb4bfd61',
+ 'info_dict': {
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'title': 'Somebody to Die For',
+ 'duration': 230.12,
+ 'width': 1920,
+ 'height': 1080,
}
}]
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
if version['version'] > last_version['version']:
last_version = version
if last_version['version'] == -1:
- raise ExtractorError(u'Unable to extract last version of the video')
+ raise ExtractorError('Unable to extract last version of the video')
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
formats = []
format_url = self._SMIL_BASE_URL + m.group('path')
formats.append({
'url': format_url,
- 'format_id': u'SMIL_' + m.group('cbr'),
+ 'format_id': 'SMIL_' + m.group('cbr'),
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
video_id = mobj.group('id')
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
- info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
- video_info = json.loads(info_json)['video']
+ video_info = self._download_json(json_url, video_id)['video']
formats = self._formats_from_json(video_info)
try:
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
self._SMIL_BASE_URL, video_id, video_id.lower())
smil_xml = self._download_webpage(smil_url, video_id,
- u'Downloading SMIL info')
+ 'Downloading SMIL info')
formats.extend(self._formats_from_smil(smil_xml))
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
self._downloader.report_warning(
- u'Cannot download SMIL information, falling back to JSON ..')
+ 'Cannot download SMIL information, falling back to JSON ..')
timestamp_ms = int(self._search_regex(
- r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
+ r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
- info = {
+ return {
'id': video_id,
'title': video_info['title'],
'formats': formats,
'uploader': video_info['mainArtists'][0]['artistName'],
'duration': video_info['duration'],
}
-
- return info
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class YouJizzIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
_TEST = {
- u'url': u'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- u'file': u'2189178.flv',
- u'md5': u'07e15fa469ba384c7693fd246905547c',
- u'info_dict': {
- u"title": u"Zeichentrick 1",
- u"age_limit": 18,
+ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+ 'file': '2189178.flv',
+ 'md5': '07e15fa469ba384c7693fd246905547c',
+ 'info_dict': {
+ 'title': 'Zeichentrick 1',
+ 'age_limit': 18,
}
}
# Get the video title
video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
- webpage, u'title').strip()
+ webpage, 'title').strip()
# Get the embed page
result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
if result is None:
- raise ExtractorError(u'ERROR: unable to extract embed page')
+ raise ExtractorError('ERROR: unable to extract embed page')
embed_page_url = result.group(0).strip()
video_id = result.group('videoid')
if m_playlist is not None:
playlist_url = m_playlist.group('playlist')
playlist_page = self._download_webpage(playlist_url, video_id,
- u'Downloading playlist page')
+ 'Downloading playlist page')
m_levels = list(re.finditer(r'<level bitrate="(\d+?)" file="(.*?)"', playlist_page))
if len(m_levels) == 0:
- raise ExtractorError(u'Unable to extract video url')
+ raise ExtractorError('Unable to extract video url')
videos = [(int(m.group(1)), m.group(2)) for m in m_levels]
(_, video_url) = sorted(videos)[0]
video_url = video_url.replace('%252F', '%2F')
else:
video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
- webpage, u'video URL')
-
- info = {'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'ext': 'flv',
- 'format': 'flv',
- 'player_url': embed_page_url,
- 'age_limit': age_limit}
+ webpage, 'video URL')
- return [info]
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': video_title,
+ 'ext': 'flv',
+ 'format': 'flv',
+ 'player_url': embed_page_url,
+ 'age_limit': age_limit,
+ }
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
- 'id': 'video_id',
+ 'id': video_id,
'title': title,
}
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
-__version__ = '2014.01.28.1'
+__version__ = '2014.01.30.2'