from __future__ import print_function
+import os.path
import pkg_resources
+import warnings
import sys
try:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
params = py2exe_params
else:
+    # Optional data files as (install directory, [source files]) pairs.
+    # Files that are missing are skipped with a warning instead of aborting;
+    # per the warning text below they are generated by running make.
+    files_spec = [
+        ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
+        ('share/doc/youtube_dl', ['README.txt']),
+        ('share/man/man1', ['youtube-dl.1'])
+    ]
+    # Check for the files next to this script rather than in the current
+    # working directory, so the result does not depend on where the script
+    # is invoked from.
+    root = os.path.dirname(os.path.abspath(__file__))
+    data_files = []
+    for dirname, files in files_spec:
+        resfiles = []
+        for fn in files:
+            if not os.path.exists(os.path.join(root, fn)):
+                warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' % fn)
+            else:
+                # Keep the name relative, exactly as listed in files_spec.
+                resfiles.append(fn)
+        data_files.append((dirname, resfiles))
+
params = {
- 'data_files': [ # Installing system-wide would require sudo...
- ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
- ('share/doc/youtube_dl', ['README.txt']),
- ('share/man/man1', ['youtube-dl.1'])
- ]
+ 'data_files': data_files,
}
if setuptools_available:
params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
'Michael Orlitzky',
'Chris Gahan',
'Saimadhav Heblikar',
+ 'Mike Col',
)
__license__ = 'Public Domain'
+from __future__ import unicode_literals
+
from .common import FileDownloader
from .hls import HlsFD
from .http import HttpFD
def get_suitable_downloader(info_dict):
"""Get the downloader class that can handle the info dict."""
url = info_dict['url']
+ protocol = info_dict.get('protocol')
if url.startswith('rtmp'):
return RtmpFD
- if determine_ext(url) == u'm3u8':
+ if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
return HlsFD
if url.startswith('mms') or url.startswith('rtsp'):
return MplayerFD
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
+from .huffpost import HuffPostIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import (
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
+from .la7 import LA7IE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import (
LyndaCourseIE
)
from .macgamestore import MacGameStoreIE
+from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .viki import VikiIE
from .vk import VKIE
from .wat import WatIE
-from .websurg import WeBSurgIE
from .weibo import WeiboIE
from .wimp import WimpIE
from .wistia import WistiaIE
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
- "http", "https", "rtsp", "rtmp" or so.
+ "http", "https", "rtsp", "rtmp", "m3u8" or so.
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field.
return RATING_TABLE.get(rating.lower(), None)
def _sort_formats(self, formats):
+ if not formats:
+ raise ExtractorError(u'No video formats found')
+
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext
# Look for embedded Facebook player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Facebook')
+        # Look for embedded Huffington Post player.
+        # Every dot of the host name is escaped so that only the literal
+        # embed.live.huffingtonpost.com host matches; the captured iframe URL
+        # is handed over to the 'HuffPost' extractor.
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'HuffPost')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
+
+
+class HuffPostIE(InfoExtractor):
+    """Information extractor for Huffington Post live segments.
+
+    Accepts segment page URLs and embedded-player URLs on
+    live.huffingtonpost.com and reads the segment metadata from the
+    JSON document served under /api/segments/.
+    """
+    IE_DESC = 'Huffington Post'
+    # Verbose-mode pattern: whitespace inside it is ignored by the regex engine.
+    _VALID_URL = r'''(?x)
+        https?://(embed\.)?live\.huffingtonpost\.com/
+        (?:
+            r/segment/[^/]+/|
+            HPLEmbedPlayer/\?segmentId=
+        )
+        (?P<id>[0-9a-f]+)'''
+
+    _TEST = {
+        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
+        'file': '52dd3e4b02a7602131000677.mp4',
+        'md5': '55f5e8981c1c80a64706a44b74833de8',
+        'info_dict': {
+            'title': 'Legalese It! with @MikeSacksHP',
+            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
+            'duration': 1549,
+            'upload_date': '20140124',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the segment referenced by *url*.
+
+        The segment id is taken from the URL; title, duration, upload date,
+        description, thumbnails and formats come from the segment's JSON
+        document.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
+        data = self._download_json(api_url, video_id)['data']
+
+        video_title = data['title']
+        duration = parse_duration(data['running_time'])
+        upload_date = unified_strdate(data['schedule']['starts_at'])
+        description = data.get('description')
+
+        # Only images whose URL carries a "-<width>x<height>." marker are
+        # kept, because that marker is where the resolution is read from.
+        thumbnails = []
+        for thumb_url in data['images'].values():
+            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumb_url)
+            if not m:
+                continue
+            thumbnails.append({
+                'url': thumb_url,
+                'resolution': m.group(1),
+            })
+
+        # One format per entry of the "live" sources; keys starting with
+        # "audio/" are flagged as having no video codec.
+        formats = [{
+            'format': key,
+            'format_id': key.replace('/', '.'),
+            'ext': 'mp4',
+            'url': format_url,
+            'vcodec': 'none' if key.startswith('audio/') else None,
+        } for key, format_url in data['sources']['live().items()' and 'live'].items()]
+        if data.get('fivemin_id'):
+            # Additional 5min.com download; its path is derived from the id:
+            # category = id // 100 + 1, prefixed by the category's last
+            # three characters.
+            fid = data['fivemin_id']
+            fcat = str(int(fid) // 100 + 1)
+            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
+            formats.append({
+                'format': 'fivemin',
+                'url': furl,
+                'preference': 1,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'formats': formats,
+            'duration': duration,
+            'upload_date': upload_date,
+            'thumbnails': thumbnails,
+        }
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+)
+
+
+class LA7IE(InfoExtractor):
+    """Information extractor for la7.tv videos.
+
+    Metadata and stream locations are read from an XML document fetched
+    from the site's repliche/content endpoint.
+    """
+    IE_NAME = 'la7.tv'
+    # Verbose-mode pattern: whitespace inside it is ignored by the regex engine.
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?la7\.tv/
+        (?:
+            richplayer/\?assetid=|
+            \?contentId=
+        )
+        (?P<id>[0-9]+)'''
+
+    _TEST = {
+        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
+        'file': '50355319.mp4',
+        'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
+        'info_dict': {
+            'title': 'IL DIVO',
+            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
+            'duration': 6254,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the content XML for *url* and build the info dict."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        doc = self._download_xml(
+            'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id,
+            video_id)
+
+        title = doc.find('title').text
+        summary = doc.find('description').text
+        length = parse_duration(doc.find('duration').text)
+        poster = doc.find('img').text
+        views = int(doc.find('views').text)
+
+        # The "mp4:" marker of each stream path is replaced by the fqdn
+        # value, whose "auto:" part is first rewritten to "http:".
+        host_prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
+
+        formats = []
+        for video_node in doc.findall('.//videos/video'):
+            # The quality text doubles as the format name and the bitrate.
+            quality = video_node.find('quality').text
+            formats.append({
+                'format': quality,
+                'tbr': int(quality),
+                'url': video_node.find('fms').text.strip().replace('mp4:', host_prefix),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': summary,
+            'thumbnail': poster,
+            'duration': length,
+            'formats': formats,
+            'view_count': views,
+        }
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+)
+
+class MalemotionIE(InfoExtractor):
+    """Information extractor for malemotion.com video pages.
+
+    The media URL, title and thumbnail are scraped from the page HTML.
+    """
+    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _TEST = {
+        'url': 'http://malemotion.com/video/bien-dur.10ew',
+        'file': '10ew.mp4',
+        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'info_dict': {
+            "title": "Bien dur",
+            "age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Fetch the page behind *url* and build the info dict from it."""
+        video_id = re.match(self._VALID_URL, url).group("id")
+
+        webpage = self._download_webpage(url, video_id)
+
+        self.report_extraction(video_id)
+
+        # The src attribute of the mp4 <source> tag is percent-decoded
+        # before it is used as the media URL.
+        encoded_url = self._search_regex(
+            r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')
+        media_url = compat_urllib_parse.unquote(encoded_url)
+
+        title = self._html_search_regex(
+            r'<title>(.*?)</title', webpage, 'title')
+
+        # A missing poster is tolerated (fatal=False).
+        poster = self._search_regex(
+            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'formats': [{
+                'url': media_url,
+                'ext': 'mp4',
+                'format_id': 'mp4',
+                'preference': 1,
+            }],
+            'uploader': None,
+            'upload_date': None,
+            'title': title,
+            'thumbnail': poster,
+            'description': None,
+            'age_limit': 18,
+        }
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
class TumblrIE(InfoExtractor):
_VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
_TEST = {
- u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
- u'file': u'54196191430.mp4',
- u'md5': u'479bb068e5b16462f5176a6828829767',
- u'info_dict': {
- u"title": u"tatiana maslany news"
+ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+ 'file': '54196191430.mp4',
+ 'md5': '479bb068e5b16462f5176a6828829767',
+ 'info_dict': {
+ "title": "tatiana maslany news"
}
}
re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
video = re.search(re_video, webpage)
if video is None:
- raise ExtractorError(u'Unable to extract video')
+ raise ExtractorError('Unable to extract video')
video_url = video.group('video_url')
ext = video.group('ext')
- video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
- webpage, u'thumbnail', fatal=False) # We pick the first poster
- if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
+ video_thumbnail = self._search_regex(
+ r'posters.*?\[\\x22(.*?)\\x22',
+ webpage, 'thumbnail', fatal=False) # We pick the first poster
+ if video_thumbnail:
+ video_thumbnail = video_thumbnail.replace('\\\\/', '/')
# The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos
video_title = self._html_search_regex(r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
- webpage, u'title', flags=re.DOTALL)
+ webpage, 'title', flags=re.DOTALL)
return [{'id': video_id,
'url': video_url,
+++ /dev/null
-# coding: utf-8
-
-import re
-
-from ..utils import (
- compat_urllib_request,
- compat_urllib_parse
-)
-
-from .common import InfoExtractor
-
-class WeBSurgIE(InfoExtractor):
- IE_NAME = u'websurg.com'
- _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)'
-
- _TEST = {
- u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012',
- u'file': u'vd01en4012.mp4',
- u'params': {
- u'skip_download': True,
- },
- u'skip': u'Requires login information',
- }
-
- _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1'
-
- def _real_initialize(self):
-
- login_form = {
- 'username': self._downloader.params['username'],
- 'password': self._downloader.params['password'],
- 'Submit': 1
- }
-
- request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
- request.add_header(
- 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8')
- compat_urllib_request.urlopen(request).info()
- webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in')
-
- if webpage != 'OK':
- self._downloader.report_error(
- u'Unable to log in: bad username/password')
-
- def _real_extract(self, url):
- video_id = re.match(self._VALID_URL, url).group(1)
-
- webpage = self._download_webpage(url, video_id)
-
- url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage)
-
- return {'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'ext' : 'mp4',
- 'url' : url_info.group(1) + '/' + url_info.group(2),
- 'thumbnail': self._og_search_thumbnail(webpage)
- }
-__version__ = '2014.01.23.4'
+__version__ = '2014.01.27.1'