'Sören Schulze',
'Kevin Ngo',
'Ori Avtalion',
+ 'shizeeg',
)
__license__ = 'Public Domain'
-__version__ = '2011.11.22'
+__version__ = '2011.11.23'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
self.trouble(u'ERROR: invalid system charset or erroneous output template')
return None
+ def _match_entry(self, info_dict):
+ """ Returns None iff the file should be downloaded """
+
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
+ return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
+ return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ return None
+
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
+
+ reason = self._match_entry(info_dict)
+ if reason is not None:
+ self.to_screen(u'[download] ' + reason)
+ return
+
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None:
+ if self._num_downloads > int(max_downloads):
+ self.to_screen(u'[download] Maximum number of downloads reached. Skipping ' + info_dict['title'])
+ return
+
filename = self.prepare_filename(info_dict)
# Forced printings
if filename is None:
return
- matchtitle=self.params.get('matchtitle',False)
- rejecttitle=self.params.get('rejecttitle',False)
- title=info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
- if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
- self.to_screen(u'[download] "%s" title did not match pattern "%s"' % (title, matchtitle))
- return
- if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
- self.to_screen(u'[download] "%s" title matched reject pattern "%s"' % (title, rejecttitle))
- return
-
if self.params.get('nooverwrites', False) and os.path.exists(filename):
self.to_stderr(u'WARNING: file exists and will be skipped')
return
video_url = mediaURL
- mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
+ mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
- simple_title = _simple_title(video_title)
+ simple_title = _simplify_title(video_title)
# Extract uploader
mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
except UnavailableVideoError, err:
self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
+class MixcloudIE(InfoExtractor):
+ """Information extractor for www.mixcloud.com"""
+ _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
+ IE_NAME = u'mixcloud'
+
+ def __init__(self, downloader=None):
+ InfoExtractor.__init__(self, downloader)
+
+ def report_download_json(self, file_id):
+ """Report JSON download."""
+ self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
+
+ def report_extraction(self, file_id):
+ """Report information extraction."""
+ self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
+
+ def get_urls(self, jsonData, fmt, bitrate='best'):
+ """Get urls from 'audio_formats' section in json"""
+ file_url = None
+ try:
+ bitrate_list = jsonData[fmt]
+ if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
+ bitrate = max(bitrate_list) # select highest
+
+ url_list = jsonData[fmt][bitrate]
+ except TypeError: # we have no bitrate info.
+ url_list = jsonData[fmt]
+
+ return url_list
+
+ def check_urls(self, url_list):
+ """Returns 1st active url from list"""
+ for url in url_list:
+ try:
+ urllib2.urlopen(url)
+ return url
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ url = None
+
+ return None
+
+ def _print_formats(self, formats):
+ print 'Available formats:'
+ for fmt in formats.keys():
+ for b in formats[fmt]:
+ try:
+ ext = formats[fmt][b][0]
+ print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
+ except TypeError: # we have no bitrate info
+ ext = formats[fmt][0]
+ print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
+ break
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+ return
+ # extract uploader & filename from url
+ uploader = mobj.group(1).decode('utf-8')
+ file_id = uploader + "-" + mobj.group(2).decode('utf-8')
+
+ # construct API request
+ file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
+ # retrieve .json file with links to files
+ request = urllib2.Request(file_url)
+ try:
+ self.report_download_json(file_url)
+ jsonData = urllib2.urlopen(request).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
+ return
+
+ # parse JSON
+ json_data = json.loads(jsonData)
+ player_url = json_data['player_swf_url']
+ formats = dict(json_data['audio_formats'])
+
+ req_format = self._downloader.params.get('format', None)
+ bitrate = None
+
+ if self._downloader.params.get('listformats', None):
+ self._print_formats(formats)
+ return
+
+ if req_format is None or req_format == 'best':
+ for format_param in formats.keys():
+ url_list = self.get_urls(formats, format_param)
+ # check urls
+ file_url = self.check_urls(url_list)
+ if file_url is not None:
+ break # got it!
+ else:
+ if req_format not in formats.keys():
+ self._downloader.trouble(u'ERROR: format is not available')
+ return
+
+ url_list = self.get_urls(formats, req_format)
+ file_url = self.check_urls(url_list)
+ format_param = req_format
+
+ # We have audio
+ self._downloader.increment_downloads()
+ try:
+ # Process file information
+ self._downloader.process_info({
+ 'id': file_id.decode('utf-8'),
+ 'url': file_url.decode('utf-8'),
+ 'uploader': uploader.decode('utf-8'),
+ 'upload_date': u'NA',
+ 'title': json_data['name'],
+ 'stitle': _simplify_title(json_data['name']),
+ 'ext': file_url.split('.')[-1].decode('utf-8'),
+ 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
+ 'thumbnail': json_data['thumbnail_url'],
+ 'description': json_data['description'],
+ 'player_url': player_url.decode('utf-8'),
+ })
+ except UnavailableVideoError, err:
+ self._downloader.trouble(u'ERROR: unable to download file')
+
class PostProcessor(object):
# Deferred imports
import getpass
import optparse
+ import shlex
+
+ def _readOptions(filename):
+ try:
+ optionf = open(filename)
+ except IOError:
+ return [] # silently skip if file is not present
+ try:
+ res = []
+ for l in optionf:
+ res += shlex.split(l, comments=True)
+ finally:
+ optionf.close()
+ return res
def _format_option_string(option):
''' ('-o', '--option') -> -o, --format METAVAR'''
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
+ selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username')
action='store_true', dest='autonumber',
help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
- dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent')
+ dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
parser.add_option_group(authentication)
parser.add_option_group(postproc)
- opts, args = parser.parse_args()
+ xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
+ if xdg_config_home:
+ userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
+ else:
+ userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+ argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
+ opts, args = parser.parse_args(argv)
return parser, opts, args
XVideosIE(),
SoundcloudIE(),
InfoQIE(),
+ MixcloudIE(),
GenericIE()
]
'writeinfojson': opts.writeinfojson,
'matchtitle': opts.matchtitle,
'rejecttitle': opts.rejecttitle,
+ 'max_downloads': opts.max_downloads,
})
for extractor in extractors:
fd.add_info_extractor(extractor)