class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
- _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+ _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
# upload date
upload_date = u'NA'
- mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
+ mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
- if 'fmt_url_map' in video_info:
+ if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]:
url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
format_limit = self._downloader.params.get('format_limit', None)
if format_limit is not None and format_limit in self._available_formats:
# Retrieve video webpage to extract further information
request = urllib2.Request(url)
+ request.add_header('Cookie', 'family_filter=off')
try:
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
- mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
+ mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract media URL')
return
- mediaURL = urllib.unquote(mobj.group(1))
+ sequence = urllib.unquote(mobj.group(1))
+ mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
+ if mobj is None:
+ self._downloader.trouble(u'ERROR: unable to extract media URL')
+ return
+ mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
# if needed add http://www.dailymotion.com/ if relative URL
video_url = mediaURL
- # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
- mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
+ mobj = re.search(r'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = sanitize_title(video_title)
- mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
+ mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
@staticmethod
def get_audio_codec(path):
- handle = subprocess.Popen(['ffprobe', '-show_streams', path],
- stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
- output = handle.communicate()[0]
- if handle.wait() != 0:
+ try:
+ cmd = ['ffprobe', '-show_streams', '--', path]
+ handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
+ output = handle.communicate()[0]
+ if handle.wait() != 0:
+ return None
+ except (IOError, OSError):
return None
audio_codec = None
for line in output.split('\n'):
@staticmethod
def run_ffmpeg(path, out_path, codec, more_opts):
try:
- ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + [out_path],
- stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
+ cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
+ ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
return (ret == 0)
except (IOError, OSError):
return False
filecodec = self.get_audio_codec(path)
if filecodec is None:
- self._downloader.to_stderr(u'WARNING: no audio codec found in file')
+ self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
return None
more_opts = []
status = self.run_ffmpeg(path, new_path, acodec, more_opts)
if not status:
- self._downloader.to_stderr(u'WARNING: error running ffmpeg' % ret)
+ self._downloader.to_stderr(u'WARNING: error running ffmpeg')
return None
try:
# Parse command line
parser = optparse.OptionParser(
usage='Usage: %prog [options] url...',
- version='2011.02.25',
+ version='2011.03.29',
conflict_handler='resolve',
)