[^<]+?]+?>([^<]+?)', webpage)
if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
- return
- video_uploader = mobj.group(1)
+ # looking for official user
+ mobj_official = re.search(r']+?>([^<]+?)', webpage)
+ if mobj_official is None:
+ self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+ else:
+ video_uploader = mobj_official.group(1)
+ else:
+ video_uploader = mobj.group(1)
+
+ video_upload_date = u'NA'
+ mobj = re.search(r'([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage)
+ if mobj is not None:
+ video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
return [{
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
- 'upload_date': u'NA',
+ 'upload_date': video_upload_date,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'format': u'NA',
@@ -729,7 +754,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract URL, uploader, and title from webpage
@@ -768,7 +793,7 @@ class GoogleIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
mobj = re.search(r'', webpage)
if mobj is None:
@@ -824,7 +849,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract URL, uploader, and title from webpage
@@ -894,7 +919,7 @@ class YahooIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -918,7 +943,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract uploader and title from webpage
@@ -976,7 +1001,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract media URL from playlist XML
@@ -1005,7 +1030,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
+ _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@@ -1034,7 +1059,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Now we begin extracting as much information as we can from what we
@@ -1075,21 +1100,32 @@ class VimeoIE(InfoExtractor):
timestamp = config['request']['timestamp']
# Vimeo specific: extract video codec and quality information
+ # First consider quality, then codecs, then take everything
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
- for codec in codecs:
- if codec[0] in config["video"]["files"]:
- video_codec = codec[0]
- video_extension = codec[1]
- if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
- else: quality = 'sd'
+ files = { 'hd': [], 'sd': [], 'other': []}
+ for codec_name, codec_extension in codecs:
+ if codec_name in config["video"]["files"]:
+ if 'hd' in config["video"]["files"][codec_name]:
+ files['hd'].append((codec_name, codec_extension, 'hd'))
+ elif 'sd' in config["video"]["files"][codec_name]:
+ files['sd'].append((codec_name, codec_extension, 'sd'))
+ else:
+ files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+
+ for quality in ('hd', 'sd', 'other'):
+ if len(files[quality]) > 0:
+ video_quality = files[quality][0][2]
+ video_codec = files[quality][0][0]
+ video_extension = files[quality][0][1]
+ self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
break
else:
self._downloader.trouble(u'ERROR: no known codec found')
return
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- %(video_id, sig, timestamp, quality, video_codec.upper())
+ %(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
@@ -1104,6 +1140,143 @@ class VimeoIE(InfoExtractor):
}]
+class ArteTvIE(InfoExtractor):
+ """arte.tv information extractor."""
+
+ _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+ _LIVE_URL = r'index-[0-9]+\.html$'
+
+ IE_NAME = u'arte.tv'
+
+ def __init__(self, downloader=None):
+ InfoExtractor.__init__(self, downloader)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
+
+ def report_extraction(self, video_id):
+ """Report information extraction."""
+ self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
+
+ def fetch_webpage(self, url):
+ self._downloader.increment_downloads()
+ request = urllib2.Request(url)
+ try:
+ self.report_download_webpage(url)
+ webpage = urllib2.urlopen(request).read()
+ except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ return
+ except ValueError, err:
+ self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+ return
+ return webpage
+
+ def grep_webpage(self, url, regex, regexFlags, matchTuples):
+ page = self.fetch_webpage(url)
+ mobj = re.search(regex, page, regexFlags)
+ info = {}
+
+ if mobj is None:
+ self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+ return
+
+ for (i, key, err) in matchTuples:
+ if mobj.group(i) is None:
+ self._downloader.trouble(err)
+ return
+ else:
+ info[key] = mobj.group(i)
+
+ return info
+
+ def extractLiveStream(self, url):
+ video_lang = url.split('/')[-4]
+ info = self.grep_webpage(
+ url,
+ r'src="(.*?/videothek_js.*?\.js)',
+ 0,
+ [
+ (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ ]
+ )
+ http_host = url.split('/')[2]
+ next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
+ info = self.grep_webpage(
+ next_url,
+ r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
+ '(http://.*?\.swf).*?' +
+ '(rtmp://.*?)\'',
+ re.DOTALL,
+ [
+ (1, 'path', u'ERROR: could not extract video path: %s' % url),
+ (2, 'player', u'ERROR: could not extract video player: %s' % url),
+ (3, 'url', u'ERROR: could not extract video url: %s' % url)
+ ]
+ )
+ video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+
+ def extractPlus7Stream(self, url):
+ video_lang = url.split('/')[-3]
+ info = self.grep_webpage(
+ url,
+ r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
+ 0,
+ [
+ (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ ]
+ )
+ next_url = urllib.unquote(info.get('url'))
+ info = self.grep_webpage(
+ next_url,
+ r'