X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=85ffad18c61efce1431c07aabba3a56c3cfef56c;hb=18ea0cefc370807e1d31fd7b462691f29f48c8fe;hp=3cdce3b259983866855721374fb1c9e0db608b80;hpb=89af8e9d32f550bdf00da9eab3288ad1e104d3b3;p=youtube-dl
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 3cdce3b25..85ffad18c 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -238,7 +238,7 @@ class YoutubeIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
- self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+ self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
return
# Set language
@@ -247,7 +247,7 @@ class YoutubeIE(InfoExtractor):
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
+ self._downloader.to_stderr(u'WARNING: unable to set language: %s' % u(err))
return
# No authentication to be performed
@@ -270,7 +270,7 @@ class YoutubeIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+ self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
return
# Confirm age
@@ -283,7 +283,7 @@ class YoutubeIE(InfoExtractor):
self.report_age_confirmation()
age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
return
def _real_extract(self, url):
@@ -305,7 +305,7 @@ class YoutubeIE(InfoExtractor):
try:
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return
# Attempt to extract SWF player URL
@@ -327,7 +327,7 @@ class YoutubeIE(InfoExtractor):
if 'token' in video_info:
break
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
return
if 'token' not in video_info:
if 'reason' in video_info:
@@ -390,7 +390,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
+ raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list:
@@ -407,7 +407,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
+ raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@@ -526,7 +526,7 @@ class MetacafeIE(InfoExtractor):
self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % u(err))
return
# Confirm age
@@ -539,7 +539,7 @@ class MetacafeIE(InfoExtractor):
self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
return
def _real_extract(self, url):
@@ -563,7 +563,7 @@ class MetacafeIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
return
# Extract URL, uploader and title from webpage
@@ -656,7 +656,7 @@ class DailymotionIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
return
# Extract URL, uploader and title from webpage
@@ -754,7 +754,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract URL, uploader, and title from webpage
@@ -793,7 +793,7 @@ class GoogleIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
mobj = re.search(r'
', webpage)
if mobj is None:
@@ -849,7 +849,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract URL, uploader, and title from webpage
@@ -919,7 +919,7 @@ class YahooIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -943,7 +943,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract uploader and title from webpage
@@ -1001,7 +1001,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Extract media URL from playlist XML
@@ -1059,7 +1059,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
# Now we begin extracting as much information as we can from what we
@@ -1140,6 +1140,143 @@ class VimeoIE(InfoExtractor):
}]
+class ArteTvIE(InfoExtractor):
+ """arte.tv information extractor."""
+
+ _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
+ _LIVE_URL = r'index-[0-9]+\.html$'
+
+ IE_NAME = u'arte.tv'
+
+    def __init__(self, downloader=None):
+        """Set up the extractor by passing downloader on to InfoExtractor.__init__."""
+        InfoExtractor.__init__(self, downloader)
+
+    def report_download_webpage(self, video_id):
+        """Tell the user that the page for video_id is being downloaded."""
+        notice = u'[arte.tv] %s: Downloading webpage' % video_id
+        self._downloader.to_screen(notice)
+
+    def report_extraction(self, video_id):
+        """Tell the user that information for video_id is being extracted."""
+        notice = u'[arte.tv] %s: Extracting information' % video_id
+        self._downloader.to_screen(notice)
+
+    def fetch_webpage(self, url):
+        """Download url and return the response body.
+
+        The request is counted through the downloader's
+        increment_downloads() before it is sent.  A network failure or a
+        ValueError raised while opening the URL is reported through the
+        downloader's trouble(); if that call returns, this method
+        returns None.
+        """
+        self._downloader.increment_downloads()
+        request = urllib2.Request(url)
+        try:
+            self.report_download_webpage(url)
+            webpage = urllib2.urlopen(request).read()
+        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+            # Use u() like every other handler in this file; str() would
+            # hand a byte string to the unicode format below.
+            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
+            return
+        except ValueError:
+            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+            return
+        return webpage
+
+    def grep_webpage(self, url, regex, regexFlags, matchTuples):
+        """Fetch url, search it with regex and collect the named groups.
+
+        matchTuples is a sequence of (group_index, key, error_message)
+        triples.  For each triple the matched group is stored under key
+        in the returned dict.  If the page could not be fetched, the
+        regex does not match, or a listed group did not participate in
+        the match, the problem is reported through the downloader's
+        trouble() (fetch failures are reported by fetch_webpage) and
+        None is returned.
+        """
+        page = self.fetch_webpage(url)
+        if page is None:
+            # fetch_webpage has already reported the failure; searching
+            # None would raise TypeError.
+            return
+
+        mobj = re.search(regex, page, regexFlags)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+            return
+
+        info = {}
+        for (i, key, err) in matchTuples:
+            if mobj.group(i) is None:
+                self._downloader.trouble(err)
+                return
+            info[key] = mobj.group(i)
+
+        return info
+
+    def extractLiveStream(self, url):
+        """Look up the stream path, player and rtmp URL for url.
+
+        The language code is taken from the fourth-from-last path
+        segment of url.  The page at url is searched for a
+        videothek_js script reference; that script is then fetched from
+        the same host and searched for the stream path, the .swf player
+        URL and the rtmp:// URL.  Returns None; when either lookup
+        fails the method stops early, since grep_webpage has already
+        reported the error.
+        """
+        video_lang = url.split('/')[-4]
+        info = self.grep_webpage(
+            url,
+            r'src="(.*?/videothek_js.*?\.js)',
+            0,
+            [
+                (1, 'url', u'ERROR: Invalid URL: %s' % url)
+            ]
+        )
+        if info is None:
+            # Nothing to follow up on; avoids AttributeError on None.get().
+            return
+        http_host = url.split('/')[2]
+        next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
+        info = self.grep_webpage(
+            next_url,
+            r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
+            '(http://.*?\.swf).*?' +
+            '(rtmp://.*?)\'',
+            re.DOTALL,
+            [
+                (1, 'path', u'ERROR: could not extract video path: %s' % url),
+                (2, 'player', u'ERROR: could not extract video player: %s' % url),
+                (3, 'url', u'ERROR: could not extract video url: %s' % url)
+            ]
+        )
+        if info is None:
+            return
+        # NOTE(review): video_url is built but never returned or stored,
+        # so callers get nothing back from this method - confirm intent.
+        video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+
+ def extractPlus7Stream(self, url):
+ video_lang = url.split('/')[-3]
+ info = self.grep_webpage(
+ url,
+ r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
+ 0,
+ [
+ (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ ]
+ )
+ next_url = urllib.unquote(info.get('url'))
+ info = self.grep_webpage(
+ next_url,
+ r'