(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
- def report_webpage(self, video_id):
- """Report information extraction"""
- self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
-
- def report_extraction(self, video_id):
- """Report information extraction"""
- self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
- return
+ raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
- self.report_webpage(video_id)
-
# Get webpage content
- try:
- webpage_bytes = compat_urllib_request.urlopen(url).read()
- webpage = webpage_bytes.decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
- return
+ webpage = self._download_webpage(url, video_id)
result = re.search(self.VIDEO_URL_RE, webpage)
if result is None:
- self._downloader.trouble(u'ERROR: unable to extract video url')
- return
+ raise ExtractorError(u'Unable to extract video url')
video_url = compat_urllib_parse.unquote(result.group(1))
result = re.search(self.VIDEO_TITLE_RE, webpage)
if result is None:
- self._downloader.trouble(u'ERROR: unable to extract video title')
- return
+ raise ExtractorError(u'Unable to extract video title')
video_title = result.group(1)
result = re.search(self.VIDEO_THUMB_RE, webpage)
if result is None:
- self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
- return
+ raise ExtractorError(u'Unable to extract video thumbnail')
video_thumbnail = result.group(1)
return [{
@@ -3470,52 +2980,43 @@ class XNXXIE(InfoExtractor):
class GooglePlusIE(InfoExtractor):
"""Information extractor for plus.google.com."""
- _VALID_URL = r'(?:https://)?plus\.google\.com/(?:\w+/)*?(\d+)/posts/(\w+)'
+ _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
IE_NAME = u'plus.google'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
def report_extract_entry(self, url):
"""Report downloading extry"""
- self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url.decode('utf-8'))
+ self.to_screen(u'Downloading entry: %s' % url)
def report_date(self, upload_date):
"""Report downloading extry"""
- self._downloader.to_screen(u'[plus.google] Entry date: %s' % upload_date)
+ self.to_screen(u'Entry date: %s' % upload_date)
def report_uploader(self, uploader):
"""Report downloading extry"""
- self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader.decode('utf-8'))
+ self.to_screen(u'Uploader: %s' % uploader)
def report_title(self, video_title):
"""Report downloading extry"""
- self._downloader.to_screen(u'[plus.google] Title: %s' % video_title.decode('utf-8'))
+ self.to_screen(u'Title: %s' % video_title)
def report_extract_vid_page(self, video_page):
"""Report information extraction."""
- self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page.decode('utf-8'))
+ self.to_screen(u'Extracting video page: %s' % video_page)
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
- return
+ raise ExtractorError(u'Invalid URL: %s' % url)
post_url = mobj.group(0)
- video_id = mobj.group(2)
+ video_id = mobj.group(1)
video_extension = 'flv'
# Step 1, Retrieve post webpage to extract further information
self.report_extract_entry(post_url)
- request = compat_urllib_request.Request(post_url)
- try:
- webpage = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
- return
+ webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
# Extract update date
upload_date = None
@@ -3549,15 +3050,10 @@ class GooglePlusIE(InfoExtractor):
pattern = '"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]'
mobj = re.search(pattern, webpage)
if mobj is None:
- self._downloader.trouble(u'ERROR: unable to extract video page URL')
+ raise ExtractorError(u'Unable to extract video page URL')
video_page = mobj.group(1)
- request = compat_urllib_request.Request(video_page)
- try:
- webpage = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
- return
+ webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
self.report_extract_vid_page(video_page)
@@ -3566,7 +3062,7 @@ class GooglePlusIE(InfoExtractor):
pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
mobj = re.findall(pattern, webpage)
if len(mobj) == 0:
- self._downloader.trouble(u'ERROR: unable to extract video links')
+ raise ExtractorError(u'Unable to extract video links')
# Sort in resolution
links = sorted(mobj)
@@ -3576,43 +3072,35 @@ class GooglePlusIE(InfoExtractor):
# Only get the url. The resolution part in the tuple has no use anymore
video_url = video_url[-1]
# Treat escaped \u0026 style hex
- video_url = unicode(video_url, "unicode_escape")
+ try:
+ video_url = video_url.decode("unicode_escape")
+ except AttributeError: # Python 3
+ video_url = bytes(video_url, 'ascii').decode('unicode-escape')
return [{
- 'id': video_id.decode('utf-8'),
+ 'id': video_id,
'url': video_url,
- 'uploader': uploader.decode('utf-8'),
- 'upload_date': upload_date.decode('utf-8'),
- 'title': video_title.decode('utf-8'),
- 'ext': video_extension.decode('utf-8'),
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'title': video_title,
+ 'ext': video_extension,
}]
class NBAIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
IE_NAME = u'nba'
- def report_extraction(self, video_id):
- self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
- return
+ raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(1)
if video_id.endswith('/index.html'):
video_id = video_id[:-len('/index.html')]
- self.report_extraction(video_id)
- try:
- urlh = compat_urllib_request.urlopen(url)
- webpage_bytes = urlh.read()
- webpage = webpage_bytes.decode('utf-8', 'ignore')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
- return
+ webpage = self._download_webpage(url, video_id)
video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
def _findProp(rexp, default=None):
@@ -3633,3 +3121,1051 @@ class NBAIE(InfoExtractor):
'description': _findProp(r'