from __future__ import absolute_import
+import base64
import datetime
import netrc
import os
def IE_NAME(self):
    """Return the extractor's name derived from its class name.

    The name is the class name with its last two characters removed,
    e.g. a class called ``YoutubeIE`` yields ``'Youtube'``.  The slice
    is unconditional: it does not check that the removed characters
    are actually ``'IE'``.
    """
    class_name = type(self).__name__
    return class_name[:-2]
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
    """Download a web page and return its content as a unicode string.

    url_or_request -- handed unchanged to compat_urllib_request.urlopen;
                      callers pass either a URL string or a prepared
                      Request object (e.g. one carrying extra headers).
    video_id       -- identifier shown in the progress message.
    note           -- progress text; defaults to
                      u'Downloading video webpage'.
    errnote        -- prefix of the error message; defaults to
                      u'Unable to download webpage'.

    The response body is decoded as UTF-8 with undecodable bytes
    replaced, so decoding itself never fails.

    Raises ExtractorError (with the original traceback as its second
    argument) when opening or reading the page fails with a URLError,
    HTTPException or socket.error.
    """
    if note is None:
        note = u'Downloading video webpage'
    self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
    try:
        urlh = compat_urllib_request.urlopen(url_or_request)
        try:
            webpage_bytes = urlh.read()
        finally:
            # Release the connection even if read() fails part-way.
            urlh.close()
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        if errnote is None:
            errnote = u'Unable to download webpage'
        # Pass the traceback along so the root cause is not lost.
        raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
    return webpage_bytes.decode('utf-8', 'replace')
class YoutubeIE(InfoExtractor):
# uploader_id
video_uploader_id = None
- mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/user/([^"]+)">', video_webpage)
+ mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
if mobj is not None:
video_uploader_id = mobj.group(1)
else:
def __init__(self, downloader=None):
    """Initialise the extractor by delegating to InfoExtractor.__init__.

    downloader -- forwarded unchanged to the base-class initialiser;
                  may be None.
    """
    InfoExtractor.__init__(self, downloader)
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
-
def report_extraction(self, video_id):
    """Report information extraction.

    Writes a '[dailymotion] <video_id>: Extracting information' line
    through the downloader's to_screen method.
    """
    message = u'[dailymotion] %s: Extracting information' % video_id
    self._downloader.to_screen(message)
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
- try:
- self.report_download_webpage(video_id)
- webpage_bytes = compat_urllib_request.urlopen(request).read()
- webpage = webpage_bytes.decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
- return
+ webpage = self._download_webpage(request, video_id)
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
class InfoQIE(InfoExtractor):
"""Information extractor for infoq.com"""
-
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
- IE_NAME = u'infoq'
-
- def report_webpage(self, video_id):
- """Report information extraction."""
- self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
def report_extraction(self, video_id):
"""Report information extraction."""
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
- self.report_webpage(url)
-
- request = compat_urllib_request.Request(url)
- try:
- webpage = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
- return
-
+ webpage = self._download_webpage(url, video_id=url)
self.report_extraction(url)
-
# Extract video URL
mobj = re.search(r"jsclassref='([^']*)'", webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
- video_url = 'rtmpe://video.infoq.com/cfx/st/' + compat_urllib_parse.unquote(mobj.group(1).decode('base64'))
-
+ real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
+ video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
# Extract title
mobj = re.search(r'contentTitle = "(.*?)";', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
- video_title = mobj.group(1).decode('utf-8')
+ video_title = mobj.group(1)
# Extract description
video_description = u'No description available.'
mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
if mobj is not None:
- video_description = mobj.group(1).decode('utf-8')
+ video_description = mobj.group(1)
video_filename = video_url.split('/')[-1]
video_id, extension = video_filename.split('.')
videourl = 'http://store.steampowered.com/video/%s/' % gameID
webpage = self._download_webpage(videourl, gameID)
mweb = re.finditer(urlRE, webpage)
- namesRE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>'
- titles = list(re.finditer(namesRE, webpage))
+ namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
+ titles = re.finditer(namesRE, webpage)
videos = []
for vid,vtitle in zip(mweb,titles):
video_id = vid.group('videoID')
'id':video_id,
'url':video_url,
'ext': 'flv',
- 'title': title
+ 'title': unescapeHTML(title)
}
videos.append(info)
return videos