errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
- def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
- """ Returns the data of the page as a string """
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
+ """ Returns a tuple (page content as string, URL handle) """
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
content_type = urlh.headers.get('Content-Type', '')
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
self.to_screen(u'Dumping request to ' + url)
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
- return webpage_bytes.decode(encoding, 'replace')
+ content = webpage_bytes.decode(encoding, 'replace')
+ return (content, urlh)
+
+ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
+ """ Returns the data of the page as a string """
+ return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'"""
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
$"""
- _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
+ _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Log in
login_form_strs = {
- u'continue': u'http://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+ u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
u'Email': username,
u'GALX': galx,
u'Passwd': password,
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
- url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+ url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
video_id = self._extract_id(url)
# Get video webpage
self.report_video_webpage_download(video_id)
- url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+ url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
request = compat_urllib_request.Request(url)
try:
video_webpage_bytes = compat_urllib_request.urlopen(request).read()
'ext': video_extension.decode('utf-8'),
}]
-class RedtubeIE(InfoExtractor):
- """Information Extractor for redtube"""
- _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
- IE_NAME = u'redtube'
-
- def _real_extract(self,url):
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- self._downloader.report_error(u'invalid URL: %s' % url)
- return
- video_id = mobj.group('id')
- video_extension = 'mp4'
- webpage = self._download_webpage(url, video_id)
- self.report_extraction(video_id)
- mobj = re.search(r'<source src="'+'(.+)'+'" type="video/mp4">',webpage)
- if mobj is not None:
- video_url = mobj.group(1)
- else:
- self._downloader.report_error(u'unable to extract media URL')
- return
- mobj = re.search('<h1 class="videoTitle slidePanelMovable">'+r'(.+)'+r'</h1>',webpage)
- if mobj is not None:
- video_title = mobj.group(1)
- else:
- video_title = 'Redtube - %s' % time.ctime()
-
- return [{
- 'id': video_id,
- 'url': video_url,
- 'ext': video_extension,
- 'title': video_title,
- }]
-
class DailymotionIE(InfoExtractor):
"""Information Extractor for Dailymotion"""
epTitle = mobj.group('episode')
self.report_extraction(epTitle)
- webpage = self._download_webpage(url, epTitle)
+ webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
if dlNewest:
url = htmlHandle.geturl()
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
- self._downloader.report_error(u'Invalid redirected URL: ' + url)
- return
+ raise ExtractorError(u'Invalid redirected URL: ' + url)
if mobj.group('episode') == '':
- self._downloader.report_error(u'Redirected URL is still not specific: ' + url)
- return
+ raise ExtractorError(u'Redirected URL is still not specific: ' + url)
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
if len(altMovieParams) == 0:
- self._downloader.report_error(u'unable to find Flash URL in webpage ' + url)
- return
+ raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
IE_NAME = u'WorldStarHipHop'
def _real_extract(self, url):
- _src_url = r"""(http://(hw-videos|hw-post1).*(?:mp4|flv))"""
+ _src_url = r'so\.addVariable\("file","(.*?)"\)'
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
mobj = re.search(_src_url, webpage_src)
if mobj is not None:
- video_url = mobj.group()
+ video_url = mobj.group(1)
if 'mp4' in video_url:
ext = 'mp4'
else:
return [track_info]
+class RedTubeIE(InfoExtractor):
+ """Information Extractor for redtube"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+
+ video_id = mobj.group('id')
+ video_extension = 'mp4'
+ webpage = self._download_webpage(url, video_id)
+ self.report_extraction(video_id)
+ mobj = re.search(r'<source src="'+'(.+)'+'" type="video/mp4">',webpage)
+
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract media URL')
+
+ video_url = mobj.group(1)
+ mobj = re.search('<h1 class="videoTitle slidePanelMovable">(.+)</h1>',webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ }]
+
def gen_extractors():
""" Return a list of an instance of every supported extractor.
TEDIE(),
MySpassIE(),
SpiegelIE(),
- RedtubeIE(),
LiveLeakIE(),
ARDIE(),
TumblrIE(),
BandcampIE(),
+ RedTubeIE(),
GenericIE()
]