+class BandcampIE(InfoExtractor):
+ _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group('title')
+ webpage = self._download_webpage(url, title)
+ # We get the link to the free download page
+ m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
+ if m_download is None:
+ raise ExtractorError(u'No free songs founded')
+
+ download_link = m_download.group(1)
+ id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
+ webpage, re.MULTILINE|re.DOTALL).group('id')
+
+ download_webpage = self._download_webpage(download_link, id,
+ 'Downloading free downloads page')
+ # We get the dictionary of the track from some javascrip code
+ info = re.search(r'items: (.*?),$',
+ download_webpage, re.MULTILINE).group(1)
+ info = json.loads(info)[0]
+ # We pick mp3-320 for now, until format selection can be easily implemented.
+ mp3_info = info[u'downloads'][u'mp3-320']
+ # If we try to use this url it says the link has expired
+ initial_url = mp3_info[u'url']
+ re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
+ m_url = re.match(re_url, initial_url)
+ #We build the url we will use to get the final track url
+ # This url is build in Bandcamp in the script download_bunde_*.js
+ request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
+ final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
+ # If we could correctly generate the .rand field the url would be
+ #in the "download_url" key
+ final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
+
+ track_info = {'id':id,
+ 'title' : info[u'title'],
+ 'ext' : 'mp3',
+ 'url' : final_url,
+ 'thumbnail' : info[u'thumb_url'],
+ 'uploader' : info[u'artist']
+ }
+
+ return [track_info]
+
+class RedTubeIE(InfoExtractor):
+ """Information Extractor for redtube"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+
+ video_id = mobj.group('id')
+ video_extension = 'mp4'
+ webpage = self._download_webpage(url, video_id)
+ self.report_extraction(video_id)
+ mobj = re.search(r'<source src="'+'(.+)'+'" type="video/mp4">',webpage)
+
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract media URL')
+
+ video_url = mobj.group(1)
+ mobj = re.search('<h1 class="videoTitle slidePanelMovable">(.+)</h1>',webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ }]
+
+class InaIE(InfoExtractor):
+ """Information Extractor for Ina.fr"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
+ video_extension = 'mp4'
+ webpage = self._download_webpage(mrss_url, video_id)
+
+ mobj = re.search(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract media URL')
+ video_url = mobj.group(1)
+
+ mobj = re.search(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ }]
+
+class HowcastIE(InfoExtractor):
+ """Information Extractor for Howcast.com"""
+ _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ webpage_url = 'http://www.howcast.com/videos/' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+
+ self.report_extraction(video_id)
+
+ mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract video URL')
+ video_url = mobj.group(1)
+
+ mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1) or mobj.group(2)
+
+ mobj = re.search(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', webpage)
+ if mobj is None:
+ self._downloader.report_warning(u'unable to extract description')
+ video_description = None
+ else:
+ video_description = mobj.group(1) or mobj.group(2)
+
+ mobj = re.search(r'<meta content=\'(.+?)\' property=\'og:image\'', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract thumbnail')
+ thumbnail = mobj.group(1)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': thumbnail,
+ }]
+
+class VineIE(InfoExtractor):
+ """Information Extractor for Vine.co"""
+ _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
+
+ def _real_extract(self, url):
+
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ webpage_url = 'https://vine.co/v/' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+
+ self.report_extraction(video_id)
+
+ mobj = re.search(r'<meta property="twitter:player:stream" content="(.+?)"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract video URL')
+ video_url = mobj.group(1)
+
+ mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1)
+
+ mobj = re.search(r'<meta property="og:image" content="(.+?)(\?.*?)?"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract thumbnail')
+ thumbnail = mobj.group(1)
+
+ mobj = re.search(r'<div class="user">.*?<h2>(.+?)</h2>', webpage, re.DOTALL)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract uploader')
+ uploader = mobj.group(1)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ }]
+
+class FlickrIE(InfoExtractor):
+ """Information Extractor for Flickr videos"""
+ _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ video_uploader_id = mobj.group('uploader_id')
+ webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+
+ mobj = re.search(r"photo_secret: '(\w+)'", webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract video secret')
+ secret = mobj.group(1)
+
+ first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
+ first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
+
+ mobj = re.search(r'<Item id="id">(\d+-\d+)</Item>', first_xml)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract node_id')
+ node_id = mobj.group(1)
+
+ second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
+ second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
+
+ self.report_extraction(video_id)
+
+ mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract video url')
+ video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
+
+ mobj = re.search(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1) or mobj.group(2)
+
+ mobj = re.search(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
+ if mobj is None:
+ self._downloader.report_warning(u'unable to extract description')
+ video_description = None
+ else:
+ video_description = mobj.group(1) or mobj.group(2)
+
+ mobj = re.search(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract thumbnail')
+ thumbnail = mobj.group(1) or mobj.group(2)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': thumbnail,
+ 'uploader_id': video_uploader_id,
+ }]
+
+class TeamcocoIE(InfoExtractor):
+ _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError(u'Invalid URL: %s' % url)
+ url_title = mobj.group('url_title')
+ webpage = self._download_webpage(url, url_title)
+
+ mobj = re.search(r'<article class="video" data-id="(\d+?)"', webpage)
+ video_id = mobj.group(1)
+
+ self.report_extraction(video_id)
+
+ mobj = re.search(r'<meta property="og:title" content="(.+?)"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = mobj.group(1)
+
+ mobj = re.search(r'<meta property="og:image" content="(.+?)"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract thumbnail')
+ thumbnail = mobj.group(1)
+
+ mobj = re.search(r'<meta property="og:description" content="(.*?)"', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract description')
+ description = mobj.group(1)
+
+ data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
+ data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
+ mobj = re.search(r'<file type="high".*?>(.*?)</file>', data)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract video url')
+ video_url = mobj.group(1)
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'title': video_title,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ }]
+
+class XHamsterIE(InfoExtractor):
+ """Information Extractor for xHamster"""
+ _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+
+ video_id = mobj.group('id')
+ mrss_url='http://xhamster.com/movies/%s/.html' % video_id
+ webpage = self._download_webpage(mrss_url, video_id)
+ mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract media URL')
+ if len(mobj.group('server')) == 0:
+ video_url = compat_urllib_parse.unquote(mobj.group('file'))
+ else:
+ video_url = mobj.group('server')+'/key='+mobj.group('file')
+ video_extension = video_url.split('.')[-1]
+
+ mobj = re.search(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract title')
+ video_title = unescapeHTML(mobj.group('title'))
+
+ mobj = re.search(r'<span>Description: </span>(?P<description>[^<]+)', webpage)
+ if mobj is None:
+ video_description = u''
+ else:
+ video_description = unescapeHTML(mobj.group('description'))
+
+ mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract upload date')
+ video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+
+ mobj = re.search(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^>]+)', webpage)
+ if mobj is None:
+ video_uploader_id = u'anonymous'
+ else:
+ video_uploader_id = mobj.group('uploader_id')
+
+ mobj = re.search(r'\'image\':\'(?P<thumbnail>[^\']+)\'', webpage)
+ if mobj is None:
+ raise ExtractorError(u'Unable to extract thumbnail URL')
+ video_thumbnail = mobj.group('thumbnail')
+
+ return [{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': video_extension,
+ 'title': video_title,
+ 'description': video_description,
+ 'upload_date': video_upload_date,
+ 'uploader_id': video_uploader_id,
+ 'thumbnail': video_thumbnail
+ }]