10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.escapist import EscapistIE
29 from .extractor.facebook import FacebookIE
30 from .extractor.funnyordie import FunnyOrDieIE
31 from .extractor.gametrailers import GametrailersIE
32 from .extractor.generic import GenericIE
33 from .extractor.googleplus import GooglePlusIE
34 from .extractor.googlesearch import GoogleSearchIE
35 from .extractor.infoq import InfoQIE
36 from .extractor.justintv import JustinTVIE
37 from .extractor.metacafe import MetacafeIE
38 from .extractor.mixcloud import MixcloudIE
39 from .extractor.mtv import MTVIE
40 from .extractor.myvideo import MyVideoIE
41 from .extractor.nba import NBAIE
42 from .extractor.statigram import StatigramIE
43 from .extractor.photobucket import PhotobucketIE
44 from .extractor.rbmaradio import RBMARadioIE
45 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
46 from .extractor.stanfordoc import StanfordOpenClassroomIE
47 from .extractor.steam import SteamIE
48 from .extractor.ted import TEDIE
49 from .extractor.ustream import UstreamIE
50 from .extractor.vimeo import VimeoIE
51 from .extractor.worldstarhiphop import WorldStarHipHopIE
52 from .extractor.xnxx import XNXXIE
53 from .extractor.xvideos import XVideosIE
54 from .extractor.yahoo import YahooIE, YahooSearchIE
55 from .extractor.youku import YoukuIE
56 from .extractor.youporn import YouPornIE
57 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
58 from .extractor.zdf import ZDFIE
# Extractor for pornotube.com video pages.
# The video id and title are taken straight from the URL (named groups
# 'videoid' and 'title' of _VALID_URL); the downloaded page is searched for
# the .flv URL and the upload date.
# NOTE(review): the embedded line numbers skip (e.g. 87 -> 89), so parts of
# this class, such as the condition guarding the raise, are not visible in
# this listing.
82 class PornotubeIE(InfoExtractor):
83 """Information extractor for pornotube.com."""
84 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
86 def _real_extract(self, url):
87 mobj = re.match(self._VALID_URL, url)
89 raise ExtractorError(u'Invalid URL: %s' % url)
91 video_id = mobj.group('videoid')
92 video_title = mobj.group('title')
95 webpage = self._download_webpage(url, video_id)
# The URL found in the page is passed through unquote() before it is used.
98 VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
99 video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
100 video_url = compat_urllib_parse.unquote(video_url)
102 #Get the uploaded date
# Searched with fatal=False (presumably a miss does not abort extraction);
# the date is only normalized through unified_strdate when one was found.
103 VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
104 upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
105 if upload_date: upload_date = unified_strdate(upload_date)
# Result dictionary; only some of its keys are visible in this listing.
107 info = {'id': video_id,
110 'upload_date': upload_date,
111 'title': video_title,
# Extractor for youjizz.com video pages.
# Two pages are downloaded: the video page (for the title and the embed-page
# URL) and the embed page (for the actual media URL).
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# the conditions guarding the two raise statements are not visible here.
117 class YouJizzIE(InfoExtractor):
118 """Information extractor for youjizz.com."""
119 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
121 def _real_extract(self, url):
122 mobj = re.match(self._VALID_URL, url)
124 raise ExtractorError(u'Invalid URL: %s' % url)
126 video_id = mobj.group('videoid')
128 # Get webpage content
129 webpage = self._download_webpage(url, video_id)
131 # Get the video title
132 video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
133 webpage, u'title').strip()
# Locate the embed page URL inside the video page.
136 result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
138 raise ExtractorError(u'ERROR: unable to extract embed page')
# From here on video_id is the numeric id taken from the embed URL, replacing
# the id that was parsed from the page URL above.
140 embed_page_url = result.group(0).strip()
141 video_id = result.group('videoid')
# webpage is rebound to the embed page's content.
143 webpage = self._download_webpage(embed_page_url, video_id)
# The media URL is the "file" variable passed to so.addVariable(...).
146 video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
147 webpage, u'video URL')
# The embed page URL is reported as the player URL.
149 info = {'id': video_id,
151 'title': video_title,
154 'player_url': embed_page_url}
# Extractor for 8tracks.com mixes (playlists).
# The mix metadata is read from the "PAGE.mix = ...;" JSON embedded in the
# page; the tracks are then fetched one at a time from the sets/<session>
# play/next endpoints until the API reports the last track.
# NOTE(review): this listing omits some lines (embedded numbering skips).
# `mix_id` and the first value of `next_url` are used below, but their
# assignments are not visible here.
158 class EightTracksIE(InfoExtractor):
160 _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
162 def _real_extract(self, url):
163 mobj = re.match(self._VALID_URL, url)
165 raise ExtractorError(u'Invalid URL: %s' % url)
166 playlist_id = mobj.group('id')
168 webpage = self._download_webpage(url, playlist_id)
# re.DOTALL lets the non-greedy capture span several lines, up to ";\n".
170 json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
171 data = json.loads(json_like)
# A random number is used as the session id in the play/next URLs.
173 session = str(random.randint(0, 1000000000))
# track_count is only used in the progress note of each download below.
175 track_count = data['tracks_count']
176 first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
# itertools.count() is unbounded; the at_last_track check further down is the
# visible end condition (its body is not shown -- presumably it leaves the
# loop; TODO confirm).
179 for i in itertools.count():
180 api_json = self._download_webpage(next_url, playlist_id,
181 note=u'Downloading song information %s/%s' % (str(i+1), track_count),
182 errnote=u'Failed to download song information')
183 api_data = json.loads(api_json)
184 track_data = api_data[u'set']['track']
# Per-track fields; the displayed title is "<performer> - <name>".
186 'id': track_data['id'],
187 'url': track_data['track_file_stream_url'],
188 'title': track_data['performer'] + u' - ' + track_data['name'],
189 'raw_title': track_data['name'],
190 'uploader_id': data['user']['login'],
194 if api_data['set']['at_last_track']:
# The next request names the track just received via track_id.
196 next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
# Extractor for keek.com.
# The video and thumbnail URLs are built directly from the video id using
# fixed cdn.keek.com templates; the page is downloaded for the title and the
# uploader name.
# NOTE(review): this listing omits some lines (embedded numbering skips),
# including continuation lines of some calls and part of the result dict.
199 class KeekIE(InfoExtractor):
200 _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
203 def _real_extract(self, url):
# No None-check on the match is visible here; presumably the caller only
# passes URLs that already matched _VALID_URL -- TODO confirm.
204 m = re.match(self._VALID_URL, url)
205 video_id = m.group('videoID')
207 video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
208 thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
209 webpage = self._download_webpage(url, video_id)
# Title comes from the og:title meta tag.
211 video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
# Uploader is the first <h2> after the "user-name-and-bio" div; searched with
# fatal=False.
214 uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
215 webpage, u'uploader', fatal=False)
221 'title': video_title,
222 'thumbnail': thumbnail,
# Extractor for myspass.de.
# The video id is taken from the URL path and used to download a metadata
# XML document, from which the media URL, title, format, description and
# thumbnail are read.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# several guards and else-branches are not visible here.
228 class MySpassIE(InfoExtractor):
229 _VALID_URL = r'http://www.myspass.de/.*'
231 def _real_extract(self, url):
232 META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
234 # video id is the last path element of the URL
235 # usually there is a trailing slash, so also try the second but last
236 url_path = compat_urllib_parse_urlparse(url).path
237 url_parent_path, video_id = os.path.split(url_path)
# Fallback to the parent path element (the condition on line 238 is not
# shown in this listing).
239 _, video_id = os.path.split(url_parent_path)
242 metadata_url = META_DATA_URL_TEMPLATE % video_id
243 metadata_text = self._download_webpage(metadata_url, video_id)
# The downloaded text is re-encoded to UTF-8 bytes before XML parsing.
244 metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
246 # extract values from metadata
# url_flv is mandatory: extraction fails without it.
247 url_flv_el = metadata.find('url_flv')
248 if url_flv_el is None:
249 raise ExtractorError(u'Unable to extract download url')
250 video_url = url_flv_el.text
# File extension of the media URL, without the leading dot.
251 extension = os.path.splitext(video_url)[1][1:]
252 title_el = metadata.find('title')
254 raise ExtractorError(u'Unable to extract title')
255 title = title_el.text
# NOTE: the local name `format` shadows the builtin of the same name.
256 format_id_el = metadata.find('format_id')
257 if format_id_el is None:
260 format = format_id_el.text
# description and imagePreview are only read when the element exists; the
# branches for a missing element are not visible in this listing.
261 description_el = metadata.find('description')
262 if description_el is not None:
263 description = description_el.text
266 imagePreview_el = metadata.find('imagePreview')
267 if imagePreview_el is not None:
268 thumbnail = imagePreview_el.text
277 'thumbnail': thumbnail,
278 'description': description
# Extractor for spiegel.de videos.
# The title is read from the HTML page; file name and duration come from a
# per-video XML document at video2.spiegel.de/flash/<id>.xml.
# NOTE(review): this listing omits some lines (embedded numbering skips);
# the assignment of `last_type` (line 299) is not visible here.
282 class SpiegelIE(InfoExtractor):
283 _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
285 def _real_extract(self, url):
286 m = re.match(self._VALID_URL, url)
287 video_id = m.group('videoID')
289 webpage = self._download_webpage(url, video_id)
291 video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
294 xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
295 xml_code = self._download_webpage(xml_url, video_id,
296 note=u'Downloading XML', errnote=u'Failed to download XML')
298 idoc = xml.etree.ElementTree.fromstring(xml_code)
# Both values are read from the first matching child of `last_type`;
# duration is converted to a float.
300 filename = last_type.findall('./filename')[0].text
301 duration = float(last_type.findall('./duration')[0].text)
303 video_url = 'http://video2.spiegel.de/flash/' + filename
# Extension is whatever follows the last '.' in the file name.
304 video_ext = filename.rpartition('.')[2]
309 'title': video_title,
310 'duration': duration,
# Extractor for liveleak.com "view?i=<id>" pages.
# Everything (media URL, title, description, uploader) is scraped from the
# single downloaded page.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# the condition guarding the raise and part of the result dict are not shown.
# NOTE(review): in _VALID_URL, "http?" makes the final "p" optional (matches
# "htt"/"http"), so an explicit "https://" prefix does not match -- possibly
# "https?" was intended; confirm before changing.
314 class LiveLeakIE(InfoExtractor):
316 _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
317 IE_NAME = u'liveleak'
319 def _real_extract(self, url):
320 mobj = re.match(self._VALID_URL, url)
322 raise ExtractorError(u'Invalid URL: %s' % url)
324 video_id = mobj.group('video_id')
326 webpage = self._download_webpage(url, video_id)
328 video_url = self._search_regex(r'file: "(.*?)",',
329 webpage, u'video URL')
# The site prefix "LiveLeak.com -" is removed from the og:title value.
331 video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
332 webpage, u'title').replace('LiveLeak.com -', '').strip()
# Description and uploader are searched with fatal=False.
334 video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
335 webpage, u'description', fatal=False)
337 video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
338 webpage, u'uploader', fatal=False)
344 'title': video_title,
345 'description': video_description,
346 'uploader': video_uploader
# Extractor for videos embedded in tumblr.com posts.
# Accepts both /post/ and /video/ URLs, but always downloads the /post/ form
# of the page and looks for the escaped video markup inside it.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# the condition guarding the raise and part of the result dict are not shown.
353 class TumblrIE(InfoExtractor):
354 _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
356 def _real_extract(self, url):
357 m_url = re.match(self._VALID_URL, url)
358 video_id = m_url.group('id')
359 blog = m_url.group('blog_name')
# The URL is rebuilt in its /post/ form regardless of which form was given.
361 url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
362 webpage = self._download_webpage(url, video_id)
# In this raw pattern "\\x22" matches the literal four characters \x22 in the
# page text, i.e. the page carries the double quotes in escaped form.
# blog and video_id are interpolated into the pattern without re.escape.
364 re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
365 video = re.search(re_video, webpage)
367 raise ExtractorError(u'Unable to extract video')
368 video_url = video.group('video_url')
369 ext = video.group('ext')
# The thumbnail is searched with fatal=False; backslashes are stripped from
# the value when one was found.
371 video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
372 webpage, u'thumbnail', fatal=False) # We pick the first poster
373 if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
375 # The only place where you can get a title, it's not complete,
376 # but searching in other places doesn't work for all videos
377 video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
378 webpage, u'title', flags=re.DOTALL)
# The result is returned as a one-element list of info dictionaries.
380 return [{'id': video_id,
382 'title': video_title,
383 'thumbnail': video_thumbnail,
# Extractor for freely downloadable bandcamp.com tracks.
# Flow visible below: track page -> free download page -> "statdownload"
# request -> final URL taken from the "retry_url" field of that response.
# Only the mp3-320 download is used.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# part of the result dict is not visible here.
# NOTE(review): the local name `id` shadows the builtin; several re.search /
# re.match results are used without a None-check, so an unexpected page
# layout would raise AttributeError rather than ExtractorError.
387 class BandcampIE(InfoExtractor):
388 _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
390 def _real_extract(self, url):
391 mobj = re.match(self._VALID_URL, url)
# The URL slug is used as the identifier for the first download.
392 title = mobj.group('title')
393 webpage = self._download_webpage(url, title)
394 # We get the link to the free download page
395 m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
396 if m_download is None:
397 raise ExtractorError(u'No free songs found')
399 download_link = m_download.group(1)
# The numeric id is read from the "var TralbumData = {...}" block of the
# track page.
400 id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
401 webpage, re.MULTILINE|re.DOTALL).group('id')
403 download_webpage = self._download_webpage(download_link, id,
404 'Downloading free downloads page')
405 # We get the dictionary of the track from some javascrip code
406 info = re.search(r'items: (.*?),$',
407 download_webpage, re.MULTILINE).group(1)
# The captured text is parsed as JSON and its first element is used.
408 info = json.loads(info)[0]
409 # We pick mp3-320 for now, until format selection can be easily implemented.
410 mp3_info = info[u'downloads'][u'mp3-320']
411 # If we try to use this url it says the link has expired
412 initial_url = mp3_info[u'url']
# The initial URL is split into server, fsig and ts so they can be reused in
# the statdownload request below.
413 re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
414 m_url = re.match(re_url, initial_url)
415 #We build the url we will use to get the final track url
416 # This url is build in Bandcamp in the script download_bunde_*.js
# The .rand value in the request is a fixed constant.
417 request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
418 final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
419 # If we could correctly generate the .rand field the url would be
420 #in the "download_url" key
421 final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
423 track_info = {'id':id,
424 'title' : info[u'title'],
427 'thumbnail' : info[u'thumb_url'],
428 'uploader' : info[u'artist']
# Extractor for redtube.com video pages.
# The media URL is the src of the page's <source ... type="video/mp4"> tag
# and the title is the text of the "videoTitle" <h1>; the extension is fixed
# to 'mp4'.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# the condition guarding the raise and part of the result dict are not shown.
433 class RedTubeIE(InfoExtractor):
434 """Information Extractor for redtube"""
435 _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
437 def _real_extract(self,url):
438 mobj = re.match(self._VALID_URL, url)
440 raise ExtractorError(u'Invalid URL: %s' % url)
442 video_id = mobj.group('id')
443 video_extension = 'mp4'
444 webpage = self._download_webpage(url, video_id)
446 self.report_extraction(video_id)
448 video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
449 webpage, u'video URL')
451 video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
457 'ext': video_extension,
458 'title': video_title,
# Extractor for ina.fr videos.
# Instead of the HTML page, the MRSS document at
# player.ina.fr/notices/<id>.mrss is downloaded; the .mp4 URL and the
# CDATA-wrapped title are read from it. The extension is fixed to 'mp4'.
# NOTE(review): this listing omits some lines (embedded numbering skips),
# including continuation lines of some calls and part of the result dict.
461 class InaIE(InfoExtractor):
462 """Information Extractor for Ina.fr"""
463 _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
465 def _real_extract(self,url):
466 mobj = re.match(self._VALID_URL, url)
468 video_id = mobj.group('id')
469 mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
470 video_extension = 'mp4'
# Despite its name, `webpage` holds the MRSS document here.
471 webpage = self._download_webpage(mrss_url, video_id)
473 self.report_extraction(video_id)
475 video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
476 webpage, u'video URL')
478 video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
484 'ext': video_extension,
485 'title': video_title,
# Extractor for howcast.com videos.
# The page is always fetched from the canonical
# http://www.howcast.com/videos/<id> URL, not from the URL that was passed in.
# NOTE(review): this listing omits some lines (embedded numbering skips),
# including continuation lines of some calls and part of the result dict.
488 class HowcastIE(InfoExtractor):
489 """Information Extractor for Howcast.com"""
490 _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
492 def _real_extract(self, url):
493 mobj = re.match(self._VALID_URL, url)
495 video_id = mobj.group('id')
496 webpage_url = 'http://www.howcast.com/videos/' + video_id
497 webpage = self._download_webpage(webpage_url, video_id)
499 self.report_extraction(video_id)
# Only mobile-media.howcast.com .mp4 URLs are accepted by this pattern.
501 video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
502 webpage, u'video URL')
# The title and description patterns accept the content attribute in either
# double or single quotes (two alternative capture groups).
504 video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
507 video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
508 webpage, u'description', fatal=False)
510 thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
511 webpage, u'thumbnail', fatal=False)
517 'title': video_title,
518 'description': video_description,
519 'thumbnail': thumbnail,
# Extractor for vine.co videos.
# The page is always fetched from https://vine.co/v/<id>; media URL, title
# and thumbnail come from meta tags, the uploader from the "user" div.
# NOTE(review): this listing omits some lines (embedded numbering skips),
# including continuation lines of some calls and part of the result dict.
522 class VineIE(InfoExtractor):
523 """Information Extractor for Vine.co"""
524 _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
526 def _real_extract(self, url):
527 mobj = re.match(self._VALID_URL, url)
529 video_id = mobj.group('id')
530 webpage_url = 'https://vine.co/v/' + video_id
531 webpage = self._download_webpage(webpage_url, video_id)
533 self.report_extraction(video_id)
535 video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
536 webpage, u'video URL')
538 video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
# The optional query string of the image URL is matched by a second group,
# outside the first one; presumably only the first group is returned, which
# would drop the query string -- TODO confirm against _html_search_regex.
541 thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
542 webpage, u'thumbnail', fatal=False)
# re.DOTALL lets ".*?" span the lines between the div and its <h2>.
544 uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
545 webpage, u'uploader', fatal=False, flags=re.DOTALL)
551 'title': video_title,
552 'thumbnail': thumbnail,
553 'uploader': uploader,
# Extractor for Flickr videos.
# Three downloads are chained: the photo page (for photo_secret and the
# og: metadata), a first XML document (for the node id) and a second XML
# playlist (for the stream APP and FULLPATH that form the media URL).
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# the condition guarding the raise and part of the result dict are not shown.
556 class FlickrIE(InfoExtractor):
557 """Information Extractor for Flickr videos"""
558 _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
560 def _real_extract(self, url):
561 mobj = re.match(self._VALID_URL, url)
563 video_id = mobj.group('id')
564 video_uploader_id = mobj.group('uploader_id')
# The page URL is rebuilt from the uploader id and video id.
565 webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
566 webpage = self._download_webpage(webpage_url, video_id)
# The secret is required by both follow-up requests.
568 secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
570 first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
571 first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
573 node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
574 first_xml, u'node_id')
576 second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
577 second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
579 self.report_extraction(video_id)
# `mobj` is reused here for the STREAM match in the second XML document.
581 mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
583 raise ExtractorError(u'Unable to extract video url')
# Media URL = APP + FULLPATH; only FULLPATH is HTML-unescaped.
584 video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
# The og: patterns accept the content attribute in double or single quotes.
586 video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
587 webpage, u'video title')
589 video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
590 webpage, u'description', fatal=False)
592 thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
593 webpage, u'thumbnail', fatal=False)
599 'title': video_title,
600 'description': video_description,
601 'thumbnail': thumbnail,
602 'uploader_id': video_uploader_id,
# Extractor for teamcoco.com videos.
# The URL only carries a title slug; the numeric video id is read from the
# page (<article class="video" data-id="...">) and then used to download the
# cvp/2.0/<id>.xml data document that holds the media file entries.
# NOTE(review): this listing omits some lines (embedded numbering skips),
# including continuation lines of some calls and part of the result dict.
605 class TeamcocoIE(InfoExtractor):
606 _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
608 def _real_extract(self, url):
609 mobj = re.match(self._VALID_URL, url)
611 raise ExtractorError(u'Invalid URL: %s' % url)
# The slug is used as the identifier for the first download only.
612 url_title = mobj.group('url_title')
613 webpage = self._download_webpage(url, url_title)
615 video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
616 webpage, u'video id')
618 self.report_extraction(video_id)
620 video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
# Thumbnail and description are searched with fatal=False.
623 thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
624 webpage, u'thumbnail', fatal=False)
626 video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
627 webpage, u'description', fatal=False)
629 data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
630 data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
# The pattern selects the <file type="high"> entry; the text it is searched
# in is on a line not shown here (presumably `data` -- TODO confirm).
632 video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
639 'title': video_title,
640 'thumbnail': thumbnail,
641 'description': video_description,
# Extractor for xhamster.com movie pages.
# The page is fetched from http://xhamster.com/movies/<id>/.html and the
# media URL is assembled from the 'srv' and 'file' values found in it.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# several if/else lines and part of the result dict are not visible here.
# NOTE(review): in _VALID_URL the dot in "(?:www.)?" is unescaped, so it
# matches any character in that position.
644 class XHamsterIE(InfoExtractor):
645 """Information Extractor for xHamster"""
646 _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
648 def _real_extract(self,url):
649 mobj = re.match(self._VALID_URL, url)
651 video_id = mobj.group('id')
# Despite the name, mrss_url is the HTML movie page rebuilt from the id.
652 mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
653 webpage = self._download_webpage(mrss_url, video_id)
# `mobj` is reused for the srv/file match in the page.
655 mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
657 raise ExtractorError(u'Unable to extract media URL')
# Empty server: 'file' is itself the (URL-quoted) media URL. The second
# assignment below combines server and file as "<server>/key=<file>"
# (presumably the else branch; line 660 is not shown).
658 if len(mobj.group('server')) == 0:
659 video_url = compat_urllib_parse.unquote(mobj.group('file'))
661 video_url = mobj.group('server')+'/key='+mobj.group('file')
# Extension is whatever follows the last '.' in the media URL.
662 video_extension = video_url.split('.')[-1]
664 video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
667 # Can't see the description anywhere in the UI
668 # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
669 # webpage, u'description', fatal=False)
670 # if video_description: video_description = unescapeHTML(video_description)
# Upload date is read from a hint='YYYY-MM-DD hh:mm:ss TZ' attribute and
# joined to YYYYMMDD; the other visible path sets it to None with a warning.
672 mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
674 video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
676 video_upload_date = None
677 self._downloader.report_warning(u'Unable to extract upload date')
# u'anonymous' is passed as the default for the uploader id search.
679 video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
680 webpage, u'uploader id', default=u'anonymous')
682 video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
683 webpage, u'thumbnail', fatal=False)
688 'ext': video_extension,
689 'title': video_title,
690 # 'description': video_description,
691 'upload_date': video_upload_date,
692 'uploader_id': video_uploader_id,
693 'thumbnail': video_thumbnail
# Extractor for hypem.com tracks.
# The track page is requested with extra query parameters (ax=1 and a
# timestamp). The Set-Cookie header of that response is replayed on a second
# request to hypem.com/serve/source/<id>/<key>, whose JSON answer holds the
# final media URL.
# NOTE(review): this listing omits some lines (embedded numbering skips).
# The try/except lines around the two json.loads calls and the assignment of
# `key` are not visible here.
696 class HypemIE(InfoExtractor):
697 """Information Extractor for hypem"""
698 _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
700 def _real_extract(self, url):
701 mobj = re.match(self._VALID_URL, url)
703 raise ExtractorError(u'Invalid URL: %s' % url)
704 track_id = mobj.group(1)
# The query string is appended with "?", so this assumes the incoming URL has
# no query string of its own.
706 data = { 'ax': 1, 'ts': time.time() }
707 data_encoded = compat_urllib_parse.urlencode(data)
708 complete_url = url + "?" + data_encoded
709 request = compat_urllib_request.Request(complete_url)
710 response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
# Kept so it can be sent back on the serve/source request below.
711 cookie = urlh.headers.get('Set-Cookie', '')
713 self.report_extraction(track_id)
# The track list is JSON embedded in a <script id="displayList-data"> tag.
715 html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
716 response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
# Only the first entry of the 'tracks' list is used.
718 track_list = json.loads(html_tracks)
719 track = track_list[u'tracks'][0]
721 raise ExtractorError(u'Hypemachine contained invalid JSON.')
# track_id is replaced by the id from the JSON data.
724 track_id = track[u"id"]
725 artist = track[u"artist"]
726 title = track[u"song"]
728 serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
# An empty string is passed as the request's data argument, together with a
# JSON Content-Type header and the cookie captured above.
729 request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
730 request.add_header('cookie', cookie)
731 song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
733 song_data = json.loads(song_data_json)
735 raise ExtractorError(u'Hypemachine contained invalid JSON.')
736 final_url = song_data[u"url"]
# Extractor for vbox7.com videos.
# The first page only contains a JavaScript redirect
# ("window.location = '...'"), which is followed by hand to get the title.
# The media and thumbnail URLs come from a form-encoded POST to
# play/magare.do.
# NOTE(review): this listing omits some lines (embedded numbering skips), so
# the condition guarding the raise and part of the result dict are not shown.
746 class Vbox7IE(InfoExtractor):
747 """Information Extractor for Vbox7"""
748 _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
750 def _real_extract(self,url):
751 mobj = re.match(self._VALID_URL, url)
753 raise ExtractorError(u'Invalid URL: %s' % url)
754 video_id = mobj.group(1)
756 redirect_page, urlh = self._download_webpage_handle(url, video_id)
# The redirect target is appended to the URL of the response (urlh.geturl()),
# i.e. it is treated as a suffix rather than resolved as a relative URL.
757 new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
758 redirect_url = urlh.geturl() + new_location
759 webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
# Only the part of <title> before the first '/' is kept.
761 title = self._html_search_regex(r'<title>(.*)</title>',
762 webpage, u'title').split('/')[0].strip()
# Passing `data` to Request supplies a request body for the info call.
765 info_url = "http://vbox7.com/play/magare.do"
766 data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
767 info_request = compat_urllib_request.Request(info_url, data)
768 info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
769 info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
770 if info_response is None:
771 raise ExtractorError(u'Unable to extract the media url')
# The two-name unpacking requires the response to consist of exactly two
# '&'-separated key=value pairs, in the order media URL, thumbnail URL.
# split('=')[1] keeps only the text between the first and second '='.
772 (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
779 'thumbnail': thumbnail_url,
# Builds the list of extractor instances described by the docstring below.
# NOTE(review): this listing omits most of the function (embedded numbering
# skips from 785 to 813); the end of the docstring and all list entries other
# than StanfordOpenClassroomIE() are not visible here.
783 def gen_extractors():
784 """ Return a list of an instance of every supported extractor.
785 The order does matter; the first extractor matched is the one handling the URL.
813 StanfordOpenClassroomIE(),
# Looks up the extractor class by name in this module's globals: the key is
# ie_name with the suffix 'IE' appended (e.g. 'Vine' -> VineIE). The class
# object itself is returned, not an instance. A name with no matching global
# raises KeyError from the dictionary lookup.
853 def get_info_extractor(ie_name):
854 """Returns the info extractor class with the given ie_name"""
855 return globals()[ie_name+'IE']