4002c9485d394b75d89be4f5a444d9f40c421a4d
[youtube-dl] / youtube_dl / InfoExtractors.py
1 import base64
2 import datetime
3 import itertools
4 import netrc
5 import os
6 import re
7 import socket
8 import time
9 import email.utils
10 import xml.etree.ElementTree
11 import random
12 import math
13 import operator
14 import hashlib
15 import binascii
16 import urllib
17
18 from .utils import *
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
20
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.eighttracks import EightTracksIE
29 from .extractor.escapist import EscapistIE
30 from .extractor.facebook import FacebookIE
31 from .extractor.funnyordie import FunnyOrDieIE
32 from .extractor.gametrailers import GametrailersIE
33 from .extractor.generic import GenericIE
34 from .extractor.googleplus import GooglePlusIE
35 from .extractor.googlesearch import GoogleSearchIE
36 from .extractor.infoq import InfoQIE
37 from .extractor.justintv import JustinTVIE
38 from .extractor.metacafe import MetacafeIE
39 from .extractor.mixcloud import MixcloudIE
40 from .extractor.mtv import MTVIE
41 from .extractor.myvideo import MyVideoIE
42 from .extractor.nba import NBAIE
43 from .extractor.statigram import StatigramIE
44 from .extractor.photobucket import PhotobucketIE
45 from .extractor.pornotube import PornotubeIE
46 from .extractor.rbmaradio import RBMARadioIE
47 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
48 from .extractor.stanfordoc import StanfordOpenClassroomIE
49 from .extractor.steam import SteamIE
50 from .extractor.ted import TEDIE
51 from .extractor.ustream import UstreamIE
52 from .extractor.vimeo import VimeoIE
53 from .extractor.worldstarhiphop import WorldStarHipHopIE
54 from .extractor.xnxx import XNXXIE
55 from .extractor.xvideos import XVideosIE
56 from .extractor.yahoo import YahooIE, YahooSearchIE
57 from .extractor.youjizz import YouJizzIE
58 from .extractor.youku import YoukuIE
59 from .extractor.youporn import YouPornIE
60 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
61 from .extractor.zdf import ZDFIE
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class KeekIE(InfoExtractor):
    """Information Extractor for keek.com"""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # Both CDN addresses are built from the video id alone.
        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id

        # Title and uploader are looked up in the downloaded page.
        page = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            page, u'title')
        # The uploader lookup is non-fatal.
        uploader = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            page, u'uploader', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader
        }]
115
116
class MySpassIE(InfoExtractor):
    """Information Extractor for myspass.de"""
    # The dots of the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'http://www\.myspass\.de/.*'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        The video id is read from the URL path and used to download an XML
        metadata document, from which all other fields are taken.

        Raises ExtractorError when the metadata has no 'url_flv' or no
        'title' element.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # mandatory values: download url and title
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        # extension of the media file without the leading dot
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_el.text

        # optional values
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            # No explicit format id: fall back to the file extension.
            # (The original code referenced an undefined name 'ext' here.)
            video_format = extension
        else:
            video_format = format_id_el.text

        description_el = metadata.find('description')
        if description_el is not None:
            description = description_el.text
        else:
            description = None

        imagePreview_el = metadata.find('imagePreview')
        if imagePreview_el is not None:
            thumbnail = imagePreview_el.text
        else:
            thumbnail = None

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': thumbnail,
            'description': description
        }
        return [info]
170
class SpiegelIE(InfoExtractor):
    """Information Extractor for spiegel.de videos"""
    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # The title comes from the HTML page ...
        page = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<div class="module-title">(.*?)</div>', page, u'title')

        # ... while file name and duration come from a separate XML document.
        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')
        root = xml.etree.ElementTree.fromstring(xml_code)

        # Only the last child of the XML root is used.
        variant = root[-1]
        filename = variant.findall('./filename')[0].text
        duration = float(variant.findall('./duration')[0].text)

        return [{
            'id': video_id,
            'url': 'http://video2.spiegel.de/flash/' + filename,
            # everything after the last dot of the file name
            'ext': filename.rpartition('.')[2],
            'title': title,
            'duration': duration,
        }]
202
class LiveLeakIE(InfoExtractor):
    """Information Extractor for liveleak.com"""

    # 'https?' accepts both http:// and https://; the previous 'http?'
    # made the final 'p' optional instead and rejected https:// URLs.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # The media URL is taken from a 'file: "..."' entry in the page.
        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # The og:title carries a site prefix that is stripped here.
        video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        # Description and uploader lookups are non-fatal.
        video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        info = {
            'id':  video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        }

        return [info]
239
240
241
class TumblrIE(InfoExtractor):
    """Information Extractor for Tumblr video posts"""
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when no video source is found in the page.
        """
        url_match = re.match(self._VALID_URL, url)
        blog = url_match.group('blog_name')
        video_id = url_match.group('id')

        # The page is always requested through its /post/ address.
        post_url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(post_url, video_id)

        # In the page the quotes around the values appear as a literal \x22.
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        video_match = re.search(re_video, webpage)
        if video_match is None:
            raise ExtractorError(u'Unable to extract video')

        # We pick the first poster
        thumbnail = self._search_regex(
            r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if thumbnail:
            # drop the backslashes from the value
            thumbnail = thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        title = self._html_search_regex(
            r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        info = {
            'id': video_id,
            'url': video_match.group('video_url'),
            'title': title,
            'thumbnail': thumbnail,
            'ext': video_match.group('ext'),
        }
        return [info]
275
class BandcampIE(InfoExtractor):
    """Information Extractor for free Bandcamp tracks"""
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Only tracks that link to a free download page are supported.

        Raises ExtractorError when the track has no free download page or
        when one of the expected pieces of data cannot be found in the
        downloaded pages (previously the latter surfaced as AttributeError).
        """
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')
        download_link = m_download.group(1)

        m_id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                         webpage, re.MULTILINE|re.DOTALL)
        if m_id is None:
            raise ExtractorError(u'Unable to extract track id')
        track_id = m_id.group('id')

        download_webpage = self._download_webpage(download_link, track_id,
                                                  'Downloading free downloads page')
        # We get the dictionary of the track from some javascript code
        m_items = re.search(r'items: (.*?),$',
                            download_webpage, re.MULTILINE)
        if m_items is None:
            raise ExtractorError(u'Unable to extract track information')
        info = json.loads(m_items.group(1))[0]
        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)
        if m_url is None:
            raise ExtractorError(u'Unable to parse download url')
        # We build the url we will use to get the final track url
        # This url is built by Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), track_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, track_id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
        m_final = re.search(r'"retry_url":"(.*?)"', final_url_webpage)
        if m_final is None:
            raise ExtractorError(u'Unable to extract final download url')
        final_url = m_final.group(1)

        track_info = {'id': track_id,
                      'title' : info[u'title'],
                      'ext' :   'mp3',
                      'url' :   final_url,
                      'thumbnail' : info[u'thumb_url'],
                      'uploader' :  info[u'artist']
                      }

        return [track_info]
321
class RedTubeIE(InfoExtractor):
    """Information Extractor for redtube"""
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = url_match.group('id')

        page = self._download_webpage(url, video_id)
        self.report_extraction(video_id)

        # The media URL is the mp4 <source> element of the page.
        media_url = self._html_search_regex(
            r'<source src="(.+?)" type="video/mp4">',
            page, u'video URL')
        title = self._html_search_regex(
            '<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            page, u'title')

        info = {
            'id':       video_id,
            'url':      media_url,
            'ext':      'mp4',
            'title':    title,
        }
        return [info]
349         
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # All data is read from the .mrss document of the player,
        # not from the page the URL points to.
        feed_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        feed = self._download_webpage(feed_url, video_id)

        self.report_extraction(video_id)

        media_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            feed, u'video URL')
        # The title is wrapped in a CDATA section.
        title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            feed, u'title')

        info = {
            'id':       video_id,
            'url':      media_url,
            'ext':      'mp4',
            'title':    title,
        }
        return [info]
376
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # The page is requested through an address rebuilt from the id.
        page = self._download_webpage(
            'http://www.howcast.com/videos/' + video_id, video_id)

        self.report_extraction(video_id)

        media_url = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            page, u'video URL')

        # The meta values may be quoted with either double or single quotes.
        title = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            page, u'title')

        # Description and thumbnail lookups are non-fatal.
        description = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            page, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            page, u'thumbnail', fatal=False)

        info = {
            'id':       video_id,
            'url':      media_url,
            'ext':      'mp4',
            'title':    title,
            'description': description,
            'thumbnail': thumbnail,
        }
        return [info]
410
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # The page is requested through an address rebuilt from the id.
        page = self._download_webpage('https://vine.co/v/' + video_id, video_id)

        self.report_extraction(video_id)

        # Stream URL and title come from <meta> tags.
        media_url = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            page, u'video URL')
        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')

        # Thumbnail and uploader lookups are non-fatal.
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            page, u'thumbnail', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>',
            page, u'uploader', fatal=False, flags=re.DOTALL)

        info = {
            'id':        video_id,
            'url':       media_url,
            'ext':       'mp4',
            'title':     title,
            'thumbnail': thumbnail,
            'uploader':  uploader,
        }
        return [info]
444
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Three documents are downloaded in turn: the photo page, an XML
        document that yields a node id, and a playlist document that
        holds the stream location.

        Raises ExtractorError when the playlist has no STREAM entry.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        uploader_id = url_match.group('uploader_id')

        page_url = 'http://www.flickr.com/photos/' + uploader_id + '/' + video_id
        page = self._download_webpage(page_url, video_id)

        # The secret found in the page is needed by both follow-up requests.
        secret = self._search_regex(r"photo_secret: '(\w+)'", page, u'secret')

        first_url = ('https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id='
                     + video_id + '&secret=' + secret + '&bitrate=700&target=_self')
        first_xml = self._download_webpage(
            first_url, video_id, 'Downloading first data webpage')
        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>', first_xml, u'node_id')

        second_url = ('https://secure.flickr.com/video_playlist.gne?node_id='
                      + node_id + '&tech=flash&mode=playlist&bitrate=700&secret='
                      + secret + '&rd=video.yahoo.com&noad=1')
        second_xml = self._download_webpage(
            second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        # Only the FULLPATH part is HTML-unescaped before being appended.
        media_url = stream.group(1) + unescapeHTML(stream.group(2))

        # The meta values may be quoted with either double or single quotes.
        title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'video title')
        description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'thumbnail', fatal=False)

        info = {
            'id':          video_id,
            'url':         media_url,
            'ext':         'mp4',
            'title':       title,
            'description': description,
            'thumbnail':   thumbnail,
            'uploader_id': uploader_id,
        }
        return [info]
493
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com"""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Until the numeric id is known, the URL title identifies the download.
        url_title = url_match.group('url_title')
        page = self._download_webpage(url, url_title)

        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            page, u'video id')

        self.report_extraction(video_id)

        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')
        # Thumbnail and description lookups are non-fatal.
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            page, u'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            page, u'description', fatal=False)

        # The media URL is read from a separate XML document, using the
        # <file> element whose type is "high".
        data = self._download_webpage(
            'http://teamcoco.com/cvp/2.0/%s.xml' % video_id,
            video_id, 'Downloading data webpage')
        media_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            data, u'video URL')

        info = {
            'id':          video_id,
            'url':         media_url,
            'ext':         'mp4',
            'title':       title,
            'thumbnail':   thumbnail,
            'description': description,
        }
        return [info]
532
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when the media location is not found in
        the page.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The page is requested through an address rebuilt from the id.
        page_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(page_url, video_id)

        media = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if media is None:
            raise ExtractorError(u'Unable to extract media URL')
        server = media.group('server')
        if server:
            video_url = server + '/key=' + media.group('file')
        else:
            # Without a server, 'file' itself is the URL-quoted address.
            video_url = compat_urllib_parse.unquote(media.group('file'))
        # everything after the last dot of the URL
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # Can't see the description anywhere in the UI
        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
        #     webpage, u'description', fatal=False)
        # if video_description: video_description = unescapeHTML(video_description)

        date_match = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if date_match is None:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')
        else:
            # year, month and day concatenated without separators
            video_upload_date = ''.join(date_match.group(
                'upload_date_Y', 'upload_date_m', 'upload_date_d'))

        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        info = {
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
            # 'description': video_description,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail
        }
        return [info]
584
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when url does not match _VALID_URL or when
        one of the downloaded JSON documents cannot be parsed.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = url_match.group(1)

        # The page is requested with two extra query parameters; the cookie
        # it sets is sent along with the second request below.
        query = compat_urllib_parse.urlencode({ 'ax': 1, 'ts': time.time() })
        page_request = compat_urllib_request.Request(url + "?" + query)
        response, urlh = self._download_webpage_handle(
            page_request, track_id, u'Downloading webpage with the url')
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        # The track list is embedded in the page as JSON.
        html_tracks = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track = json.loads(html_tracks)[u'tracks'][0]
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        # From here on the id reported by the site replaces the one from the URL.
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        serve_request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
        serve_request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(serve_request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        info = {
            'id':       track_id,
            'url':      song_data[u"url"],
            'ext':      "mp3",
            'title':    title,
            'artist':   artist,
        }
        return [info]
634
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for url.

        Raises ExtractorError when url does not match _VALID_URL or when
        the info request yields no response.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = url_match.group(1)

        # The first page only sets window.location; that location is
        # appended to the address of the first response and fetched.
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(
            r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        webpage = self._download_webpage(
            urlh.geturl() + new_location, video_id, u'Downloading redirect page')

        # Only the part of <title> before the first '/' is kept.
        full_title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'title')
        title = full_title.split('/')[0].strip()

        # Media and thumbnail addresses come from a form-encoded POST.
        form = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
        info_request = compat_urllib_request.Request("http://vbox7.com/play/magare.do", form)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')

        def value_of(pair):
            # value part of a 'key=value' item
            return pair.split('=')[1]

        # The response is expected to hold exactly two '&'-separated items.
        final_url, thumbnail_url = map(value_of, info_response.split('&'))

        info = {
            'id':        video_id,
            'url':       final_url,
            'ext':       "flv",
            'title':     title,
            'thumbnail': thumbnail_url,
        }
        return [info]
670
671
def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
    """
    # Classes are listed in matching priority order and instantiated
    # in that same order.
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVIE,
        BlipTVUserIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudSetIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        YouJizzIE,
        PornotubeIE,
        YouPornIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        WorldStarHipHopIE,
        JustinTVIE,
        FunnyOrDieIE,
        SteamIE,
        UstreamIE,
        RBMARadioIE,
        EightTracksIE,
        KeekIE,
        TEDIE,
        MySpassIE,
        SpiegelIE,
        LiveLeakIE,
        ARDIE,
        ZDFIE,
        TumblrIE,
        BandcampIE,
        RedTubeIE,
        InaIE,
        HowcastIE,
        VineIE,
        FlickrIE,
        TeamcocoIE,
        XHamsterIE,
        HypemIE,
        Vbox7IE,
        GametrailersIE,
        StatigramIE,
        GenericIE,
    )
    return [extractor_class() for extractor_class in extractor_classes]
741
def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name"""
    # The class is looked up among this module's globals under the
    # name '<ie_name>IE'; an unknown name raises KeyError.
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]