_ Git - youtube-dl/blob - youtube_dl/InfoExtractors.py

   1 import base64
   2 import datetime
   3 import itertools
   4 import netrc
   5 import os
   6 import re
   7 import socket
   8 import time
   9 import email.utils
  10 import xml.etree.ElementTree
  11 import random
  12 import math
  13 import operator
  14 import hashlib
  15 import binascii
  16 import urllib
  17
  18 from .utils import *
  19 from .extractor.common import InfoExtractor, SearchInfoExtractor
  20
  21 from .extractor.ard import ARDIE
  22 from .extractor.arte import ArteTvIE
  23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
  24 from .extractor.comedycentral import ComedyCentralIE
  25 from .extractor.collegehumor import CollegeHumorIE
  26 from .extractor.dailymotion import DailymotionIE
  27 from .extractor.depositfiles import DepositFilesIE
  28 from .extractor.eighttracks import EightTracksIE
  29 from .extractor.escapist import EscapistIE
  30 from .extractor.facebook import FacebookIE
  31 from .extractor.funnyordie import FunnyOrDieIE
  32 from .extractor.gametrailers import GametrailersIE
  33 from .extractor.generic import GenericIE
  34 from .extractor.googleplus import GooglePlusIE
  35 from .extractor.googlesearch import GoogleSearchIE
  36 from .extractor.infoq import InfoQIE
  37 from .extractor.justintv import JustinTVIE
  38 from .extractor.keek import KeekIE
  39 from .extractor.metacafe import MetacafeIE
  40 from .extractor.mixcloud import MixcloudIE
  41 from .extractor.mtv import MTVIE
  42 from .extractor.myspass import MySpassIE
  43 from .extractor.myvideo import MyVideoIE
  44 from .extractor.nba import NBAIE
  45 from .extractor.statigram import StatigramIE
  46 from .extractor.photobucket import PhotobucketIE
  47 from .extractor.pornotube import PornotubeIE
  48 from .extractor.rbmaradio import RBMARadioIE
  49 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
  50 from .extractor.stanfordoc import StanfordOpenClassroomIE
  51 from .extractor.steam import SteamIE
  52 from .extractor.ted import TEDIE
  53 from .extractor.ustream import UstreamIE
  54 from .extractor.vimeo import VimeoIE
  55 from .extractor.worldstarhiphop import WorldStarHipHopIE
  56 from .extractor.xnxx import XNXXIE
  57 from .extractor.xvideos import XVideosIE
  58 from .extractor.yahoo import YahooIE, YahooSearchIE
  59 from .extractor.youjizz import YouJizzIE
  60 from .extractor.youku import YoukuIE
  61 from .extractor.youporn import YouPornIE
  62 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
  63 from .extractor.zdf import ZDFIE
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93 class SpiegelIE(InfoExtractor):
  94     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
  95
  96     def _real_extract(self, url):
  97         m = re.match(self._VALID_URL, url)
  98         video_id = m.group('videoID')
  99
 100         webpage = self._download_webpage(url, video_id)
 101
 102         video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
 103             webpage, u'title')
 104
 105         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
 106         xml_code = self._download_webpage(xml_url, video_id,
 107                     note=u'Downloading XML', errnote=u'Failed to download XML')
 108
 109         idoc = xml.etree.ElementTree.fromstring(xml_code)
 110         last_type = idoc[-1]
 111         filename = last_type.findall('./filename')[0].text
 112         duration = float(last_type.findall('./duration')[0].text)
 113
 114         video_url = 'http://video2.spiegel.de/flash/' + filename
 115         video_ext = filename.rpartition('.')[2]
 116         info = {
 117             'id': video_id,
 118             'url': video_url,
 119             'ext': video_ext,
 120             'title': video_title,
 121             'duration': duration,
 122         }
 123         return [info]
 124
 125 class LiveLeakIE(InfoExtractor):
 126
 127     _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
 128     IE_NAME = u'liveleak'
 129
 130     def _real_extract(self, url):
 131         mobj = re.match(self._VALID_URL, url)
 132         if mobj is None:
 133             raise ExtractorError(u'Invalid URL: %s' % url)
 134
 135         video_id = mobj.group('video_id')
 136
 137         webpage = self._download_webpage(url, video_id)
 138
 139         video_url = self._search_regex(r'file: "(.*?)",',
 140             webpage, u'video URL')
 141
 142         video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
 143             webpage, u'title').replace('LiveLeak.com -', '').strip()
 144
 145         video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
 146             webpage, u'description', fatal=False)
 147
 148         video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
 149             webpage, u'uploader', fatal=False)
 150
 151         info = {
 152             'id':  video_id,
 153             'url': video_url,
 154             'ext': 'mp4',
 155             'title': video_title,
 156             'description': video_description,
 157             'uploader': video_uploader
 158         }
 159
 160         return [info]
 161
 162
 163
 164 class TumblrIE(InfoExtractor):
 165     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
 166
 167     def _real_extract(self, url):
 168         m_url = re.match(self._VALID_URL, url)
 169         video_id = m_url.group('id')
 170         blog = m_url.group('blog_name')
 171
 172         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
 173         webpage = self._download_webpage(url, video_id)
 174
 175         re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
 176         video = re.search(re_video, webpage)
 177         if video is None:
 178            raise ExtractorError(u'Unable to extract video')
 179         video_url = video.group('video_url')
 180         ext = video.group('ext')
 181
 182         video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
 183             webpage, u'thumbnail', fatal=False)  # We pick the first poster
 184         if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
 185
 186         # The only place where you can get a title, it's not complete,
 187         # but searching in other places doesn't work for all videos
 188         video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
 189             webpage, u'title', flags=re.DOTALL)
 190
 191         return [{'id': video_id,
 192                  'url': video_url,
 193                  'title': video_title,
 194                  'thumbnail': video_thumbnail,
 195                  'ext': ext
 196                  }]
 197
 198 class BandcampIE(InfoExtractor):
 199     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
 200
 201     def _real_extract(self, url):
 202         mobj = re.match(self._VALID_URL, url)
 203         title = mobj.group('title')
 204         webpage = self._download_webpage(url, title)
 205         # We get the link to the free download page
 206         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
 207         if m_download is None:
 208             raise ExtractorError(u'No free songs found')
 209
 210         download_link = m_download.group(1)
 211         id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
 212                        webpage, re.MULTILINE|re.DOTALL).group('id')
 213
 214         download_webpage = self._download_webpage(download_link, id,
 215                                                   'Downloading free downloads page')
 216         # We get the dictionary of the track from some javascrip code
 217         info = re.search(r'items: (.*?),$',
 218                          download_webpage, re.MULTILINE).group(1)
 219         info = json.loads(info)[0]
 220         # We pick mp3-320 for now, until format selection can be easily implemented.
 221         mp3_info = info[u'downloads'][u'mp3-320']
 222         # If we try to use this url it says the link has expired
 223         initial_url = mp3_info[u'url']
 224         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
 225         m_url = re.match(re_url, initial_url)
 226         #We build the url we will use to get the final track url
 227         # This url is build in Bandcamp in the script download_bunde_*.js
 228         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
 229         final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
 230         # If we could correctly generate the .rand field the url would be
 231         #in the "download_url" key
 232         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
 233
 234         track_info = {'id':id,
 235                       'title' : info[u'title'],
 236                       'ext' :   'mp3',
 237                       'url' :   final_url,
 238                       'thumbnail' : info[u'thumb_url'],
 239                       'uploader' :  info[u'artist']
 240                       }
 241
 242         return [track_info]
 243
 244 class RedTubeIE(InfoExtractor):
 245     """Information Extractor for redtube"""
 246     _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
 247
 248     def _real_extract(self,url):
 249         mobj = re.match(self._VALID_URL, url)
 250         if mobj is None:
 251             raise ExtractorError(u'Invalid URL: %s' % url)
 252
 253         video_id = mobj.group('id')
 254         video_extension = 'mp4'
 255         webpage = self._download_webpage(url, video_id)
 256
 257         self.report_extraction(video_id)
 258
 259         video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
 260             webpage, u'video URL')
 261
 262         video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
 263             webpage, u'title')
 264
 265         return [{
 266             'id':       video_id,
 267             'url':      video_url,
 268             'ext':      video_extension,
 269             'title':    video_title,
 270         }]
 271
 272 class InaIE(InfoExtractor):
 273     """Information Extractor for Ina.fr"""
 274     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
 275
 276     def _real_extract(self,url):
 277         mobj = re.match(self._VALID_URL, url)
 278
 279         video_id = mobj.group('id')
 280         mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
 281         video_extension = 'mp4'
 282         webpage = self._download_webpage(mrss_url, video_id)
 283
 284         self.report_extraction(video_id)
 285
 286         video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
 287             webpage, u'video URL')
 288
 289         video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
 290             webpage, u'title')
 291
 292         return [{
 293             'id':       video_id,
 294             'url':      video_url,
 295             'ext':      video_extension,
 296             'title':    video_title,
 297         }]
 298
 299 class HowcastIE(InfoExtractor):
 300     """Information Extractor for Howcast.com"""
 301     _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
 302
 303     def _real_extract(self, url):
 304         mobj = re.match(self._VALID_URL, url)
 305
 306         video_id = mobj.group('id')
 307         webpage_url = 'http://www.howcast.com/videos/' + video_id
 308         webpage = self._download_webpage(webpage_url, video_id)
 309
 310         self.report_extraction(video_id)
 311
 312         video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
 313             webpage, u'video URL')
 314
 315         video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
 316             webpage, u'title')
 317
 318         video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
 319             webpage, u'description', fatal=False)
 320
 321         thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
 322             webpage, u'thumbnail', fatal=False)
 323
 324         return [{
 325             'id':       video_id,
 326             'url':      video_url,
 327             'ext':      'mp4',
 328             'title':    video_title,
 329             'description': video_description,
 330             'thumbnail': thumbnail,
 331         }]
 332
 333 class VineIE(InfoExtractor):
 334     """Information Extractor for Vine.co"""
 335     _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
 336
 337     def _real_extract(self, url):
 338         mobj = re.match(self._VALID_URL, url)
 339
 340         video_id = mobj.group('id')
 341         webpage_url = 'https://vine.co/v/' + video_id
 342         webpage = self._download_webpage(webpage_url, video_id)
 343
 344         self.report_extraction(video_id)
 345
 346         video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
 347             webpage, u'video URL')
 348
 349         video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
 350             webpage, u'title')
 351
 352         thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
 353             webpage, u'thumbnail', fatal=False)
 354
 355         uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
 356             webpage, u'uploader', fatal=False, flags=re.DOTALL)
 357
 358         return [{
 359             'id':        video_id,
 360             'url':       video_url,
 361             'ext':       'mp4',
 362             'title':     video_title,
 363             'thumbnail': thumbnail,
 364             'uploader':  uploader,
 365         }]
 366
 367 class FlickrIE(InfoExtractor):
 368     """Information Extractor for Flickr videos"""
 369     _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
 370
 371     def _real_extract(self, url):
 372         mobj = re.match(self._VALID_URL, url)
 373
 374         video_id = mobj.group('id')
 375         video_uploader_id = mobj.group('uploader_id')
 376         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
 377         webpage = self._download_webpage(webpage_url, video_id)
 378
 379         secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
 380
 381         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
 382         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
 383
 384         node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
 385             first_xml, u'node_id')
 386
 387         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
 388         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
 389
 390         self.report_extraction(video_id)
 391
 392         mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
 393         if mobj is None:
 394             raise ExtractorError(u'Unable to extract video url')
 395         video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
 396
 397         video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
 398             webpage, u'video title')
 399
 400         video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
 401             webpage, u'description', fatal=False)
 402
 403         thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
 404             webpage, u'thumbnail', fatal=False)
 405
 406         return [{
 407             'id':          video_id,
 408             'url':         video_url,
 409             'ext':         'mp4',
 410             'title':       video_title,
 411             'description': video_description,
 412             'thumbnail':   thumbnail,
 413             'uploader_id': video_uploader_id,
 414         }]
 415
 416 class TeamcocoIE(InfoExtractor):
 417     _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
 418
 419     def _real_extract(self, url):
 420         mobj = re.match(self._VALID_URL, url)
 421         if mobj is None:
 422             raise ExtractorError(u'Invalid URL: %s' % url)
 423         url_title = mobj.group('url_title')
 424         webpage = self._download_webpage(url, url_title)
 425
 426         video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
 427             webpage, u'video id')
 428
 429         self.report_extraction(video_id)
 430
 431         video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
 432             webpage, u'title')
 433
 434         thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
 435             webpage, u'thumbnail', fatal=False)
 436
 437         video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
 438             webpage, u'description', fatal=False)
 439
 440         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
 441         data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
 442
 443         video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
 444             data, u'video URL')
 445
 446         return [{
 447             'id':          video_id,
 448             'url':         video_url,
 449             'ext':         'mp4',
 450             'title':       video_title,
 451             'thumbnail':   thumbnail,
 452             'description': video_description,
 453         }]
 454
 455 class XHamsterIE(InfoExtractor):
 456     """Information Extractor for xHamster"""
 457     _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
 458
 459     def _real_extract(self,url):
 460         mobj = re.match(self._VALID_URL, url)
 461
 462         video_id = mobj.group('id')
 463         mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
 464         webpage = self._download_webpage(mrss_url, video_id)
 465
 466         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
 467         if mobj is None:
 468             raise ExtractorError(u'Unable to extract media URL')
 469         if len(mobj.group('server')) == 0:
 470             video_url = compat_urllib_parse.unquote(mobj.group('file'))
 471         else:
 472             video_url = mobj.group('server')+'/key='+mobj.group('file')
 473         video_extension = video_url.split('.')[-1]
 474
 475         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
 476             webpage, u'title')
 477
 478         # Can't see the description anywhere in the UI
 479         # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
 480         #     webpage, u'description', fatal=False)
 481         # if video_description: video_description = unescapeHTML(video_description)
 482
 483         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
 484         if mobj:
 485             video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
 486         else:
 487             video_upload_date = None
 488             self._downloader.report_warning(u'Unable to extract upload date')
 489
 490         video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
 491             webpage, u'uploader id', default=u'anonymous')
 492
 493         video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
 494             webpage, u'thumbnail', fatal=False)
 495
 496         return [{
 497             'id':       video_id,
 498             'url':      video_url,
 499             'ext':      video_extension,
 500             'title':    video_title,
 501             # 'description': video_description,
 502             'upload_date': video_upload_date,
 503             'uploader_id': video_uploader_id,
 504             'thumbnail': video_thumbnail
 505         }]
 506
 507 class HypemIE(InfoExtractor):
 508     """Information Extractor for hypem"""
 509     _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
 510
 511     def _real_extract(self, url):
 512         mobj = re.match(self._VALID_URL, url)
 513         if mobj is None:
 514             raise ExtractorError(u'Invalid URL: %s' % url)
 515         track_id = mobj.group(1)
 516
 517         data = { 'ax': 1, 'ts': time.time() }
 518         data_encoded = compat_urllib_parse.urlencode(data)
 519         complete_url = url + "?" + data_encoded
 520         request = compat_urllib_request.Request(complete_url)
 521         response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
 522         cookie = urlh.headers.get('Set-Cookie', '')
 523
 524         self.report_extraction(track_id)
 525
 526         html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
 527             response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
 528         try:
 529             track_list = json.loads(html_tracks)
 530             track = track_list[u'tracks'][0]
 531         except ValueError:
 532             raise ExtractorError(u'Hypemachine contained invalid JSON.')
 533
 534         key = track[u"key"]
 535         track_id = track[u"id"]
 536         artist = track[u"artist"]
 537         title = track[u"song"]
 538
 539         serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
 540         request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
 541         request.add_header('cookie', cookie)
 542         song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
 543         try:
 544             song_data = json.loads(song_data_json)
 545         except ValueError:
 546             raise ExtractorError(u'Hypemachine contained invalid JSON.')
 547         final_url = song_data[u"url"]
 548
 549         return [{
 550             'id':       track_id,
 551             'url':      final_url,
 552             'ext':      "mp3",
 553             'title':    title,
 554             'artist':   artist,
 555         }]
 556
 557 class Vbox7IE(InfoExtractor):
 558     """Information Extractor for Vbox7"""
 559     _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
 560
 561     def _real_extract(self,url):
 562         mobj = re.match(self._VALID_URL, url)
 563         if mobj is None:
 564             raise ExtractorError(u'Invalid URL: %s' % url)
 565         video_id = mobj.group(1)
 566
 567         redirect_page, urlh = self._download_webpage_handle(url, video_id)
 568         new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
 569         redirect_url = urlh.geturl() + new_location
 570         webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
 571
 572         title = self._html_search_regex(r'<title>(.*)</title>',
 573             webpage, u'title').split('/')[0].strip()
 574
 575         ext = "flv"
 576         info_url = "http://vbox7.com/play/magare.do"
 577         data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
 578         info_request = compat_urllib_request.Request(info_url, data)
 579         info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 580         info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
 581         if info_response is None:
 582             raise ExtractorError(u'Unable to extract the media url')
 583         (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
 584
 585         return [{
 586             'id':        video_id,
 587             'url':       final_url,
 588             'ext':       ext,
 589             'title':     title,
 590             'thumbnail': thumbnail_url,
 591         }]
 592
 593
 594 def gen_extractors():
 595     """ Return a list of an instance of every supported extractor.
 596     The order does matter; the first extractor matched is the one handling the URL.
 597     """
 598     return [
 599         YoutubePlaylistIE(),
 600         YoutubeChannelIE(),
 601         YoutubeUserIE(),
 602         YoutubeSearchIE(),
 603         YoutubeIE(),
 604         MetacafeIE(),
 605         DailymotionIE(),
 606         GoogleSearchIE(),
 607         PhotobucketIE(),
 608         YahooIE(),
 609         YahooSearchIE(),
 610         DepositFilesIE(),
 611         FacebookIE(),
 612         BlipTVIE(),
 613         BlipTVUserIE(),
 614         VimeoIE(),
 615         MyVideoIE(),
 616         ComedyCentralIE(),
 617         EscapistIE(),
 618         CollegeHumorIE(),
 619         XVideosIE(),
 620         SoundcloudSetIE(),
 621         SoundcloudIE(),
 622         InfoQIE(),
 623         MixcloudIE(),
 624         StanfordOpenClassroomIE(),
 625         MTVIE(),
 626         YoukuIE(),
 627         XNXXIE(),
 628         YouJizzIE(),
 629         PornotubeIE(),
 630         YouPornIE(),
 631         GooglePlusIE(),
 632         ArteTvIE(),
 633         NBAIE(),
 634         WorldStarHipHopIE(),
 635         JustinTVIE(),
 636         FunnyOrDieIE(),
 637         SteamIE(),
 638         UstreamIE(),
 639         RBMARadioIE(),
 640         EightTracksIE(),
 641         KeekIE(),
 642         TEDIE(),
 643         MySpassIE(),
 644         SpiegelIE(),
 645         LiveLeakIE(),
 646         ARDIE(),
 647         ZDFIE(),
 648         TumblrIE(),
 649         BandcampIE(),
 650         RedTubeIE(),
 651         InaIE(),
 652         HowcastIE(),
 653         VineIE(),
 654         FlickrIE(),
 655         TeamcocoIE(),
 656         XHamsterIE(),
 657         HypemIE(),
 658         Vbox7IE(),
 659         GametrailersIE(),
 660         StatigramIE(),
 661         GenericIE()
 662     ]
 663
 664 def get_info_extractor(ie_name):
 665     """Returns the info extractor class with the given ie_name"""
 666     return globals()[ie_name+'IE']