Move Steam IE into its own file
[youtube-dl] / youtube_dl / InfoExtractors.py
1 import base64
2 import datetime
3 import itertools
4 import netrc
5 import os
6 import re
7 import socket
8 import time
9 import email.utils
10 import xml.etree.ElementTree
11 import random
12 import math
13 import operator
14 import hashlib
15 import binascii
16 import urllib
17
18 from .utils import *
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
20
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.escapist import EscapistIE
29 from .extractor.facebook import FacebookIE
30 from .extractor.gametrailers import GametrailersIE
31 from .extractor.generic import GenericIE
32 from .extractor.googleplus import GooglePlusIE
33 from .extractor.googlesearch import GoogleSearchIE
34 from .extractor.infoq import InfoQIE
35 from .extractor.metacafe import MetacafeIE
36 from .extractor.mixcloud import MixcloudIE
37 from .extractor.mtv import MTVIE
38 from .extractor.myvideo import MyVideoIE
39 from .extractor.nba import NBAIE
40 from .extractor.statigram import StatigramIE
41 from .extractor.photobucket import PhotobucketIE
42 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
43 from .extractor.stanfordoc import StanfordOpenClassroomIE
44 from .extractor.steam import SteamIE
45 from .extractor.ted import TEDIE
46 from .extractor.vimeo import VimeoIE
47 from .extractor.xnxx import XNXXIE
48 from .extractor.xvideos import XVideosIE
49 from .extractor.yahoo import YahooIE, YahooSearchIE
50 from .extractor.youku import YoukuIE
51 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
52 from .extractor.zdf import ZDFIE
53
54
55
56
57
58
59
60
61
62
63
64
65
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    # Matches a channel page, an archived broadcast (/b/<id>) or a
    # chapter of a broadcast (/c/<id>).
    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    # Page size used when fetching a channel's archive list from the API.
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        """Download one API page and build info dicts for its clips.

        Returns a tuple (total items in the API response, list of info
        dicts for clips that actually carry a video file URL).
        """
        webpage = self._download_webpage(url, video_id,
                                         u'Downloading video info JSON',
                                         u'unable to download video info JSON')

        response = json.loads(webpage)
        # On failure the API answers with an error dict instead of a list.
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            # Skip clips without a downloadable file.
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                # 'start_time' begins with an ISO date; drop the dashes -> YYYYMMDD.
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
                    'id': video_id,
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)

    def _real_extract(self, url):
        """Dispatch on URL kind (channel / broadcast / chapter) and extract."""
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
        if mobj.group('channelid'):
            # A channel's archive list can span many API pages.
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
                raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            # Locate the archive entry matching the chapter's archive id;
            # the for/else raises when no entry matched.
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError(u'Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': u'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return [info]
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        # Fetch pages until a short page signals the end
        # (a single iteration when not paged).
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
        return info
198
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com video pages."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        video_id = match.group('id')
        webpage = self._download_webpage(url, video_id)

        # The media URL sits in the second <source> element of the player.
        video_url = self._html_search_regex(
            r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
            webpage, u'video URL', flags=re.DOTALL)

        # Prefer the player headline, fall back to the page <title>.
        title = self._html_search_regex(
            (r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
             r'<title>(?P<title>[^<]+?)</title>'),
            webpage, 'title', flags=re.DOTALL)

        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False, flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': video_description,
        }]
227
228
class UstreamIE(InfoExtractor):
    """Information extractor for recorded ustream.tv videos."""
    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
    IE_NAME = u'ustream'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # The FLV lives at a predictable CDN path derived from the id.
        video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        video_title = self._html_search_regex(
            r'data-title="(?P<title>.+)"', webpage, u'title')

        uploader = self._html_search_regex(
            r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)

        thumbnail = self._html_search_regex(
            r'<link rel="image_src" href="(?P<thumb>.*?)"',
            webpage, u'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }
260
class WorldStarHipHopIE(InfoExtractor):
    """Information extractor for worldstarhiphop.com (and WSHH candy) videos."""
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    IE_NAME = u'WorldStarHipHop'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage_src = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'so\.addVariable\("file","(.*?)"\)', webpage_src, u'video URL')

        # Derive the container from the media URL itself.
        ext = 'mp4' if 'mp4' in video_url else 'flv'

        video_title = self._html_search_regex(
            r"<title>(.*)</title>", webpage_src, u'title')

        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
        thumbnail = self._html_search_regex(
            r'rel="image_src" href="(.*)" />', webpage_src, u'thumbnail',
            fatal=False)

        if not thumbnail:
            candy_match = re.search(r"""candytitles.*>(.*)</span>""", webpage_src)
            if candy_match is not None:
                video_title = candy_match.group(1)

        return [{
            'id': video_id,
            'url' : video_url,
            'title' : video_title,
            'thumbnail' : thumbnail,
            'ext' : ext,
        }]
300
class RBMARadioIE(InfoExtractor):
    """Information extractor for rbmaradio.com shows."""
    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('videoID')

        webpage = self._download_webpage(url, video_id)

        # Show metadata is embedded as a JSON assignment on the "gon" object.
        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
            webpage, u'json data', flags=re.MULTILINE)

        try:
            data = json.loads(json_data)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))

        # Request the 256 kbit/s variant from the Akamai edge.
        video_url = data['akamai_url'] + '&cbr=256'
        video_ext = compat_urllib_parse_urlparse(video_url).path.rpartition('.')[2]

        host = data.get('host', {})
        image = data.get('image', {})
        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': data['title'],
            'description': data.get('teaser_text'),
            'location': data.get('country_of_origin'),
            'uploader': host.get('name'),
            'uploader_id': host.get('slug'),
            'thumbnail': image.get('large_url_2x'),
            'duration': data.get('duration'),
        }]
334
335
class YouPornIE(InfoExtractor):
    """Information extractor for youporn.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'

    def _print_formats(self, formats):
        """Print all available formats"""
        print(u'Available formats:')
        print(u'ext\t\tformat')
        print(u'---------------------------------')
        for format in formats:
            print(u'%s\t\t%s'  % (format['ext'], format['format']))

    def _specific(self, req_format, formats):
        """Return the entry of formats whose 'format' equals req_format, or None."""
        for x in formats:
            if x["format"] == req_format:
                return x
        return None

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        # An age-verification cookie is required to see the video page.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # Get JSON parameters
        json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
        try:
            params = json.loads(json_params)
        # FIX: catch only JSON decoding errors instead of a bare except,
        # which also swallowed KeyboardInterrupt/SystemExit.
        except ValueError:
            raise ExtractorError(u'Invalid JSON')

        self.report_extraction(video_id)
        try:
            video_title = params['title']
            upload_date = unified_strdate(params['release_date_f'])
            video_description = params['description']
            video_uploader = params['submitted_by']
            thumbnail = params['thumbnails'][0]['image']
        except KeyError:
            # FIX: str() the exception; concatenating str + exception object
            # raised TypeError, masking the real missing-parameter message.
            raise ExtractorError('Missing JSON parameter: ' + str(sys.exc_info()[1]))

        # Get all of the formats available
        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
        download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
            webpage, u'download list').strip()

        # Get all of the links from the page
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
        links = re.findall(LINK_RE, download_list_html)
        if not links:
            raise ExtractorError(u'ERROR: no known formats available for video')

        self.to_screen(u'Links found: %d' % len(links))

        formats = []
        for link in links:
            # A link looks like this:
            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
            # A path looks like this:
            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
            video_url = unescapeHTML(link)
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # The 4th path component encodes resolution and bitrate,
            # e.g. "480p_370k_8004515" -> format "480p-370k".
            format = "-".join(path.split('/')[4].split('_')[:2])

            formats.append({
                'id': video_id,
                'url': video_url,
                'uploader': video_uploader,
                'upload_date': upload_date,
                'title': video_title,
                'ext': extension,
                'format': format,
                'thumbnail': thumbnail,
                'description': video_description
            })

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        req_format = self._downloader.params.get('format', None)
        self.to_screen(u'Format: %s' % req_format)

        if req_format is None or req_format == 'best':
            return [formats[0]]
        elif req_format == 'worst':
            return [formats[-1]]
        elif req_format in ('-1', 'all'):
            return formats
        else:
            format = self._specific(req_format, formats)
            # FIX: the original tested the undefined name 'result' here,
            # raising NameError instead of the intended error message.
            if format is None:
                raise ExtractorError(u'Requested format not available')
            return [format]
440
441
442
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('videoid')
        # The title is taken straight from the URL path.
        video_title = mobj.group('title')

        webpage = self._download_webpage(url, video_id)

        # The flash player is handed a percent-encoded FLV URL.
        video_url = self._search_regex(
            r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",',
            webpage, u'video url')
        video_url = compat_urllib_parse.unquote(video_url)

        # The upload date is optional; normalize it when present.
        upload_date = self._html_search_regex(
            r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by',
            webpage, u'upload date', fatal=False)
        if upload_date:
            upload_date = unified_strdate(upload_date)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': upload_date,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
        }]
477
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
            webpage, u'title').strip()

        # The media URL is only present on the dedicated embed page.
        embed_match = re.search(
            r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
        if embed_match is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = embed_match.group(0).strip()
        video_id = embed_match.group('videoid')

        webpage = self._download_webpage(embed_page_url, video_id)

        video_url = self._search_regex(
            r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            webpage, u'video URL')

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }]
518
class EightTracksIE(InfoExtractor):
    """Information extractor for 8tracks.com mixes (one entry per track)."""
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
        data = json.loads(json_like)

        # A random session id keeps the play API happy across requests.
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        next_url = first_url
        entries = []
        for i in itertools.count():
            api_json = self._download_webpage(next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                errnote=u'Failed to download song information')
            api_data = json.loads(api_json)
            track_data = api_data[u'set']['track']
            entries.append({
                'id': track_data['id'],
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            })
            # The API flags the final track of the set.
            if api_data['set']['at_last_track']:
                break
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
        return entries
559
class KeekIE(InfoExtractor):
    """Information extractor for keek.com clips."""
    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('videoID')

        # Both media and thumbnail live at predictable CDN paths.
        video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title')

        uploader = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            webpage, u'uploader', fatal=False)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'uploader': uploader
        }]
587
588
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de videos."""
    _VALID_URL = r'http://www.myspass.de/.*'

    def _real_extract(self, url):
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # extract values from metadata; url and title are mandatory
        url_flv_el = metadata.find('url_flv')
        if url_flv_el is None:
            raise ExtractorError(u'Unable to extract download url')
        video_url = url_flv_el.text
        extension = os.path.splitext(video_url)[1][1:]
        title_el = metadata.find('title')
        if title_el is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_el.text
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            # FIX: the original fell back to the undefined name 'ext' here,
            # raising NameError; use the file extension instead.
            format = extension
        else:
            format = format_id_el.text
        # description and thumbnail are optional
        description_el = metadata.find('description')
        if description_el is not None:
            description = description_el.text
        else:
            description = None
        imagePreview_el = metadata.find('imagePreview')
        if imagePreview_el is not None:
            thumbnail = imagePreview_el.text
        else:
            thumbnail = None
        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': format,
            'thumbnail': thumbnail,
            'description': description
        }
        return [info]
642
class SpiegelIE(InfoExtractor):
    """Information extractor for spiegel.de videos."""
    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('videoID')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
            webpage, u'title')

        # Per-video format data is served as a separate XML document.
        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(xml_url, video_id,
                    note=u'Downloading XML', errnote=u'Failed to download XML')

        idoc = xml.etree.ElementTree.fromstring(xml_code)
        # The last child element describes the variant we download.
        last_type = idoc[-1]
        filename = last_type.findall('./filename')[0].text
        duration = float(last_type.findall('./duration')[0].text)

        video_url = 'http://video2.spiegel.de/flash/' + filename
        video_ext = filename.rpartition('.')[2]
        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': video_title,
            'duration': duration,
        }]
674
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com view pages."""

    _VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # Strip the site branding that LiveLeak prepends to og:title.
        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(?P<title>.*?)"',
            webpage, u'title').replace('LiveLeak.com -', '').strip()

        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(?P<desc>.*?)"',
            webpage, u'description', fatal=False)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        return [{
            'id':  video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        }]
711
712
713
class TumblrIE(InfoExtractor):
    """Information extractor for video posts on tumblr.com blogs."""
    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        blog = mobj.group('blog_name')

        # Canonicalize to the post URL before downloading.
        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The player markup is escaped (\x22 quotes) inside a script block.
        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
        video_match = re.search(re_video, webpage)
        if video_match is None:
            raise ExtractorError(u'Unable to extract video')
        video_url = video_match.group('video_url')
        ext = video_match.group('ext')

        # We pick the first poster frame as the thumbnail.
        video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if video_thumbnail:
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'ext': ext
        }]
747
class BandcampIE(InfoExtractor):
    """Information extractor for free bandcamp.com tracks."""
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'

    def _real_extract(self, url):
        title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, title)

        # We get the link to the free download page
        m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if m_download is None:
            raise ExtractorError(u'No free songs found')
        download_link = m_download.group(1)

        track_id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
                             webpage, re.MULTILINE | re.DOTALL).group('id')

        download_webpage = self._download_webpage(download_link, track_id,
                                                  'Downloading free downloads page')

        # The track dictionary is embedded in some javascript code
        items_json = re.search(r'items: (.*?),$',
                               download_webpage, re.MULTILINE).group(1)
        info = json.loads(items_json)[0]

        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)

        # We build the url we will use to get the final track url
        # This url is build in Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), track_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, track_id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)

        return [{
            'id': track_id,
            'title': info[u'title'],
            'ext': 'mp3',
            'url': final_url,
            'thumbnail': info[u'thumb_url'],
            'uploader': info[u'artist'],
        }]
793
class RedTubeIE(InfoExtractor):
    """Information Extractor for redtube"""
    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = match.group('id')

        webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)

        # The mp4 source and the title are both embedded in the page markup.
        video_url = self._html_search_regex(
            r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
        video_title = self._html_search_regex(
            '<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            webpage, u'title')

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      'mp4',
            'title':    video_title,
        }]
821         
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        # The mp4 url and the title are served through an MRSS feed,
        # not through the html page itself.
        feed_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        webpage = self._download_webpage(feed_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            webpage, u'video URL')
        video_title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', webpage, u'title')

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      'mp4',
            'title':    video_title,
        }]
848
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        # Re-build a canonical http URL so https/alternate forms land
        # on the same page.
        canonical_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(canonical_url, video_id)

        self.report_extraction(video_id)

        video_url = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            webpage, u'video URL')
        video_title = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            webpage, u'title')
        video_description = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            webpage, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         'mp4',
            'title':       video_title,
            'description': video_description,
            'thumbnail':   thumbnail,
        }]
882
class VineIE(InfoExtractor):
    """Information Extractor for Vine.co"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')

        # Always fetch over https with the canonical path.
        page = self._download_webpage('https://vine.co/v/' + video_id, video_id)

        self.report_extraction(video_id)

        # All metadata is exposed through meta tags in the page head.
        video_url = self._html_search_regex(
            r'<meta property="twitter:player:stream" content="(.+?)"',
            page, u'video URL')
        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"', page, u'title')
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)(\?.*?)?"',
            page, u'thumbnail', fatal=False)
        uploader = self._html_search_regex(
            r'<div class="user">.*?<h2>(.+?)</h2>', page, u'uploader',
            fatal=False, flags=re.DOTALL)

        return [{
            'id':        video_id,
            'url':       video_url,
            'ext':       'mp4',
            'title':     video_title,
            'thumbnail': thumbnail,
            'uploader':  uploader,
        }]
916
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')

        page_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(page_url, video_id)

        # The per-photo secret is needed by both playlist endpoints below.
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>', first_xml, u'node_id')

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        # The final stream url is split across two attributes of the
        # STREAM element; FULLPATH is html-escaped.
        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        video_url = stream.group(1) + unescapeHTML(stream.group(2))

        video_title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'video title')
        video_description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         'mp4',
            'title':       video_title,
            'description': video_description,
            'thumbnail':   thumbnail,
            'uploader_id': video_uploader_id,
        }]
965
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com videos."""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

        # The numeric id only appears in the page markup, not in the url.
        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"', webpage, u'video id')

        self.report_extraction(video_id)

        video_title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"', webpage, u'title')
        thumbnail = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            webpage, u'thumbnail', fatal=False)
        video_description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            webpage, u'description', fatal=False)

        # The stream url itself lives in a separate XML document.
        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
        video_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>', data, u'video URL')

        return [{
            'id':          video_id,
            'url':         video_url,
            'ext':         'mp4',
            'title':       video_title,
            'thumbnail':   thumbnail,
            'description': video_description,
        }]
1004
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(
            'http://xhamster.com/movies/%s/.html' % video_id, video_id)

        media = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if media is None:
            raise ExtractorError(u'Unable to extract media URL')
        server = media.group('server')
        if server:
            # Server-relative form: key is appended to the server url.
            video_url = server + '/key=' + media.group('file')
        else:
            # Absolute (url-encoded) form.
            video_url = compat_urllib_parse.unquote(media.group('file'))
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, u'title')

        # The description is not exposed anywhere in the page UI.

        date_match = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if date_match is None:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')
        else:
            # YYYYMMDD, assembled from the three date components.
            video_upload_date = ''.join(date_match.group(
                'upload_date_Y', 'upload_date_m', 'upload_date_d'))

        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')
        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail
        }]
1056
class HypemIE(InfoExtractor):
    """Information Extractor for hypem (Hype Machine tracks).

    Extraction is a two-step flow: the track page sets a session cookie
    that must be echoed back when requesting the serve/source endpoint,
    which in turn returns the final mp3 url.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        # 'ax'/'ts' query parameters mimic the site's own ajax request.
        data = { 'ax': 1, 'ts': time.time() }
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # Keep the session cookie: the serve request below requires it.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        # Track metadata is embedded as JSON in a <script> tag.
        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        # NOTE: track_id is rebound here to the site's internal id,
        # which replaces the id parsed from the url above.
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        # Ask the serve endpoint for the actual media url, replaying the
        # session cookie captured from the first request.
        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]

        return [{
            'id':       track_id,
            'url':      final_url,
            'ext':      "mp3",
            'title':    title,
            'artist':   artist,
        }]
1106
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # The play page redirects via an inline javascript assignment;
        # follow the redirect manually.
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(
            r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        webpage = self._download_webpage(
            urlh.geturl() + new_location, video_id, u'Downloading redirect page')

        title = self._html_search_regex(
            r'<title>(.*)</title>', webpage, u'title').split('/')[0].strip()

        # Query the player endpoint (form-encoded POST) for the media
        # and thumbnail urls.
        form_data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request("http://vbox7.com/play/magare.do", form_data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')
        # Response looks like "url=...&thumb=..."; keep only the values.
        final_url, thumbnail_url = [field.split('=')[1] for field in info_response.split('&')]

        return [{
            'id':        video_id,
            'url':       final_url,
            'ext':       "flv",
            'title':     title,
            'thumbnail': thumbnail_url,
        }]
1142
1143
def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
    """
    # Most specific extractors first; GenericIE is the catch-all and must
    # stay last. Do not reorder without a reason.
    extractor_classes = [
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVIE,
        BlipTVUserIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudSetIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        YouJizzIE,
        PornotubeIE,
        YouPornIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        WorldStarHipHopIE,
        JustinTVIE,
        FunnyOrDieIE,
        SteamIE,
        UstreamIE,
        RBMARadioIE,
        EightTracksIE,
        KeekIE,
        TEDIE,
        MySpassIE,
        SpiegelIE,
        LiveLeakIE,
        ARDIE,
        ZDFIE,
        TumblrIE,
        BandcampIE,
        RedTubeIE,
        InaIE,
        HowcastIE,
        VineIE,
        FlickrIE,
        TeamcocoIE,
        XHamsterIE,
        HypemIE,
        Vbox7IE,
        GametrailersIE,
        StatigramIE,
        GenericIE,
    ]
    return [klass() for klass in extractor_classes]
1213
def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name"""
    # Extractor classes follow the '<Name>IE' naming convention, so the
    # class can be looked up directly in the module namespace.
    return globals()['%sIE' % ie_name]