502df6a1fd12d9886bfbff55312b5dc37bfd1480
[youtube-dl] / youtube_dl / InfoExtractors.py
1 import base64
2 import datetime
3 import itertools
4 import netrc
5 import os
6 import re
7 import socket
8 import time
9 import email.utils
10 import xml.etree.ElementTree
11 import random
12 import math
13 import operator
14 import hashlib
15 import binascii
16 import urllib
17
18 from .utils import *
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
20
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
24 from .extractor.comedycentral import ComedyCentralIE
25 from .extractor.collegehumor import CollegeHumorIE
26 from .extractor.dailymotion import DailymotionIE
27 from .extractor.depositfiles import DepositFilesIE
28 from .extractor.escapist import EscapistIE
29 from .extractor.facebook import FacebookIE
30 from .extractor.gametrailers import GametrailersIE
31 from .extractor.generic import GenericIE
32 from .extractor.googleplus import GooglePlusIE
33 from .extractor.googlesearch import GoogleSearchIE
34 from .extractor.infoq import InfoQIE
35 from .extractor.metacafe import MetacafeIE
36 from .extractor.myvideo import MyVideoIE
37 from .extractor.statigram import StatigramIE
38 from .extractor.photobucket import PhotobucketIE
39 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
40 from .extractor.vimeo import VimeoIE
41 from .extractor.xvideos import XVideosIE
42 from .extractor.yahoo import YahooIE, YahooSearchIE
43 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
44 from .extractor.zdf import ZDFIE
45
46
47
48 class MixcloudIE(InfoExtractor):
49     """Information extractor for www.mixcloud.com"""
50
51     _WORKING = False  # Broken for now; the site's new API looks usable, though: http://www.mixcloud.com/developers/documentation/
52     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
53     IE_NAME = u'mixcloud'
54
55     def report_download_json(self, file_id):
56         """Report JSON download."""
57         self.to_screen(u'Downloading json')
58
59     def get_urls(self, jsonData, fmt, bitrate='best'):
60         """Get urls from 'audio_formats' section in json"""
61         file_url = None
62         try:
63             bitrate_list = jsonData[fmt]
64             if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
65                 bitrate = max(bitrate_list) # select highest
66
67             url_list = jsonData[fmt][bitrate]
68         except TypeError: # we have no bitrate info.
69             url_list = jsonData[fmt]
70         return url_list
71
72     def check_urls(self, url_list):
73         """Returns 1st active url from list"""
74         for url in url_list:
75             try:
76                 compat_urllib_request.urlopen(url)
77                 return url
78             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
79                 url = None
80
81         return None
82
83     def _print_formats(self, formats):
84         print('Available formats:')
85         for fmt in formats.keys():
86             for b in formats[fmt]:
87                 try:
88                     ext = formats[fmt][b][0]
89                     print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
90                 except TypeError: # we have no bitrate info
91                     ext = formats[fmt][0]
92                     print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
93                     break
94
95     def _real_extract(self, url):
96         mobj = re.match(self._VALID_URL, url)
97         if mobj is None:
98             raise ExtractorError(u'Invalid URL: %s' % url)
99         # extract uploader & filename from url
100         uploader = mobj.group(1)
101         file_id = uploader + "-" + mobj.group(2)
102
103         # construct API request
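        #   the cloudcast endpoint takes the same /<uploader>/<track>/ path
        #   segments that appear in the page URL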
104         file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
105         # retrieve .json file with links to files
106         request = compat_urllib_request.Request(file_url)
107         try:
108             self.report_download_json(file_url)
109             jsonData = compat_urllib_request.urlopen(request).read()
110         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
111             raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))
112
113         # parse JSON
114         json_data = json.loads(jsonData)
115         player_url = json_data['player_swf_url']
116         formats = dict(json_data['audio_formats'])
117
118         req_format = self._downloader.params.get('format', None)
119         bitrate = None
120
121         if self._downloader.params.get('listformats', None):
122             self._print_formats(formats)
123             return
124
125         if req_format is None or req_format == 'best':
126             for format_param in formats.keys():
127                 url_list = self.get_urls(formats, format_param)
128                 # check urls
129                 file_url = self.check_urls(url_list)
130                 if file_url is not None:
131                     break # got it!
132         else:
133             if req_format not in formats:
134                 raise ExtractorError(u'Format is not available')
135
136             url_list = self.get_urls(formats, req_format)
137             file_url = self.check_urls(url_list)
138             format_param = req_format
139
140         return [{
141             'id': file_id,
142             'url': file_url,
143             'uploader': uploader,
144             'upload_date': None,
145             'title': json_data['name'],
146             'ext': file_url.split('.')[-1],
147             'format': format_param if format_param is not None else u'NA',
148             'thumbnail': json_data['thumbnail_url'],
149             'description': json_data['description'],
150             'player_url': player_url,
151         }]
152
153 class StanfordOpenClassroomIE(InfoExtractor):
154     """Information extractor for Stanford's Open ClassRoom"""
155
156     _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
157     IE_NAME = u'stanfordoc'
158
159     def _real_extract(self, url):
160         mobj = re.match(self._VALID_URL, url)
161         if mobj is None:
162             raise ExtractorError(u'Invalid URL: %s' % url)
163
164         if mobj.group('course') and mobj.group('video'): # A specific video
165             course = mobj.group('course')
166             video = mobj.group('video')
167             info = {
168                 'id': course + '_' + video,
169                 'uploader': None,
170                 'upload_date': None,
171             }
172
173             self.report_extraction(info['id'])
174             baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
175             xmlUrl = baseUrl + video + '.xml'
176             try:
177                 metaXml = compat_urllib_request.urlopen(xmlUrl).read()
178             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
179                 raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
180             mdoc = xml.etree.ElementTree.fromstring(metaXml)
181             try:
182                 info['title'] = mdoc.findall('./title')[0].text
183                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
184             except IndexError:
185                 raise ExtractorError(u'Invalid metadata XML file')
186             info['ext'] = info['url'].rpartition('.')[2]
187             return [info]
188         elif mobj.group('course'): # A course page
189             course = mobj.group('course')
190             info = {
191                 'id': course,
192                 'type': 'playlist',
193                 'uploader': None,
194                 'upload_date': None,
195             }
196
197             coursepage = self._download_webpage(url, info['id'],
198                                         note='Downloading course info page',
199                                         errnote='Unable to download course info page')
200
201             info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
202
203             info['description'] = self._html_search_regex('<description>([^<]+)</description>',
204                 coursepage, u'description', fatal=False)
205
206             links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
207             info['list'] = [
208                 {
209                     'type': 'reference',
210                     'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
211                 }
212                     for vpage in links]
213             results = []
214             for entry in info['list']:
215                 assert entry['type'] == 'reference'
216                 results += self.extract(entry['url'])
217             return results
218         else: # Root page
219             info = {
220                 'id': 'Stanford OpenClassroom',
221                 'type': 'playlist',
222                 'uploader': None,
223                 'upload_date': None,
224             }
225
226             self.report_download_webpage(info['id'])
227             rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
228             try:
229                 rootpage = compat_urllib_request.urlopen(rootURL).read()
230             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
231                 raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
232
233             info['title'] = info['id']
234
235             links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
236             info['list'] = [
237                 {
238                     'type': 'reference',
239                     'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
240                 }
241                     for cpage in links]
242
243             results = []
244             for entry in info['list']:
245                 assert entry['type'] == 'reference'
246                 results += self.extract(entry['url'])
247             return results
248
249 class MTVIE(InfoExtractor):
250     """Information extractor for MTV.com"""
251
252     _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
253     IE_NAME = u'mtv'
254
255     def _real_extract(self, url):
256         mobj = re.match(self._VALID_URL, url)
257         if mobj is None:
258             raise ExtractorError(u'Invalid URL: %s' % url)
259         if not mobj.group('proto'):
260             url = 'http://' + url
261         video_id = mobj.group('videoid')
262
263         webpage = self._download_webpage(url, video_id)
264
265         song_name = self._html_search_regex(r'<meta name="mtv_vt" content="([^"]+)"/>',
266             webpage, u'song name', fatal=False)
267
268         video_title = self._html_search_regex(r'<meta name="mtv_an" content="([^"]+)"/>',
269             webpage, u'title')
270
271         mtvn_uri = self._html_search_regex(r'<meta name="mtvn_uri" content="([^"]+)"/>',
272             webpage, u'mtvn_uri', fatal=False)
273
274         content_id = self._search_regex(r'MTVN.Player.defaultPlaylistId = ([0-9]+);',
275             webpage, u'content id', fatal=False)
276
277         videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
278         self.report_extraction(video_id)
279         request = compat_urllib_request.Request(videogen_url)
280         try:
281             metadataXml = compat_urllib_request.urlopen(request).read()
282         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
283             raise ExtractorError(u'Unable to download video metadata: %s' % compat_str(err))
284
285         mdoc = xml.etree.ElementTree.fromstring(metadataXml)
286         renditions = mdoc.findall('.//rendition')
287
288         # For now, always pick the highest quality.
289         rendition = renditions[-1]
290
291         try:
292             _,_,ext = rendition.attrib['type'].partition('/')
293             format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
294             video_url = rendition.find('./src').text
295         except KeyError:
296             raise ExtractorError('Invalid rendition field.')
297
298         info = {
299             'id': video_id,
300             'url': video_url,
301             'uploader': performer,
302             'upload_date': None,
303             'title': video_title,
304             'ext': ext,
305             'format': format,
306         }
307
308         return [info]
309
310
311 class YoukuIE(InfoExtractor):
312     _VALID_URL =  r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
313
314     def _gen_sid(self):
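        # Session id: current time in milliseconds followed by two random
        # numbers, apparently mimicking the id format used by Youku's player.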
315         nowTime = int(time.time() * 1000)
316         random1 = random.randint(1000,1998)
317         random2 = random.randint(1000,9999)
318
319         return "%d%d%d" %(nowTime,random1,random2)
320
321     def _get_file_ID_mix_string(self, seed):
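        # Build a shuffled copy of the character table below, driven by a small
        # linear-congruential generator keyed on 'seed'; this appears to mirror
        # the scrambling done by Youku's own player.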
322         mixed = []
323         source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
324         seed = float(seed)
325         for i in range(len(source)):
326             seed  =  (seed * 211 + 30031 ) % 65536
327             index  =  math.floor(seed / 65536 * len(source) )
328             mixed.append(source[int(index)])
329             source.remove(source[int(index)])
330         #return ''.join(mixed)
331         return mixed
332
333     def _get_file_id(self, fileId, seed):
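        # The fileId returned by the API is a '*'-separated list of decimal
        # indices; map each index through the shuffled table to recover the
        # real file id used in the download URLs.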
334         mixed = self._get_file_ID_mix_string(seed)
335         ids = fileId.split('*')
336         realId = []
337         for ch in ids:
338             if ch:
339                 realId.append(mixed[int(ch)])
340         return ''.join(realId)
341
342     def _real_extract(self, url):
343         mobj = re.match(self._VALID_URL, url)
344         if mobj is None:
345             raise ExtractorError(u'Invalid URL: %s' % url)
346         video_id = mobj.group('ID')
347
348         info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
349
350         jsondata = self._download_webpage(info_url, video_id)
351
352         self.report_extraction(video_id)
353         try:
354             config = json.loads(jsondata)
355
356             video_title =  config['data'][0]['title']
357             seed = config['data'][0]['seed']
358
359             format = self._downloader.params.get('format', None)
360             supported_format = list(config['data'][0]['streamfileids'].keys())
361
362             if format is None or format == 'best':
363                 if 'hd2' in supported_format:
364                     format = 'hd2'
365                 else:
366                     format = 'flv'
367                 ext = u'flv'
368             elif format == 'worst':
369                 format = 'mp4'
370                 ext = u'mp4'
371             else:
372                 format = 'flv'
373                 ext = u'flv'
374
375
376             fileid = config['data'][0]['streamfileids'][format]
377             keys = [s['k'] for s in config['data'][0]['segs'][format]]
378         except (UnicodeDecodeError, ValueError, KeyError):
379             raise ExtractorError(u'Unable to extract info section')
380
381         files_info=[]
382         sid = self._gen_sid()
383         fileid = self._get_file_id(fileid, seed)
384
385         # Characters 9 and 10 of the fileid (fileid[8:10]) encode the segment
386         # number; replace them with the zero-padded hex index of each segment.
387         for index, key in enumerate(keys):
388
389             temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
390             download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
391
392             info = {
393                 'id': '%s_part%02d' % (video_id, index),
394                 'url': download_url,
395                 'uploader': None,
396                 'upload_date': None,
397                 'title': video_title,
398                 'ext': ext,
399             }
400             files_info.append(info)
401
402         return files_info
403
404
405 class XNXXIE(InfoExtractor):
406     """Information extractor for xnxx.com"""
407
408     _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
409     IE_NAME = u'xnxx'
410     VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
411     VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
412     VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
413
414     def _real_extract(self, url):
415         mobj = re.match(self._VALID_URL, url)
416         if mobj is None:
417             raise ExtractorError(u'Invalid URL: %s' % url)
418         video_id = mobj.group(1)
419
420         # Get webpage content
421         webpage = self._download_webpage(url, video_id)
422
423         video_url = self._search_regex(self.VIDEO_URL_RE,
424             webpage, u'video URL')
425         video_url = compat_urllib_parse.unquote(video_url)
426
427         video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
428             webpage, u'title')
429
430         video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
431             webpage, u'thumbnail', fatal=False)
432
433         return [{
434             'id': video_id,
435             'url': video_url,
436             'uploader': None,
437             'upload_date': None,
438             'title': video_title,
439             'ext': 'flv',
440             'thumbnail': video_thumbnail,
441             'description': None,
442         }]
443
444
445
446 class NBAIE(InfoExtractor):
447     _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
448     IE_NAME = u'nba'
449
450     def _real_extract(self, url):
451         mobj = re.match(self._VALID_URL, url)
452         if mobj is None:
453             raise ExtractorError(u'Invalid URL: %s' % url)
454
455         video_id = mobj.group(1)
456
457         webpage = self._download_webpage(url, video_id)
458
459         video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
460
461         shortened_video_id = video_id.rpartition('/')[2]
462         title = self._html_search_regex(r'<meta property="og:title" content="(.*?)"',
463             webpage, 'title', default=shortened_video_id).replace('NBA.com: ', '')
464
465         # The upload date is not present in the HTML the server returns to us
466         # uploader_date = self._html_search_regex(r'<b>Date:</b> (.*?)</div>', webpage, 'upload_date', fatal=False)
467
468         description = self._html_search_regex(r'<meta name="description" (?:content|value)="(.*?)" />', webpage, 'description', fatal=False)
469
470         info = {
471             'id': shortened_video_id,
472             'url': video_url,
473             'ext': 'mp4',
474             'title': title,
475             # 'uploader_date': uploader_date,
476             'description': description,
477         }
478         return [info]
479
480 class JustinTVIE(InfoExtractor):
481     """Information extractor for justin.tv and twitch.tv"""
482     # TODO: One broadcast may be split into multiple videos. The key
483     # 'broadcast_id' is the same for all parts, and 'broadcast_part'
484     # starts at 1 and increases. Can we treat all parts as one video?
485
486     _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
487         (?:
488             (?P<channelid>[^/]+)|
489             (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
490             (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
491         )
492         /?(?:\#.*)?$
493         """
494     _JUSTIN_PAGE_LIMIT = 100
495     IE_NAME = u'justin.tv'
496
497     def report_download_page(self, channel, offset):
498         """Report attempt to download a single page of videos."""
499         self.to_screen(u'%s: Downloading video information from %d to %d' %
500                 (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
501
502     # Return count of items, list of *valid* items
503     def _parse_page(self, url, video_id):
504         webpage = self._download_webpage(url, video_id,
505                                          u'Downloading video info JSON',
506                                          u'unable to download video info JSON')
507
508         response = json.loads(webpage)
509         if not isinstance(response, list):
510             error_text = response.get('error', 'unknown error')
511             raise ExtractorError(u'Justin.tv API: %s' % error_text)
512         info = []
513         for clip in response:
514             video_url = clip['video_file_url']
515             if video_url:
516                 video_extension = os.path.splitext(video_url)[1][1:]
517                 video_date = re.sub('-', '', clip['start_time'][:10])
518                 video_uploader_id = clip.get('user_id', clip.get('channel_id'))
519                 video_id = clip['id']
520                 video_title = clip.get('title', video_id)
521                 info.append({
522                     'id': video_id,
523                     'url': video_url,
524                     'title': video_title,
525                     'uploader': clip.get('channel_name', video_uploader_id),
526                     'uploader_id': video_uploader_id,
527                     'upload_date': video_date,
528                     'ext': video_extension,
529                 })
530         return (len(response), info)
531
532     def _real_extract(self, url):
533         mobj = re.match(self._VALID_URL, url)
534         if mobj is None:
535             raise ExtractorError(u'invalid URL: %s' % url)
536
537         api_base = 'http://api.justin.tv'
538         paged = False
539         if mobj.group('channelid'):
540             paged = True
541             video_id = mobj.group('channelid')
542             api = api_base + '/channel/archives/%s.json' % video_id
543         elif mobj.group('chapterid'):
544             chapter_id = mobj.group('chapterid')
545
546             webpage = self._download_webpage(url, chapter_id)
547             m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
548             if not m:
549                 raise ExtractorError(u'Cannot find archive of a chapter')
550             archive_id = m.group(1)
551
552             api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
553             chapter_info_xml = self._download_webpage(api, chapter_id,
554                                              note=u'Downloading chapter information',
555                                              errnote=u'Chapter information download failed')
556             doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
557             for a in doc.findall('.//archive'):
558                 if archive_id == a.find('./id').text:
559                     break
560             else:
561                 raise ExtractorError(u'Could not find chapter in chapter information')
562
563             video_url = a.find('./video_file_url').text
564             video_ext = video_url.rpartition('.')[2] or u'flv'
565
566             chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
567             chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
568                                    note='Downloading chapter metadata',
569                                    errnote='Download of chapter metadata failed')
570             chapter_info = json.loads(chapter_info_json)
571
572             bracket_start = int(doc.find('.//bracket_start').text)
573             bracket_end = int(doc.find('.//bracket_end').text)
574
575             # TODO determine start (and probably fix up file)
576             #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
577             #video_url += u'?start=' + TODO:start_timestamp
578             # bracket_start is 13290, but we want 51670615
579             self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
580                                             u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
581
582             info = {
583                 'id': u'c' + chapter_id,
584                 'url': video_url,
585                 'ext': video_ext,
586                 'title': chapter_info['title'],
587                 'thumbnail': chapter_info['preview'],
588                 'description': chapter_info['description'],
589                 'uploader': chapter_info['channel']['display_name'],
590                 'uploader_id': chapter_info['channel']['name'],
591             }
592             return [info]
593         else:
594             video_id = mobj.group('videoid')
595             api = api_base + '/broadcast/by_archive/%s.json' % video_id
596
597         self.report_extraction(video_id)
598
599         info = []
600         offset = 0
601         limit = self._JUSTIN_PAGE_LIMIT
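        # Channel archives are paged; keep fetching pages until one comes back
        # with fewer than 'limit' entries (single videos need only one request).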
602         while True:
603             if paged:
604                 self.report_download_page(video_id, offset)
605             page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
606             page_count, page_info = self._parse_page(page_url, video_id)
607             info.extend(page_info)
608             if not paged or page_count != limit:
609                 break
610             offset += limit
611         return info
612
613 class FunnyOrDieIE(InfoExtractor):
614     _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
615
616     def _real_extract(self, url):
617         mobj = re.match(self._VALID_URL, url)
618         if mobj is None:
619             raise ExtractorError(u'invalid URL: %s' % url)
620
621         video_id = mobj.group('id')
622         webpage = self._download_webpage(url, video_id)
623
624         video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
625             webpage, u'video URL', flags=re.DOTALL)
626
627         title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
628             r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
629
630         video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
631             webpage, u'description', fatal=False, flags=re.DOTALL)
632
633         info = {
634             'id': video_id,
635             'url': video_url,
636             'ext': 'mp4',
637             'title': title,
638             'description': video_description,
639         }
640         return [info]
641
642 class SteamIE(InfoExtractor):
643     _VALID_URL = r"""http://store\.steampowered\.com/
644                 (agecheck/)?
645                 (?P<urltype>video|app)/ #If the page is only for videos or for a game
646                 (?P<gameID>\d+)/?
647                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
648                 """
649     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
650     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
651
652     @classmethod
653     def suitable(cls, url):
654         """Receives a URL and returns True if suitable for this IE."""
655         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
656
657     def _real_extract(self, url):
658         m = re.match(self._VALID_URL, url, re.VERBOSE)
659         gameID = m.group('gameID')
660
661         videourl = self._VIDEO_PAGE_TEMPLATE % gameID
662         webpage = self._download_webpage(videourl, gameID)
663
664         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
665             videourl = self._AGECHECK_TEMPLATE % gameID
666             self.report_age_confirmation()
667             webpage = self._download_webpage(videourl, gameID)
668
669         self.report_extraction(gameID)
670         game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
671                                              webpage, 'game title')
672
673         urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
674         mweb = re.finditer(urlRE, webpage)
675         namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
676         titles = re.finditer(namesRE, webpage)
677         thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
678         thumbs = re.finditer(thumbsRE, webpage)
679         videos = []
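        # The movie URLs, titles and thumbnails are assumed to appear in the
        # same order on the page, so the three iterators can simply be zipped.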
680         for vid,vtitle,thumb in zip(mweb,titles,thumbs):
681             video_id = vid.group('videoID')
682             title = vtitle.group('videoName')
683             video_url = vid.group('videoURL')
684             video_thumb = thumb.group('thumbnail')
685             if not video_url:
686                 raise ExtractorError(u'Cannot find video url for %s' % video_id)
687             info = {
688                 'id': video_id,
689                 'url': video_url,
690                 'ext': 'flv',
691                 'title': unescapeHTML(title),
692                 'thumbnail': video_thumb,
693             }
694             videos.append(info)
695         return [self.playlist_result(videos, gameID, game_title)]
696
697 class UstreamIE(InfoExtractor):
698     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
699     IE_NAME = u'ustream'
700
701     def _real_extract(self, url):
702         m = re.match(self._VALID_URL, url)
703         video_id = m.group('videoID')
704
705         video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
706         webpage = self._download_webpage(url, video_id)
707
708         self.report_extraction(video_id)
709
710         video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
711             webpage, u'title')
712
713         uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
714             webpage, u'uploader', fatal=False, flags=re.DOTALL)
715
716         thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
717             webpage, u'thumbnail', fatal=False)
718
719         info = {
720                 'id': video_id,
721                 'url': video_url,
722                 'ext': 'flv',
723                 'title': video_title,
724                 'uploader': uploader,
725                 'thumbnail': thumbnail,
726                }
727         return info
728
729 class WorldStarHipHopIE(InfoExtractor):
730     _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
731     IE_NAME = u'WorldStarHipHop'
732
733     def _real_extract(self, url):
734         m = re.match(self._VALID_URL, url)
735         video_id = m.group('id')
736
737         webpage_src = self._download_webpage(url, video_id)
738
739         video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
740             webpage_src, u'video URL')
741
742         if 'mp4' in video_url:
743             ext = 'mp4'
744         else:
745             ext = 'flv'
746
747         video_title = self._html_search_regex(r"<title>(.*)</title>",
748             webpage_src, u'title')
749
750         # Get the thumbnail; if there is none, this is a WSHH candy video and the title has to be read from a different element.
751         thumbnail = self._html_search_regex(r'rel="image_src" href="(.*)" />',
752             webpage_src, u'thumbnail', fatal=False)
753
754         if not thumbnail:
755             _title = r"""candytitles.*>(.*)</span>"""
756             mobj = re.search(_title, webpage_src)
757             if mobj is not None:
758                 video_title = mobj.group(1)
759
760         results = [{
761                     'id': video_id,
762                     'url' : video_url,
763                     'title' : video_title,
764                     'thumbnail' : thumbnail,
765                     'ext' : ext,
766                     }]
767         return results
768
769 class RBMARadioIE(InfoExtractor):
770     _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
771
772     def _real_extract(self, url):
773         m = re.match(self._VALID_URL, url)
774         video_id = m.group('videoID')
775
776         webpage = self._download_webpage(url, video_id)
777
778         json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
779             webpage, u'json data', flags=re.MULTILINE)
780
781         try:
782             data = json.loads(json_data)
783         except ValueError as e:
784             raise ExtractorError(u'Invalid JSON: ' + str(e))
785
786         video_url = data['akamai_url'] + '&cbr=256'
787         url_parts = compat_urllib_parse_urlparse(video_url)
788         video_ext = url_parts.path.rpartition('.')[2]
789         info = {
790                 'id': video_id,
791                 'url': video_url,
792                 'ext': video_ext,
793                 'title': data['title'],
794                 'description': data.get('teaser_text'),
795                 'location': data.get('country_of_origin'),
796                 'uploader': data.get('host', {}).get('name'),
797                 'uploader_id': data.get('host', {}).get('slug'),
798                 'thumbnail': data.get('image', {}).get('large_url_2x'),
799                 'duration': data.get('duration'),
800         }
801         return [info]
802
803
804 class YouPornIE(InfoExtractor):
805     """Information extractor for youporn.com."""
806     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
807
808     def _print_formats(self, formats):
809         """Print all available formats"""
810         print(u'Available formats:')
811         print(u'ext\t\tformat')
812         print(u'---------------------------------')
813         for format in formats:
814             print(u'%s\t\t%s'  % (format['ext'], format['format']))
815
816     def _specific(self, req_format, formats):
817         for x in formats:
818             if(x["format"]==req_format):
819                 return x
820         return None
821
822     def _real_extract(self, url):
823         mobj = re.match(self._VALID_URL, url)
824         if mobj is None:
825             raise ExtractorError(u'Invalid URL: %s' % url)
826         video_id = mobj.group('videoid')
827
828         req = compat_urllib_request.Request(url)
829         req.add_header('Cookie', 'age_verified=1')
830         webpage = self._download_webpage(req, video_id)
831
832         # Get JSON parameters
833         json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
834         try:
835             params = json.loads(json_params)
836         except ValueError:
837             raise ExtractorError(u'Invalid JSON')
838
839         self.report_extraction(video_id)
840         try:
841             video_title = params['title']
842             upload_date = unified_strdate(params['release_date_f'])
843             video_description = params['description']
844             video_uploader = params['submitted_by']
845             thumbnail = params['thumbnails'][0]['image']
846         except KeyError as err:
847             raise ExtractorError(u'Missing JSON parameter: ' + compat_str(err))
848
849         # Get all of the formats available
850         DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
851         download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
852             webpage, u'download list').strip()
853
854         # Get all of the links from the page
855         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
856         links = re.findall(LINK_RE, download_list_html)
857         if not links:
858             raise ExtractorError(u'ERROR: no known formats available for video')
859
860         self.to_screen(u'Links found: %d' % len(links))
861
862         formats = []
863         for link in links:
864
865             # A link looks like this:
866             # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
867             # A path looks like this:
868             # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
869             video_url = unescapeHTML( link )
870             path = compat_urllib_parse_urlparse( video_url ).path
871             extension = os.path.splitext( path )[1][1:]
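            # The fifth path component looks like '480p_370k_8004515'; its first
            # two '_'-separated fields give the resolution and bitrate.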
872             format = path.split('/')[4].split('_')[:2]
873             size = format[0]
874             bitrate = format[1]
875             format = "-".join( format )
876             # title = u'%s-%s-%s' % (video_title, size, bitrate)
877
878             formats.append({
879                 'id': video_id,
880                 'url': video_url,
881                 'uploader': video_uploader,
882                 'upload_date': upload_date,
883                 'title': video_title,
884                 'ext': extension,
885                 'format': format,
886                 'thumbnail': thumbnail,
887                 'description': video_description
888             })
889
890         if self._downloader.params.get('listformats', None):
891             self._print_formats(formats)
892             return
893
894         req_format = self._downloader.params.get('format', None)
895         self.to_screen(u'Format: %s' % req_format)
896
897         if req_format is None or req_format == 'best':
898             return [formats[0]]
899         elif req_format == 'worst':
900             return [formats[-1]]
901         elif req_format in ('-1', 'all'):
902             return formats
903         else:
904             format = self._specific( req_format, formats )
905             if format is None:
906                 raise ExtractorError(u'Requested format not available')
907             return [format]
908
909
910
911 class PornotubeIE(InfoExtractor):
912     """Information extractor for pornotube.com."""
913     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
914
915     def _real_extract(self, url):
916         mobj = re.match(self._VALID_URL, url)
917         if mobj is None:
918             raise ExtractorError(u'Invalid URL: %s' % url)
919
920         video_id = mobj.group('videoid')
921         video_title = mobj.group('title')
922
923         # Get webpage content
924         webpage = self._download_webpage(url, video_id)
925
926         # Get the video URL
927         VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
928         video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
929         video_url = compat_urllib_parse.unquote(video_url)
930
931         # Get the upload date
932         VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
933         upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
934         if upload_date: upload_date = unified_strdate(upload_date)
935
936         info = {'id': video_id,
937                 'url': video_url,
938                 'uploader': None,
939                 'upload_date': upload_date,
940                 'title': video_title,
941                 'ext': 'flv',
942                 'format': 'flv'}
943
944         return [info]
945
946 class YouJizzIE(InfoExtractor):
947     """Information extractor for youjizz.com."""
948     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
949
950     def _real_extract(self, url):
951         mobj = re.match(self._VALID_URL, url)
952         if mobj is None:
953             raise ExtractorError(u'Invalid URL: %s' % url)
954
955         video_id = mobj.group('videoid')
956
957         # Get webpage content
958         webpage = self._download_webpage(url, video_id)
959
960         # Get the video title
961         video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
962             webpage, u'title').strip()
963
964         # Get the embed page
965         result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
966         if result is None:
967             raise ExtractorError(u'ERROR: unable to extract embed page')
968
969         embed_page_url = result.group(0).strip()
970         video_id = result.group('videoid')
971
972         webpage = self._download_webpage(embed_page_url, video_id)
973
974         # Get the video URL
975         video_url = self._search_regex(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
976             webpage, u'video URL')
977
978         info = {'id': video_id,
979                 'url': video_url,
980                 'title': video_title,
981                 'ext': 'flv',
982                 'format': 'flv',
983                 'player_url': embed_page_url}
984
985         return [info]
986
987 class EightTracksIE(InfoExtractor):
988     IE_NAME = '8tracks'
989     _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
990
991     def _real_extract(self, url):
992         mobj = re.match(self._VALID_URL, url)
993         if mobj is None:
994             raise ExtractorError(u'Invalid URL: %s' % url)
995         playlist_id = mobj.group('id')
996
997         webpage = self._download_webpage(url, playlist_id)
998
999         json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
1000         data = json.loads(json_like)
1001
1002         session = str(random.randint(0, 1000000000))
1003         mix_id = data['id']
1004         track_count = data['tracks_count']
1005         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
1006         next_url = first_url
1007         res = []
1008         for i in itertools.count():
1009             api_json = self._download_webpage(next_url, playlist_id,
1010                 note=u'Downloading song information %s/%s' % (str(i+1), track_count),
1011                 errnote=u'Failed to download song information')
1012             api_data = json.loads(api_json)
1013             track_data = api_data[u'set']['track']
1014             info = {
1015                 'id': track_data['id'],
1016                 'url': track_data['track_file_stream_url'],
1017                 'title': track_data['performer'] + u' - ' + track_data['name'],
1018                 'raw_title': track_data['name'],
1019                 'uploader_id': data['user']['login'],
1020                 'ext': 'm4a',
1021             }
1022             res.append(info)
1023             if api_data['set']['at_last_track']:
1024                 break
1025             next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
1026         return res
1027
1028 class KeekIE(InfoExtractor):
1029     _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
1030     IE_NAME = u'keek'
1031
1032     def _real_extract(self, url):
1033         m = re.match(self._VALID_URL, url)
1034         video_id = m.group('videoID')
1035
1036         video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
1037         thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
1038         webpage = self._download_webpage(url, video_id)
1039
1040         video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
1041             webpage, u'title')
1042
1043         uploader = self._html_search_regex(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
1044             webpage, u'uploader', fatal=False)
1045
1046         info = {
1047                 'id': video_id,
1048                 'url': video_url,
1049                 'ext': 'mp4',
1050                 'title': video_title,
1051                 'thumbnail': thumbnail,
1052                 'uploader': uploader
1053         }
1054         return [info]
1055
1056 class TEDIE(InfoExtractor):
1057     _VALID_URL=r'''http://www\.ted\.com/
1058                    (
1059                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
1060                         |
1061                         ((?P<type_talk>talks)) # We have a simple talk
1062                    )
1063                    (/lang/(.*?))? # The url may contain the language
1064                    /(?P<name>\w+) # Here goes the name and then ".html"
1065                    '''
1066
1067     @classmethod
1068     def suitable(cls, url):
1069         """Receives a URL and returns True if suitable for this IE."""
1070         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
1071
1072     def _real_extract(self, url):
1073         m = re.match(self._VALID_URL, url, re.VERBOSE)
1074         if m.group('type_talk'):
1075             return [self._talk_info(url)]
1076         else:
1077             playlist_id = m.group('playlist_id')
1078             name = m.group('name')
1079             self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id, name))
1080             return [self._playlist_videos_info(url, name, playlist_id)]
1081
1082     def _playlist_videos_info(self,url,name,playlist_id=0):
1083         '''Returns the videos of the playlist'''
1084         video_RE=r'''
1085                      <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
1086                      ([.\s]*?)data-playlist_item_id="(\d+)"
1087                      ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
1088                      '''
1089         video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
1090         webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
1091         m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
1092         m_names=re.finditer(video_name_RE,webpage)
1093
1094         playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
1095                                                  webpage, 'playlist title')
1096
1097         playlist_entries = []
1098         for m_video, m_name in zip(m_videos,m_names):
1099             video_id=m_video.group('video_id')
1100             talk_url='http://www.ted.com%s' % m_name.group('talk_url')
1101             playlist_entries.append(self.url_result(talk_url, 'TED'))
1102         return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
1103
1104     def _talk_info(self, url, video_id=0):
1105         """Return the video for the talk in the url"""
1106         m = re.match(self._VALID_URL, url,re.VERBOSE)
1107         video_name = m.group('name')
1108         webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
1109         self.report_extraction(video_name)
1110         # If the url includes the language we get the title translated
1111         title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
1112                                         webpage, 'title')
1113         json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
1114                                     webpage, 'json data')
1115         info = json.loads(json_data)
1116         desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
1117                                        webpage, 'description', flags = re.DOTALL)
1118         
1119         thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
1120                                        webpage, 'thumbnail')
1121         info = {
1122                 'id': info['id'],
1123                 'url': info['htmlStreams'][-1]['file'],
1124                 'ext': 'mp4',
1125                 'title': title,
1126                 'thumbnail': thumbnail,
1127                 'description': desc,
1128                 }
1129         return info
1130
1131 class MySpassIE(InfoExtractor):
1132     _VALID_URL = r'http://www.myspass.de/.*'
1133
1134     def _real_extract(self, url):
1135         META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
1136
1137         # video id is the last path element of the URL
1138         # usually there is a trailing slash, so also try the second but last
1139         url_path = compat_urllib_parse_urlparse(url).path
1140         url_parent_path, video_id = os.path.split(url_path)
1141         if not video_id:
1142             _, video_id = os.path.split(url_parent_path)
1143
1144         # get metadata
1145         metadata_url = META_DATA_URL_TEMPLATE % video_id
1146         metadata_text = self._download_webpage(metadata_url, video_id)
1147         metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
1148
1149         # extract values from metadata
1150         url_flv_el = metadata.find('url_flv')
1151         if url_flv_el is None:
1152             raise ExtractorError(u'Unable to extract download url')
1153         video_url = url_flv_el.text
1154         extension = os.path.splitext(video_url)[1][1:]
1155         title_el = metadata.find('title')
1156         if title_el is None:
1157             raise ExtractorError(u'Unable to extract title')
1158         title = title_el.text
1159         format_id_el = metadata.find('format_id')
1160         if format_id_el is None:
1161             format = extension
1162         else:
1163             format = format_id_el.text
1164         description_el = metadata.find('description')
1165         if description_el is not None:
1166             description = description_el.text
1167         else:
1168             description = None
1169         imagePreview_el = metadata.find('imagePreview')
1170         if imagePreview_el is not None:
1171             thumbnail = imagePreview_el.text
1172         else:
1173             thumbnail = None
1174         info = {
1175             'id': video_id,
1176             'url': video_url,
1177             'title': title,
1178             'ext': extension,
1179             'format': format,
1180             'thumbnail': thumbnail,
1181             'description': description
1182         }
1183         return [info]
1184
1185 class SpiegelIE(InfoExtractor):
1186     _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
1187
1188     def _real_extract(self, url):
1189         m = re.match(self._VALID_URL, url)
1190         video_id = m.group('videoID')
1191
1192         webpage = self._download_webpage(url, video_id)
1193
1194         video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
1195             webpage, u'title')
1196
1197         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
1198         xml_code = self._download_webpage(xml_url, video_id,
1199                     note=u'Downloading XML', errnote=u'Failed to download XML')
1200
1201         idoc = xml.etree.ElementTree.fromstring(xml_code)
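        # The last <type> element in the XML appears to describe the highest
        # quality variant available; take its filename and duration.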
1202         last_type = idoc[-1]
1203         filename = last_type.findall('./filename')[0].text
1204         duration = float(last_type.findall('./duration')[0].text)
1205
1206         video_url = 'http://video2.spiegel.de/flash/' + filename
1207         video_ext = filename.rpartition('.')[2]
1208         info = {
1209             'id': video_id,
1210             'url': video_url,
1211             'ext': video_ext,
1212             'title': video_title,
1213             'duration': duration,
1214         }
1215         return [info]
1216
1217 class LiveLeakIE(InfoExtractor):
1218
1219     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
1220     IE_NAME = u'liveleak'
1221
1222     def _real_extract(self, url):
1223         mobj = re.match(self._VALID_URL, url)
1224         if mobj is None:
1225             raise ExtractorError(u'Invalid URL: %s' % url)
1226
1227         video_id = mobj.group('video_id')
1228
1229         webpage = self._download_webpage(url, video_id)
1230
1231         video_url = self._search_regex(r'file: "(.*?)",',
1232             webpage, u'video URL')
1233
1234         video_title = self._html_search_regex(r'<meta property="og:title" content="(?P<title>.*?)"',
1235             webpage, u'title').replace('LiveLeak.com -', '').strip()
1236
1237         video_description = self._html_search_regex(r'<meta property="og:description" content="(?P<desc>.*?)"',
1238             webpage, u'description', fatal=False)
1239
1240         video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
1241             webpage, u'uploader', fatal=False)
1242
1243         info = {
1244             'id':  video_id,
1245             'url': video_url,
1246             'ext': 'mp4',
1247             'title': video_title,
1248             'description': video_description,
1249             'uploader': video_uploader
1250         }
1251
1252         return [info]
1253
1254
1255
1256 class TumblrIE(InfoExtractor):
1257     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
1258
1259     def _real_extract(self, url):
1260         m_url = re.match(self._VALID_URL, url)
1261         video_id = m_url.group('id')
1262         blog = m_url.group('blog_name')
1263
1264         url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
1265         webpage = self._download_webpage(url, video_id)
1266
1267         re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
1268         video = re.search(re_video, webpage)
1269         if video is None:
1270             raise ExtractorError(u'Unable to extract video')
1271         video_url = video.group('video_url')
1272         ext = video.group('ext')
1273
1274         video_thumbnail = self._search_regex(r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22',
1275             webpage, u'thumbnail', fatal=False)  # We pick the first poster
1276         if video_thumbnail: video_thumbnail = video_thumbnail.replace('\\', '')
1277
1278         # The only place where you can get a title, it's not complete,
1279         # but searching in other places doesn't work for all videos
1280         video_title = self._html_search_regex(r'<title>(?P<title>.*?)</title>',
1281             webpage, u'title', flags=re.DOTALL)
1282
1283         return [{'id': video_id,
1284                  'url': video_url,
1285                  'title': video_title,
1286                  'thumbnail': video_thumbnail,
1287                  'ext': ext
1288                  }]
1289
1290 class BandcampIE(InfoExtractor):
1291     _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
1292
1293     def _real_extract(self, url):
1294         mobj = re.match(self._VALID_URL, url)
1295         title = mobj.group('title')
1296         webpage = self._download_webpage(url, title)
1297         # We get the link to the free download page
1298         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
1299         if m_download is None:
1300             raise ExtractorError(u'No free songs found')
1301
1302         download_link = m_download.group(1)
1303         id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', 
1304                        webpage, re.MULTILINE|re.DOTALL).group('id')
1305
1306         download_webpage = self._download_webpage(download_link, id,
1307                                                   'Downloading free downloads page')
1308         # We get the dictionary of the track from some javascript code
1309         info = re.search(r'items: (.*?),$',
1310                          download_webpage, re.MULTILINE).group(1)
1311         info = json.loads(info)[0]
1312         # We pick mp3-320 for now, until format selection can be easily implemented.
1313         mp3_info = info[u'downloads'][u'mp3-320']
1314         # If we try to use this url it says the link has expired
1315         initial_url = mp3_info[u'url']
1316         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
1317         m_url = re.match(re_url, initial_url)
1318         # We build the url we will use to get the final track url
1319         # This url is built by Bandcamp in the script download_bunde_*.js
1320         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts'))
1321         final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url')
1322         # If we could correctly generate the .rand field the url would be
1323         # in the "download_url" key
1324         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
1325
1326         track_info = {'id': id,
1327                       'title': info[u'title'],
1328                       'ext': 'mp3',
1329                       'url': final_url,
1330                       'thumbnail': info[u'thumb_url'],
1331                       'uploader': info[u'artist']
1332                       }
1333
1334         return [track_info]
1335
1336 class RedTubeIE(InfoExtractor):
1337     """Information Extractor for redtube"""
1338     _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
1339
1340     def _real_extract(self, url):
1341         mobj = re.match(self._VALID_URL, url)
1342         if mobj is None:
1343             raise ExtractorError(u'Invalid URL: %s' % url)
1344
1345         video_id = mobj.group('id')
1346         video_extension = 'mp4'
1347         webpage = self._download_webpage(url, video_id)
1348
1349         self.report_extraction(video_id)
1350
1351         video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
1352             webpage, u'video URL')
1353
1354         video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
1355             webpage, u'title')
1356
1357         return [{
1358             'id':       video_id,
1359             'url':      video_url,
1360             'ext':      video_extension,
1361             'title':    video_title,
1362         }]
1363
1364 class InaIE(InfoExtractor):
1365     """Information Extractor for Ina.fr"""
1366     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
1367
1368     def _real_extract(self, url):
1369         mobj = re.match(self._VALID_URL, url)
1370
1371         video_id = mobj.group('id')
1372         mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
1373         video_extension = 'mp4'
1374         webpage = self._download_webpage(mrss_url, video_id)
1375
1376         self.report_extraction(video_id)
1377
1378         video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4\.ina\.fr/[^"]+\.mp4)',
1379             webpage, u'video URL')
1380
1381         video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
1382             webpage, u'title')
1383
1384         return [{
1385             'id':       video_id,
1386             'url':      video_url,
1387             'ext':      video_extension,
1388             'title':    video_title,
1389         }]
1390
1391 class HowcastIE(InfoExtractor):
1392     """Information Extractor for Howcast.com"""
1393     _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
1394
1395     def _real_extract(self, url):
1396         mobj = re.match(self._VALID_URL, url)
1397
1398         video_id = mobj.group('id')
1399         webpage_url = 'http://www.howcast.com/videos/' + video_id
1400         webpage = self._download_webpage(webpage_url, video_id)
1401
1402         self.report_extraction(video_id)
1403
1404         video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
1405             webpage, u'video URL')
1406
1407         video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
1408             webpage, u'title')
1409
1410         video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
1411             webpage, u'description', fatal=False)
1412
1413         thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
1414             webpage, u'thumbnail', fatal=False)
1415
1416         return [{
1417             'id':       video_id,
1418             'url':      video_url,
1419             'ext':      'mp4',
1420             'title':    video_title,
1421             'description': video_description,
1422             'thumbnail': thumbnail,
1423         }]
1424
1425 class VineIE(InfoExtractor):
1426     """Information Extractor for Vine.co"""
1427     _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
1428
1429     def _real_extract(self, url):
1430         mobj = re.match(self._VALID_URL, url)
1431
1432         video_id = mobj.group('id')
1433         webpage_url = 'https://vine.co/v/' + video_id
1434         webpage = self._download_webpage(webpage_url, video_id)
1435
1436         self.report_extraction(video_id)
1437
1438         video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
1439             webpage, u'video URL')
1440
1441         video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
1442             webpage, u'title')
1443
1444         thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)(\?.*?)?"',
1445             webpage, u'thumbnail', fatal=False)
1446
1447         uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
1448             webpage, u'uploader', fatal=False, flags=re.DOTALL)
1449
1450         return [{
1451             'id':        video_id,
1452             'url':       video_url,
1453             'ext':       'mp4',
1454             'title':     video_title,
1455             'thumbnail': thumbnail,
1456             'uploader':  uploader,
1457         }]
1458
1459 class FlickrIE(InfoExtractor):
1460     """Information Extractor for Flickr videos"""
1461     _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
1462
1463     def _real_extract(self, url):
1464         mobj = re.match(self._VALID_URL, url)
1465
1466         video_id = mobj.group('id')
1467         video_uploader_id = mobj.group('uploader_id')
1468         webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
1469         webpage = self._download_webpage(webpage_url, video_id)
1470
1471         secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
1472
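        # The photo secret is passed along to the video XML endpoints requested below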
1473         first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
1474         first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
1475
1476         node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
1477             first_xml, u'node_id')
1478
1479         second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
1480         second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
1481
1482         self.report_extraction(video_id)
1483
1484         mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
1485         if mobj is None:
1486             raise ExtractorError(u'Unable to extract video url')
1487         video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
1488
1489         video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
1490             webpage, u'video title')
1491
1492         video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
1493             webpage, u'description', fatal=False)
1494
1495         thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
1496             webpage, u'thumbnail', fatal=False)
1497
1498         return [{
1499             'id':          video_id,
1500             'url':         video_url,
1501             'ext':         'mp4',
1502             'title':       video_title,
1503             'description': video_description,
1504             'thumbnail':   thumbnail,
1505             'uploader_id': video_uploader_id,
1506         }]
1507
1508 class TeamcocoIE(InfoExtractor):
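    """Information extractor for teamcoco.com"""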
1509     _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
1510
1511     def _real_extract(self, url):
1512         mobj = re.match(self._VALID_URL, url)
1513         if mobj is None:
1514             raise ExtractorError(u'Invalid URL: %s' % url)
1515         url_title = mobj.group('url_title')
1516         webpage = self._download_webpage(url, url_title)
1517
1518         video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
1519             webpage, u'video id')
1520
1521         self.report_extraction(video_id)
1522
1523         video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
1524             webpage, u'title')
1525
1526         thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
1527             webpage, u'thumbnail', fatal=False)
1528
1529         video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
1530             webpage, u'description', fatal=False)
1531
1532         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
1533         data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
1534
1535         video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
1536             data, u'video URL')
1537
1538         return [{
1539             'id':          video_id,
1540             'url':         video_url,
1541             'ext':         'mp4',
1542             'title':       video_title,
1543             'thumbnail':   thumbnail,
1544             'description': video_description,
1545         }]
1546
1547 class XHamsterIE(InfoExtractor):
1548     """Information Extractor for xHamster"""
1549     _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
1550
1551     def _real_extract(self, url):
1552         mobj = re.match(self._VALID_URL, url)
1553
1554         video_id = mobj.group('id')
1555         mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
1556         webpage = self._download_webpage(mrss_url, video_id)
1557
1558         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
1559         if mobj is None:
1560             raise ExtractorError(u'Unable to extract media URL')
1561         if len(mobj.group('server')) == 0:
1562             video_url = compat_urllib_parse.unquote(mobj.group('file'))
1563         else:
1564             video_url = mobj.group('server') + '/key=' + mobj.group('file')
1565         video_extension = video_url.split('.')[-1]
1566
1567         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
1568             webpage, u'title')
1569
1570         # Can't see the description anywhere in the UI
1571         # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
1572         #     webpage, u'description', fatal=False)
1573         # if video_description: video_description = unescapeHTML(video_description)
1574
1575         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
1576         if mobj:
1577             video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
1578         else:
1579             video_upload_date = None
1580             self._downloader.report_warning(u'Unable to extract upload date')
1581
1582         video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
1583             webpage, u'uploader id', default=u'anonymous')
1584
1585         video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
1586             webpage, u'thumbnail', fatal=False)
1587
1588         return [{
1589             'id':       video_id,
1590             'url':      video_url,
1591             'ext':      video_extension,
1592             'title':    video_title,
1593             # 'description': video_description,
1594             'upload_date': video_upload_date,
1595             'uploader_id': video_uploader_id,
1596             'thumbnail': video_thumbnail
1597         }]
1598
1599 class HypemIE(InfoExtractor):
1600     """Information Extractor for hypem"""
1601     _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
1602
1603     def _real_extract(self, url):
1604         mobj = re.match(self._VALID_URL, url)
1605         if mobj is None:
1606             raise ExtractorError(u'Invalid URL: %s' % url)
1607         track_id = mobj.group(1)
1608
1609         data = { 'ax': 1, 'ts': time.time() }
1610         data_encoded = compat_urllib_parse.urlencode(data)
1611         complete_url = url + "?" + data_encoded
1612         request = compat_urllib_request.Request(complete_url)
1613         response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
1614         cookie = urlh.headers.get('Set-Cookie', '')
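        # Keep the session cookie; it is sent back with the serve/source request below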
1615
1616         self.report_extraction(track_id)
1617
1618         html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
1619             response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
1620         try:
1621             track_list = json.loads(html_tracks)
1622             track = track_list[u'tracks'][0]
1623         except ValueError:
1624             raise ExtractorError(u'Hypemachine contained invalid JSON.')
1625
1626         key = track[u"key"]
1627         track_id = track[u"id"]
1628         artist = track[u"artist"]
1629         title = track[u"song"]
1630
1631         serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
1632         request = compat_urllib_request.Request(serve_url, "", {'Content-Type': 'application/json'})
1633         request.add_header('cookie', cookie)
1634         song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
1635         try:
1636             song_data = json.loads(song_data_json)
1637         except ValueError:
1638             raise ExtractorError(u'Hypemachine contained invalid JSON.')
1639         final_url = song_data[u"url"]
1640
1641         return [{
1642             'id':       track_id,
1643             'url':      final_url,
1644             'ext':      "mp3",
1645             'title':    title,
1646             'artist':   artist,
1647         }]
1648
1649 class Vbox7IE(InfoExtractor):
1650     """Information Extractor for Vbox7"""
1651     _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
1652
1653     def _real_extract(self, url):
1654         mobj = re.match(self._VALID_URL, url)
1655         if mobj is None:
1656             raise ExtractorError(u'Invalid URL: %s' % url)
1657         video_id = mobj.group(1)
1658
1659         redirect_page, urlh = self._download_webpage_handle(url, video_id)
1660         new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
1661         redirect_url = urlh.geturl() + new_location
1662         webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
1663
1664         title = self._html_search_regex(r'<title>(.*)</title>',
1665             webpage, u'title').split('/')[0].strip()
1666
1667         ext = "flv"
1668         info_url = "http://vbox7.com/play/magare.do"
1669         data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
1670         info_request = compat_urllib_request.Request(info_url, data)
1671         info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
1672         info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
1673         if info_response is None:
1674             raise ExtractorError(u'Unable to extract the media url')
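        # info_response is a 'key=value&key=value' string; the two values are the media URL and the thumbnail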
1675         (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
1676
1677         return [{
1678             'id':        video_id,
1679             'url':       final_url,
1680             'ext':       ext,
1681             'title':     title,
1682             'thumbnail': thumbnail_url,
1683         }]
1684
1685
1686 def gen_extractors():
1687     """ Return a list containing an instance of every supported extractor.
1688     The order does matter; the first extractor matched is the one handling the URL.
1689     """
1690     return [
1691         YoutubePlaylistIE(),
1692         YoutubeChannelIE(),
1693         YoutubeUserIE(),
1694         YoutubeSearchIE(),
1695         YoutubeIE(),
1696         MetacafeIE(),
1697         DailymotionIE(),
1698         GoogleSearchIE(),
1699         PhotobucketIE(),
1700         YahooIE(),
1701         YahooSearchIE(),
1702         DepositFilesIE(),
1703         FacebookIE(),
1704         BlipTVIE(),
1705         BlipTVUserIE(),
1706         VimeoIE(),
1707         MyVideoIE(),
1708         ComedyCentralIE(),
1709         EscapistIE(),
1710         CollegeHumorIE(),
1711         XVideosIE(),
1712         SoundcloudSetIE(),
1713         SoundcloudIE(),
1714         InfoQIE(),
1715         MixcloudIE(),
1716         StanfordOpenClassroomIE(),
1717         MTVIE(),
1718         YoukuIE(),
1719         XNXXIE(),
1720         YouJizzIE(),
1721         PornotubeIE(),
1722         YouPornIE(),
1723         GooglePlusIE(),
1724         ArteTvIE(),
1725         NBAIE(),
1726         WorldStarHipHopIE(),
1727         JustinTVIE(),
1728         FunnyOrDieIE(),
1729         SteamIE(),
1730         UstreamIE(),
1731         RBMARadioIE(),
1732         EightTracksIE(),
1733         KeekIE(),
1734         TEDIE(),
1735         MySpassIE(),
1736         SpiegelIE(),
1737         LiveLeakIE(),
1738         ARDIE(),
1739         ZDFIE(),
1740         TumblrIE(),
1741         BandcampIE(),
1742         RedTubeIE(),
1743         InaIE(),
1744         HowcastIE(),
1745         VineIE(),
1746         FlickrIE(),
1747         TeamcocoIE(),
1748         XHamsterIE(),
1749         HypemIE(),
1750         Vbox7IE(),
1751         GametrailersIE(),
1752         StatigramIE(),
1753         GenericIE()
1754     ]
1755
1756 def get_info_extractor(ie_name):
1757     """Returns the info extractor class with the given ie_name"""
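    # e.g. get_info_extractor(u'Youtube') returns the YoutubeIE class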
1758     return globals()[ie_name+'IE']