10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bandcamp import BandcampIE
24 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
25 from .extractor.comedycentral import ComedyCentralIE
26 from .extractor.collegehumor import CollegeHumorIE
27 from .extractor.dailymotion import DailymotionIE
28 from .extractor.depositfiles import DepositFilesIE
29 from .extractor.eighttracks import EightTracksIE
30 from .extractor.escapist import EscapistIE
31 from .extractor.facebook import FacebookIE
32 from .extractor.flickr import FlickrIE
33 from .extractor.funnyordie import FunnyOrDieIE
34 from .extractor.gametrailers import GametrailersIE
35 from .extractor.generic import GenericIE
36 from .extractor.googleplus import GooglePlusIE
37 from .extractor.googlesearch import GoogleSearchIE
38 from .extractor.howcast import HowcastIE
39 from .extractor.hypem import HypemIE
40 from .extractor.ina import InaIE
41 from .extractor.infoq import InfoQIE
42 from .extractor.justintv import JustinTVIE
43 from .extractor.keek import KeekIE
44 from .extractor.liveleak import LiveLeakIE
45 from .extractor.metacafe import MetacafeIE
46 from .extractor.mixcloud import MixcloudIE
47 from .extractor.mtv import MTVIE
48 from .extractor.myspass import MySpassIE
49 from .extractor.myvideo import MyVideoIE
50 from .extractor.nba import NBAIE
51 from .extractor.statigram import StatigramIE
52 from .extractor.photobucket import PhotobucketIE
53 from .extractor.pornotube import PornotubeIE
54 from .extractor.rbmaradio import RBMARadioIE
55 from .extractor.redtube import RedTubeIE
56 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
57 from .extractor.spiegel import SpiegelIE
58 from .extractor.stanfordoc import StanfordOpenClassroomIE
59 from .extractor.steam import SteamIE
60 from .extractor.ted import TEDIE
61 from .extractor.tumblr import TumblrIE
62 from .extractor.ustream import UstreamIE
63 from .extractor.vbox7 import Vbox7IE
64 from .extractor.vimeo import VimeoIE
65 from .extractor.vine import VineIE
66 from .extractor.worldstarhiphop import WorldStarHipHopIE
67 from .extractor.xnxx import XNXXIE
68 from .extractor.xvideos import XVideosIE
69 from .extractor.yahoo import YahooIE, YahooSearchIE
70 from .extractor.youjizz import YouJizzIE
71 from .extractor.youku import YoukuIE
72 from .extractor.youporn import YouPornIE
73 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
74 from .extractor.zdf import ZDFIE
114 class TeamcocoIE(InfoExtractor):
115 _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
117 def _real_extract(self, url):
118 mobj = re.match(self._VALID_URL, url)
120 raise ExtractorError(u'Invalid URL: %s' % url)
121 url_title = mobj.group('url_title')
122 webpage = self._download_webpage(url, url_title)
124 video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
125 webpage, u'video id')
127 self.report_extraction(video_id)
129 video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
132 thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
133 webpage, u'thumbnail', fatal=False)
135 video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
136 webpage, u'description', fatal=False)
138 data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
139 data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
141 video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
148 'title': video_title,
149 'thumbnail': thumbnail,
150 'description': video_description,
153 class XHamsterIE(InfoExtractor):
154 """Information Extractor for xHamster"""
155 _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
157 def _real_extract(self,url):
158 mobj = re.match(self._VALID_URL, url)
160 video_id = mobj.group('id')
161 mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
162 webpage = self._download_webpage(mrss_url, video_id)
164 mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
166 raise ExtractorError(u'Unable to extract media URL')
167 if len(mobj.group('server')) == 0:
168 video_url = compat_urllib_parse.unquote(mobj.group('file'))
170 video_url = mobj.group('server')+'/key='+mobj.group('file')
171 video_extension = video_url.split('.')[-1]
173 video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
176 # Can't see the description anywhere in the UI
177 # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
178 # webpage, u'description', fatal=False)
179 # if video_description: video_description = unescapeHTML(video_description)
181 mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
183 video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
185 video_upload_date = None
186 self._downloader.report_warning(u'Unable to extract upload date')
188 video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
189 webpage, u'uploader id', default=u'anonymous')
191 video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
192 webpage, u'thumbnail', fatal=False)
197 'ext': video_extension,
198 'title': video_title,
199 # 'description': video_description,
200 'upload_date': video_upload_date,
201 'uploader_id': video_uploader_id,
202 'thumbnail': video_thumbnail
209 def gen_extractors():
210 """ Return a list of an instance of every supported extractor.
211 The order does matter; the first extractor matched is the one handling the URL.
239 StanfordOpenClassroomIE(),
def get_info_extractor(ie_name):
    """Look up an info extractor class by its short name.

    The class is fetched from this module's global namespace under the key
    ``ie_name + 'IE'``.  A ``KeyError`` propagates if no global with that
    name exists.
    """
    module_namespace = globals()
    class_name = ie_name + 'IE'
    return module_namespace[class_name]