10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bandcamp import BandcampIE
24 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
25 from .extractor.comedycentral import ComedyCentralIE
26 from .extractor.collegehumor import CollegeHumorIE
27 from .extractor.dailymotion import DailymotionIE
28 from .extractor.depositfiles import DepositFilesIE
29 from .extractor.eighttracks import EightTracksIE
30 from .extractor.escapist import EscapistIE
31 from .extractor.facebook import FacebookIE
32 from .extractor.funnyordie import FunnyOrDieIE
33 from .extractor.gametrailers import GametrailersIE
34 from .extractor.generic import GenericIE
35 from .extractor.googleplus import GooglePlusIE
36 from .extractor.googlesearch import GoogleSearchIE
37 from .extractor.infoq import InfoQIE
38 from .extractor.justintv import JustinTVIE
39 from .extractor.keek import KeekIE
40 from .extractor.liveleak import LiveLeakIE
41 from .extractor.metacafe import MetacafeIE
42 from .extractor.mixcloud import MixcloudIE
43 from .extractor.mtv import MTVIE
44 from .extractor.myspass import MySpassIE
45 from .extractor.myvideo import MyVideoIE
46 from .extractor.nba import NBAIE
47 from .extractor.statigram import StatigramIE
48 from .extractor.photobucket import PhotobucketIE
49 from .extractor.pornotube import PornotubeIE
50 from .extractor.rbmaradio import RBMARadioIE
51 from .extractor.redtube import RedTubeIE
52 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
53 from .extractor.spiegel import SpiegelIE
54 from .extractor.stanfordoc import StanfordOpenClassroomIE
55 from .extractor.steam import SteamIE
56 from .extractor.ted import TEDIE
57 from .extractor.tumblr import TumblrIE
58 from .extractor.ustream import UstreamIE
59 from .extractor.vbox7 import Vbox7IE
60 from .extractor.vimeo import VimeoIE
61 from .extractor.vine import VineIE
62 from .extractor.worldstarhiphop import WorldStarHipHopIE
63 from .extractor.xnxx import XNXXIE
64 from .extractor.xvideos import XVideosIE
65 from .extractor.yahoo import YahooIE, YahooSearchIE
66 from .extractor.youjizz import YouJizzIE
67 from .extractor.youku import YoukuIE
68 from .extractor.youporn import YouPornIE
69 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
70 from .extractor.zdf import ZDFIE
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr

    The video id is taken from the page URL; the media URL and the title are
    then read from the MRSS notice that player.ina.fr serves for that id.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        Raises ExtractorError if *url* does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a readable message rather than an AttributeError on
            # mobj.group() below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        webpage = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        # The dots of the host name are escaped so that they only match a
        # literal "." and not an arbitrary character.
        video_url = self._html_search_regex(
            r'<media:player url="(?P<mp4url>http://mp4\.ina\.fr/[^"]+\.mp4)',
            webpage, u'video URL')

        # The title is wrapped in a CDATA section inside the notice.
        video_title = self._search_regex(
            r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            webpage, u'title')

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      'mp4',
            'title':    video_title,
        }]
class HowcastIE(InfoExtractor):
    """Information Extractor for Howcast.com

    Downloads the www.howcast.com page for the video id and scrapes the
    MP4 URL plus the title, description and thumbnail meta tags from it.
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')

        # Always fetch the www.howcast.com form of the page, whatever
        # scheme/host variant the caller passed in.
        page_url = ''.join(('http://www.howcast.com/videos/', video_id))
        page = self._download_webpage(page_url, video_id)

        self.report_extraction(video_id)

        # Direct link to the MP4 on the mobile-media host.
        media_url = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            page, u'video URL')

        # The content attribute may be quoted with either " or '.
        title = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            page, u'title')

        # Description and thumbnail are looked up with fatal=False
        # (presumably a miss is reported without aborting the extraction).
        description = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            page, u'description', fatal=False)

        thumbnail_url = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            page, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'thumbnail': thumbnail_url,
        }
        return [info]
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos

    Resolving the stream takes three requests: the photo page (for the
    photo secret), a first XML document (for the node id) and a playlist
    XML document (for the stream application and path).
    """
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        Raises ExtractorError if the playlist XML has no STREAM element.
        """
        uploader_id, video_id = re.match(self._VALID_URL, url).group('uploader_id', 'id')

        # Step 1: the photo page carries the secret needed by both XML calls.
        page_url = ''.join(('http://www.flickr.com/photos/', uploader_id, '/', video_id))
        page = self._download_webpage(page_url, video_id)
        secret = self._search_regex(r"photo_secret: '(\w+)'", page, u'secret')

        # Step 2: the first XML document yields the node id.
        mtl_url = ''.join((
            'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=',
            video_id,
            '&secret=',
            secret,
            '&bitrate=700&target=_self',
        ))
        mtl_xml = self._download_webpage(mtl_url, video_id, 'Downloading first data webpage')
        node_id = self._html_search_regex(
            r'<Item id="id">(\d+-\d+)</Item>',
            mtl_xml, u'node_id')

        # Step 3: the playlist XML describes the actual stream.
        playlist_url = ''.join((
            'https://secure.flickr.com/video_playlist.gne?node_id=',
            node_id,
            '&tech=flash&mode=playlist&bitrate=700&secret=',
            secret,
            '&rd=video.yahoo.com&noad=1',
        ))
        playlist_xml = self._download_webpage(playlist_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', playlist_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        app, full_path = stream.group(1, 2)
        # Only the path part is passed through unescapeHTML.
        media_url = app + unescapeHTML(full_path)

        # Metadata comes from the Open Graph tags of the photo page; the
        # content attribute may be quoted with either " or '.
        title = self._html_search_regex(
            r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'video title')

        description = self._html_search_regex(
            r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'description', fatal=False)

        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            page, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'thumbnail': thumbnail_url,
            'uploader_id': uploader_id,
        }
        return [info]
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com

    The numeric video id is scraped from the video page; the media URL is
    then read from the per-video XML document under /cvp/2.0/.
    """
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        Raises ExtractorError if *url* does not match _VALID_URL.
        """
        matched = re.match(self._VALID_URL, url)
        if matched is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Until the numeric id is known, the URL slug is passed as the
        # second argument of the page download.
        slug = matched.group('url_title')
        page = self._download_webpage(url, slug)

        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"',
            page, u'video id')

        self.report_extraction(video_id)

        # Metadata from the Open Graph tags; thumbnail and description are
        # looked up with fatal=False.
        title = self._html_search_regex(
            r'<meta property="og:title" content="(.+?)"',
            page, u'title')

        thumbnail_url = self._html_search_regex(
            r'<meta property="og:image" content="(.+?)"',
            page, u'thumbnail', fatal=False)

        description = self._html_search_regex(
            r'<meta property="og:description" content="(.*?)"',
            page, u'description', fatal=False)

        # The XML document for this id lists the media files; the entry
        # with type="high" is the one used.
        xml_doc = self._download_webpage(
            'http://teamcoco.com/cvp/2.0/%s.xml' % video_id,
            video_id, 'Downloading data webpage')

        media_url = self._html_search_regex(
            r'<file type="high".*?>(.*?)</file>',
            xml_doc, u'video URL')

        info = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
        }
        return [info]
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster

    Scrapes the movie page for the player's server/file pair and for the
    title, upload date, uploader and thumbnail.
    """
    # The dot after "www" is escaped so it only matches a literal ".".
    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        Raises ExtractorError if *url* does not match _VALID_URL or if the
        player's server/file pair cannot be found in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with a readable message rather than an AttributeError on
            # mobj.group() below.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        page_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(page_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        server = mobj.group('server')
        media_file = mobj.group('file')
        if not server:
            # Empty server: 'file' is used on its own, after percent-decoding.
            video_url = compat_urllib_parse.unquote(media_file)
        else:
            video_url = server + '/key=' + media_file
        # The extension is whatever follows the last dot of the media URL.
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # NOTE: no description is extracted; it is not visible anywhere in
        # the site's UI.

        # The upload date is optional: a miss only produces a warning.
        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Concatenate year, month and day into YYYYMMDD.
            video_upload_date = ''.join(
                mobj.group('upload_date_Y', 'upload_date_m', 'upload_date_d'))
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        video_uploader_id = self._html_search_regex(
            r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(
            r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail,
        }]
class HypemIE(InfoExtractor):
    """Information Extractor for hypem

    The track page embeds a JSON track list; the first track's id and key
    are then posted to the /serve/source/ endpoint, which answers with a
    JSON document containing the media URL.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        Raises ExtractorError if *url* does not match _VALID_URL, or if
        either JSON document cannot be decoded or lacks a first track.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        # Use "&" when the URL already carries a query string, so that the
        # extra parameters do not produce a second "?".
        separator = '&' if '?' in url else '?'
        complete_url = url + separator + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        # The cookie set by the track page is replayed on the second request.
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(
            r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
        except (ValueError, KeyError, IndexError):
            # ValueError: not JSON at all; KeyError/IndexError: decoded
            # fine but there is no first track to use.
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        # The empty body turns the request into a POST. It has to be bytes:
        # on Python 3 urllib rejects a str body with TypeError.
        request = compat_urllib_request.Request(serve_url, b'', {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]

        return [{
            'id':       track_id,
            'url':      final_url,
            'ext':      "mp3",
            'title':    title,
            'artist':   artist,
        }]
360 def gen_extractors():
361 """ Return a list of an instance of every supported extractor.
362 The order does matter; the first extractor matched is the one handling the URL.
390 StanfordOpenClassroomIE(),
def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name

    The class is looked up in this module's global namespace under the
    name ie_name + 'IE'; a name that is not defined there raises KeyError.
    """
    module_namespace = globals()
    class_name = ie_name + 'IE'
    return module_namespace[class_name]