10 import xml.etree.ElementTree
19 from .extractor.common import InfoExtractor, SearchInfoExtractor
21 from .extractor.ard import ARDIE
22 from .extractor.arte import ArteTvIE
23 from .extractor.bandcamp import BandcampIE
24 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
25 from .extractor.comedycentral import ComedyCentralIE
26 from .extractor.collegehumor import CollegeHumorIE
27 from .extractor.dailymotion import DailymotionIE
28 from .extractor.depositfiles import DepositFilesIE
29 from .extractor.eighttracks import EightTracksIE
30 from .extractor.escapist import EscapistIE
31 from .extractor.facebook import FacebookIE
32 from .extractor.funnyordie import FunnyOrDieIE
33 from .extractor.gametrailers import GametrailersIE
34 from .extractor.generic import GenericIE
35 from .extractor.googleplus import GooglePlusIE
36 from .extractor.googlesearch import GoogleSearchIE
37 from .extractor.howcast import HowcastIE
38 from .extractor.hypem import HypemIE
39 from .extractor.ina import InaIE
40 from .extractor.infoq import InfoQIE
41 from .extractor.justintv import JustinTVIE
42 from .extractor.keek import KeekIE
43 from .extractor.liveleak import LiveLeakIE
44 from .extractor.metacafe import MetacafeIE
45 from .extractor.mixcloud import MixcloudIE
46 from .extractor.mtv import MTVIE
47 from .extractor.myspass import MySpassIE
48 from .extractor.myvideo import MyVideoIE
49 from .extractor.nba import NBAIE
50 from .extractor.statigram import StatigramIE
51 from .extractor.photobucket import PhotobucketIE
52 from .extractor.pornotube import PornotubeIE
53 from .extractor.rbmaradio import RBMARadioIE
54 from .extractor.redtube import RedTubeIE
55 from .extractor.soundcloud import SoundcloudIE, SoundcloudSetIE
56 from .extractor.spiegel import SpiegelIE
57 from .extractor.stanfordoc import StanfordOpenClassroomIE
58 from .extractor.steam import SteamIE
59 from .extractor.ted import TEDIE
60 from .extractor.tumblr import TumblrIE
61 from .extractor.ustream import UstreamIE
62 from .extractor.vbox7 import Vbox7IE
63 from .extractor.vimeo import VimeoIE
64 from .extractor.vine import VineIE
65 from .extractor.worldstarhiphop import WorldStarHipHopIE
66 from .extractor.xnxx import XNXXIE
67 from .extractor.xvideos import XVideosIE
68 from .extractor.yahoo import YahooIE, YahooSearchIE
69 from .extractor.youjizz import YouJizzIE
70 from .extractor.youku import YoukuIE
71 from .extractor.youporn import YouPornIE
72 from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
73 from .extractor.zdf import ZDFIE
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'

    def _real_extract(self, url):
        """Extract the stream URL and metadata for one Flickr video page.

        The photo page is fetched to obtain the per-photo secret, then two
        XML documents are fetched from secure.flickr.com: the first yields a
        node id, the second (the playlist) yields the stream location.

        Raises ExtractorError when `url` does not match `_VALID_URL` or when
        the playlist contains no STREAM element.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Same guard and message as TeamcocoIE; without it a non-matching
            # URL would surface as an AttributeError on None.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')
        # Rebuild a canonical page URL instead of reusing the (possibly
        # scheme-less or suffixed) URL the user supplied.
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        # Both follow-up requests need the secret embedded in the page.
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')

        first_url = ('https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id='
                     + video_id + '&secret=' + secret + '&bitrate=700&target=_self')
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')

        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
            first_xml, u'node_id')

        second_url = ('https://secure.flickr.com/video_playlist.gne?node_id=' + node_id
                      + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret
                      + '&rd=video.yahoo.com&noad=1')
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        # The stream URL is the APP prefix followed by the HTML-unescaped
        # FULLPATH attribute.
        video_url = mobj.group(1) + unescapeHTML(mobj.group(2))

        # Each og: pattern accepts a double- or single-quoted content value.
        video_title = self._html_search_regex(r'<meta property="og:title" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'video title')

        # Description and thumbnail are looked up with fatal=False
        # (presumably: a miss is tolerated rather than raised - confirm
        # against InfoExtractor._html_search_regex).
        video_description = self._html_search_regex(r'<meta property="og:description" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'description', fatal=False)

        thumbnail = self._html_search_regex(r'<meta property="og:image" content=(?:"([^"]+)"|\'([^\']+)\')',
            webpage, u'thumbnail', fatal=False)

        # NOTE(review): the 'id'/'url'/'ext' entries and the one-element list
        # wrapper are reconstructed; 'mp4' is an assumed container - confirm
        # against the upstream file and the served streams.
        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }]
class TeamcocoIE(InfoExtractor):
    """Information Extractor for teamcoco.com videos"""
    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'

    def _real_extract(self, url):
        """Extract the high-quality file URL and metadata for one video page.

        The numeric video id is read from the page markup and used to fetch
        the matching cvp/2.0 XML document, from which the "high" file entry
        is taken.

        Raises ExtractorError when `url` does not match `_VALID_URL`.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # Only the URL slug is known at this point, so it stands in for the
        # video id in the first download call.
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)

        video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
            webpage, u'video id')

        self.report_extraction(video_id)

        video_title = self._html_search_regex(r'<meta property="og:title" content="(.+?)"',
            webpage, u'title')

        # Thumbnail and description are looked up with fatal=False
        # (presumably: a miss is tolerated rather than raised - confirm
        # against InfoExtractor._html_search_regex).
        thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)"',
            webpage, u'thumbnail', fatal=False)

        video_description = self._html_search_regex(r'<meta property="og:description" content="(.*?)"',
            webpage, u'description', fatal=False)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_webpage(data_url, video_id, 'Downloading data webpage')

        # The file entry lives in the XML document fetched above, not in the
        # HTML page.
        video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
            data, u'video URL')

        # NOTE(review): the 'id'/'url'/'ext' entries and the one-element list
        # wrapper are reconstructed; 'mp4' is an assumed container - confirm
        # against the upstream file and the served files.
        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
        }]
class XHamsterIE(InfoExtractor):
    """Information Extractor for xHamster"""
    # The dot after "www" is escaped so that it only matches a literal '.'.
    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'

    def _real_extract(self, url):
        """Extract the media URL and metadata for one xHamster movie page.

        Raises ExtractorError when `url` does not match `_VALID_URL` or when
        the page contains no 'srv'/'file' pair to build the media URL from.
        A missing upload date only produces a warning.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Without this guard a non-matching URL would surface as an
            # AttributeError on None.
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')
        # Fetch a normalised page URL built from the id alone.
        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
        webpage = self._download_webpage(mrss_url, video_id)

        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        if not mobj.group('server'):
            # With an empty server field, 'file' is itself a URL-quoted
            # address.
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
        else:
            video_url = mobj.group('server')+'/key='+mobj.group('file')
        # The extension is whatever follows the last dot of the media URL.
        video_extension = video_url.split('.')[-1]

        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
            webpage, u'title')

        # No description is extracted: it is not visible anywhere in the UI.

        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
        if mobj:
            # Collapse the matched date to YYYYMMDD.
            video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
        else:
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract upload date')

        # Uploads without a user link are attributed to u'anonymous'.
        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
            webpage, u'uploader id', default=u'anonymous')

        video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'',
            webpage, u'thumbnail', fatal=False)

        # NOTE(review): the 'id'/'url' entries and the one-element list
        # wrapper are reconstructed - confirm against the upstream file.
        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'thumbnail': video_thumbnail
        }]
256 def gen_extractors():
257 """ Return a list of an instance of every supported extractor.
258 The order does matter; the first extractor matched is the one handling the URL.
286 StanfordOpenClassroomIE(),
def get_info_extractor(ie_name):
    """Look up an info extractor class by its short name.

    The suffix 'IE' is appended to `ie_name` and the resulting identifier is
    fetched from this module's global namespace. A KeyError propagates when
    no global of that name exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]