X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=d661d517dea96e09819ec52f58cf0205f40b0d72;hb=12887875a26c46822d26c626ef20b8693547835f;hp=d7ddf4e37b1a8e1103391ac3cc1aeed483d584c2;hpb=6d4363368affe197f1c3efbd34d18b365c3d929d;p=youtube-dl
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index d7ddf4e37..d661d517d 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -1330,7 +1330,7 @@ class GenericIE(InfoExtractor):
opener = compat_urllib_request.OpenerDirector()
for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
HTTPMethodFallback, HEADRedirectHandler,
- compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
+ compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
response = opener.open(HeadRequest(url))
@@ -1366,6 +1366,9 @@ class GenericIE(InfoExtractor):
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+ if mobj is None:
+ # Broaden the search a little bit: JWPlayer JS loader
+ mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
@@ -1469,7 +1472,7 @@ class YoutubeSearchIE(InfoExtractor):
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
request = compat_urllib_request.Request(result_url)
try:
- data = compat_urllib_request.urlopen(request).read()
+ data = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
return
@@ -1918,9 +1921,8 @@ class BlipTVUserIE(InfoExtractor):
while True:
self.report_download_page(username, pagenum)
-
- request = compat_urllib_request.Request( page_base + "&page=" + str(pagenum) )
-
+ url = page_base + "&page=" + str(pagenum)
+ request = compat_urllib_request.Request( url )
try:
page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -2098,6 +2100,10 @@ class FacebookIE(InfoExtractor):
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_url = params['hd_src']
+ if not video_url:
+ video_url = params['sd_src']
+ if not video_url:
+ raise ExtractorError(u'Cannot find video URL')
video_duration = int(params['video_duration'])
m = re.search('
', webpage)
@@ -3620,18 +3626,22 @@ class SteamIE(InfoExtractor):
mweb = re.finditer(urlRE, webpage)
namesRE = r'(?P.+?)'
titles = re.finditer(namesRE, webpage)
+ thumbsRE = r''
+ thumbs = re.finditer(thumbsRE, webpage)
videos = []
- for vid,vtitle in zip(mweb,titles):
+ for vid,vtitle,thumb in zip(mweb,titles,thumbs):
video_id = vid.group('videoID')
title = vtitle.group('videoName')
video_url = vid.group('videoURL')
+ video_thumb = thumb.group('thumbnail')
if not video_url:
self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id)
info = {
'id':video_id,
'url':video_url,
'ext': 'flv',
- 'title': unescapeHTML(title)
+ 'title': unescapeHTML(title),
+ 'thumbnail': video_thumb
}
videos.append(info)
return videos
@@ -3725,13 +3735,13 @@ class YouPornIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
# Get the video title
- result = re.search(r'videoTitleArea">(?P.*)', webpage)
+ result = re.search(r'(?P.*)', webpage)
if result is None:
- raise ExtractorError(u'ERROR: unable to extract video title')
+ raise ExtractorError(u'Unable to extract video title')
video_title = result.group('title').strip()
# Get the video date
- result = re.search(r'Date:(?P.*)', webpage)
+ result = re.search(r'Date:(?P.*) ', webpage)
if result is None:
self._downloader.to_stderr(u'WARNING: unable to extract video date')
upload_date = None
@@ -3739,9 +3749,9 @@ class YouPornIE(InfoExtractor):
upload_date = result.group('date').strip()
# Get the video uploader
- result = re.search(r'Submitted:(?P.*)', webpage)
+ result = re.search(r'Submitted:(?P.*)', webpage)
if result is None:
- self._downloader.to_stderr(u'ERROR: unable to extract uploader')
+ self._downloader.to_stderr(u'WARNING: unable to extract uploader')
video_uploader = None
else:
video_uploader = result.group('uploader').strip()
@@ -3970,28 +3980,76 @@ class KeekIE(InfoExtractor):
return [info]
class TEDIE(InfoExtractor):
- _VALID_URL=r'http://www.ted.com/talks/(?P\w+)'
+ _VALID_URL=r'''http://www.ted.com/
+ (
+ ((?Pplaylists)/(?P\d+)) # We have a playlist
+ |
+ ((?Ptalks)) # We have a simple talk
+ )
+ /(?P\w+) # Here goes the name and then ".html"
+ '''
+
+ def suitable(self, url):
+ """Receives a URL and returns True if suitable for this IE."""
+ return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+
def _real_extract(self, url):
- m=re.match(self._VALID_URL, url)
- videoName=m.group('videoName')
- webpage=self._download_webpage(url, 0, 'Downloading \"%s\" page' % videoName)
- #If the url includes the language we get the title translated
- title_RE=r'(?P[\s\w:/\.\?=\+-\\\']*)
'
+ m=re.match(self._VALID_URL, url, re.VERBOSE)
+ if m.group('type_talk'):
+ return [self._talk_info(url)]
+ else :
+ playlist_id=m.group('playlist_id')
+ name=m.group('name')
+ self._downloader.to_screen(u'[%s] Getting info of playlist %s: "%s"' % (self.IE_NAME,playlist_id,name))
+ return self._playlist_videos_info(url,name,playlist_id)
+
+ def _talk_video_link(self,mediaSlug):
+ '''Returns the video link for that mediaSlug'''
+ return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
+
+ def _playlist_videos_info(self,url,name,playlist_id=0):
+ '''Returns the videos of the playlist'''
+ video_RE=r'''
+ \d+)"
+ ([.\s]*?)data-playlist_item_id="(\d+)"
+ ([.\s]*?)data-mediaslug="(?P.+?)"
+ '''
+ video_name_RE=r'(?P.+?)
'
+ webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
+ m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
+ m_names=re.finditer(video_name_RE,webpage)
+ info=[]
+ for m_video, m_name in zip(m_videos,m_names):
+ video_id=m_video.group('video_id')
+ talk_url='http://www.ted.com%s' % m_name.group('talk_url')
+ info.append(self._talk_info(talk_url,video_id))
+ return info
+
+ def _talk_info(self, url, video_id=0):
+ """Return the video for the talk in the url"""
+ m=re.match(self._VALID_URL, url,re.VERBOSE)
+ videoName=m.group('name')
+ webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
+ # If the url includes the language we get the title translated
+ title_RE=r'(?P.*)
'
title=re.search(title_RE, webpage).group('title')
info_RE=r'''