X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2FInfoExtractors.py;h=8ea03ea0105792069d7bee50e525f079bb958a5b;hb=b49e75ff9a423eca11d71e07daa69150d3066288;hp=35ba6cc5c9d59752621178f568473f49a7357156;hpb=92b91c18780938283c505f5662c458e049bf3567;p=youtube-dl

diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 35ba6cc5c..8ea03ea01 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -29,33 +29,35 @@ class InfoExtractor(object):
 	"""Information Extractor class.
 
 	Information extractors are the classes that, given a URL, extract
-	information from the video (or videos) the URL refers to. This
-	information includes the real video URL, the video title and simplified
-	title, author and others. The information is stored in a dictionary
-	which is then passed to the FileDownloader. The FileDownloader
-	processes this information possibly downloading the video to the file
-	system, among other possible outcomes. The dictionaries must include
-	the following fields:
-
-	id:		Video identifier.
-	url:		Final video URL.
-	uploader:	Nickname of the video uploader.
-	title:		Literal title.
-	ext:		Video filename extension.
-	format:		Video format.
-	player_url:	SWF Player URL (may be None).
-
-	The following fields are optional. Their primary purpose is to allow
-	youtube-dl to serve as the backend for a video search function, such
-	as the one in youtube2mp3.  They are only used when their respective
-	forced printing functions are called:
-
-	thumbnail:	Full URL to a video thumbnail image.
-	description:	One-line video description.
+	information about the video (or videos) the URL refers to. This
+	information includes the real video URL, the video title, author and
+	others. The information is stored in a dictionary which is then 
+	passed to the FileDownloader. The FileDownloader processes this
+	information possibly downloading the video to the file system, among
+	other possible outcomes.
+
+	The dictionaries must include the following fields:
+
+	id:             Video identifier.
+	url:            Final video URL.
+	uploader:       Nickname of the video uploader.
+	upload_date:    Video upload date (YYYYMMDD).
+	title:          Video title, unescaped.
+	ext:            Video filename extension.
+
+	The following fields are optional:
+
+	format:         The video format, defaults to ext (used for --get-format)
+	thumbnail:      Full URL to a video thumbnail image.
+	description:    One-line video description.
+	player_url:     SWF Player URL (used for rtmpdump).
 
 	Subclasses of this one should re-define the _real_initialize() and
 	_real_extract() methods and define a _VALID_URL regexp.
 	Probably, they should also be added to the list of extractors.
+
+	_real_extract() must return a *list* of information dictionaries as
+	described above.
 	"""
 
 	_ready = False
@@ -475,6 +477,9 @@ class YoutubeIE(InfoExtractor):
 			# Extension
 			video_extension = self._video_extensions.get(format_param, 'flv')
 
+			video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'),
+				                            self._video_dimensions.get(format_param, '???'))
+
 			results.append({
 				'id':		video_id.decode('utf-8'),
 				'url':		video_real_url.decode('utf-8'),
@@ -482,7 +487,7 @@ class YoutubeIE(InfoExtractor):
 				'upload_date':	upload_date,
 				'title':	video_title,
 				'ext':		video_extension.decode('utf-8'),
-				'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
+				'format':	video_format,
 				'thumbnail':	video_thumbnail.decode('utf-8'),
 				'description':	video_description,
 				'player_url':	player_url,
@@ -616,8 +621,6 @@ class MetacafeIE(InfoExtractor):
 			'upload_date':	u'NA',
 			'title':	video_title,
 			'ext':		video_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 
@@ -715,8 +718,6 @@ class DailymotionIE(InfoExtractor):
 			'upload_date':	video_upload_date,
 			'title':	video_title,
 			'ext':		video_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 
@@ -810,8 +811,6 @@ class GoogleIE(InfoExtractor):
 			'upload_date':	u'NA',
 			'title':	video_title,
 			'ext':		video_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 
@@ -877,8 +876,6 @@ class PhotobucketIE(InfoExtractor):
 			'upload_date':	u'NA',
 			'title':	video_title,
 			'ext':		video_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 
@@ -1022,7 +1019,6 @@ class YahooIE(InfoExtractor):
 			'thumbnail':	video_thumbnail.decode('utf-8'),
 			'description':	video_description,
 			'thumbnail':	video_thumbnail,
-			'player_url':	None,
 		}]
 
 
@@ -1136,7 +1132,6 @@ class VimeoIE(InfoExtractor):
 			'ext':		video_extension,
 			'thumbnail':	video_thumbnail,
 			'description':	video_description,
-			'player_url':	None,
 		}]
 
 
@@ -1282,8 +1277,6 @@ class GenericIE(InfoExtractor):
 			'upload_date':	u'NA',
 			'title':	video_title,
 			'ext':		video_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 
@@ -1888,8 +1881,6 @@ class DepositFilesIE(InfoExtractor):
 			'upload_date':	u'NA',
 			'title':	file_title,
 			'ext':		file_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 
@@ -2095,7 +2086,6 @@ class FacebookIE(InfoExtractor):
 				'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
 				'thumbnail':	video_thumbnail.decode('utf-8'),
 				'description':	video_description.decode('utf-8'),
-				'player_url':	None,
 			})
 		return results
 
@@ -2243,8 +2233,6 @@ class MyVideoIE(InfoExtractor):
 			'upload_date':  u'NA',
 			'title':	video_title,
 			'ext':		u'flv',
-			'format':	u'NA',
-			'player_url':	None,
 		}]
 
 class ComedyCentralIE(InfoExtractor):
@@ -2253,6 +2241,25 @@ class ComedyCentralIE(InfoExtractor):
 	_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
 	IE_NAME = u'comedycentral'
 
+	_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
+
+	_video_extensions = {
+		'3500': 'mp4',
+		'2200': 'mp4',
+		'1700': 'mp4',
+		'1200': 'mp4',
+		'750': 'mp4',
+		'400': 'mp4',
+	}
+	_video_dimensions = {
+		'3500': '1280x720',
+		'2200': '960x540',
+		'1700': '768x432',
+		'1200': '640x360',
+		'750': '512x288',
+		'400': '384x216',
+	}
+
 	def report_extraction(self, episode_id):
 		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
 
@@ -2265,6 +2272,13 @@ class ComedyCentralIE(InfoExtractor):
 	def report_player_url(self, episode_id):
 		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
 
+
+	def _print_formats(self, formats):  # Print a table of the given format ids to stdout
+		print('Available formats:')  # header line, printed even when formats is empty
+		for x in formats:  # x is a format id used as key into the two lookup tables
+			print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))  # unknown ids fall back to 'mp4' / '???'
+
+
 	def _real_extract(self, url):
 		mobj = re.match(self._VALID_URL, url)
 		if mobj is None:
@@ -2305,10 +2319,19 @@ class ComedyCentralIE(InfoExtractor):
 			epTitle = mobj.group('episode')
 
 		mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+
 		if len(mMovieParams) == 0:
-			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
-			return
+			# The Colbert Report embeds the media reference in a data-mgid
+			# attribute without a URL prefix; so extract that alternate
+			# reference and then add the URL prefix manually.
 
+			altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+			if len(altMovieParams) == 0:
+				self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+				return
+			else:
+				mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+		
 		playerUrl_raw = mMovieParams[0][0]
 		self.report_player_url(epTitle)
 		try:
@@ -2357,10 +2380,31 @@ class ComedyCentralIE(InfoExtractor):
 			if len(turls) == 0:
 				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
 				continue
+			
+			if self._downloader.params.get('listformats', None):
+				self._print_formats([i[0] for i in turls])
+				return
 
 			# For now, just pick the highest bitrate
 			format,video_url = turls[-1]
 
+			# Get the format arg from the arg stream
+			req_format = self._downloader.params.get('format', None)
+
+			# Select format if we can find one
+			for f,v in turls:
+				if f == req_format:
+					format, video_url = f, v
+					break
+
+			# Patch to download from alternative CDN, which does not
+			# break on current RTMPDump builds
+			broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
+			better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
+
+			if video_url.startswith(broken_cdn):
+				video_url = video_url.replace(broken_cdn, better_cdn)
+
 			effTitle = showId + u'-' + epTitle
 			info = {
 				'id': shortMediaId,
@@ -2372,7 +2416,7 @@ class ComedyCentralIE(InfoExtractor):
 				'format': format,
 				'thumbnail': None,
 				'description': officialTitle,
-				'player_url': playerUrl
+				'player_url': None #playerUrl
 			}
 
 			results.append(info)
@@ -2445,7 +2489,6 @@ class EscapistIE(InfoExtractor):
 			'upload_date': None,
 			'title': showName,
 			'ext': 'flv',
-			'format': 'flv',
 			'thumbnail': imgUrl,
 			'description': description,
 			'player_url': playerUrl,
@@ -2510,7 +2553,6 @@ class CollegeHumorIE(InfoExtractor):
 			info['url'] = videoNode.findall('./file')[0].text
 			info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
 			info['ext'] = info['url'].rpartition('.')[2]
-			info['format'] = info['ext']
 		except IndexError:
 			self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
 			return
@@ -2581,10 +2623,8 @@ class XVideosIE(InfoExtractor):
 			'upload_date': None,
 			'title': video_title,
 			'ext': 'flv',
-			'format': 'flv',
 			'thumbnail': video_thumbnail,
 			'description': None,
-			'player_url': None,
 		}
 
 		return [info]
@@ -2678,8 +2718,6 @@ class SoundcloudIE(InfoExtractor):
 			'upload_date':  upload_date,
 			'title':	title,
 			'ext':		u'mp3',
-			'format':	u'NA',
-			'player_url':	None,
 			'description': description.decode('utf-8')
 		}]
 
@@ -2746,11 +2784,9 @@ class InfoQIE(InfoExtractor):
 			'uploader': None,
 			'upload_date': None,
 			'title': video_title,
-			'ext': extension,
-			'format': extension, # Extension is always(?) mp4, but seems to be flv
+			'ext': extension, # Extension is always(?) mp4, but seems to be flv
 			'thumbnail': None,
 			'description': video_description,
-			'player_url': None,
 		}
 
 		return [info]
@@ -2911,7 +2947,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
 				self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
 				return
 			info['ext'] = info['url'].rpartition('.')[2]
-			info['format'] = info['ext']
 			return [info]
 		elif mobj.group('course'): # A course page
 			course = mobj.group('course')
@@ -3185,7 +3220,6 @@ class YoukuIE(InfoExtractor):
 				'uploader': None,
 				'title': video_title,
 				'ext': ext,
-				'format': u'NA'
 			}
 			files_info.append(info)
 
@@ -3243,18 +3277,16 @@ class XNXXIE(InfoExtractor):
 			return
 		video_thumbnail = result.group(1).decode('utf-8')
 
-		info = {'id': video_id,
-				'url': video_url,
-				'uploader': None,
-				'upload_date': None,
-				'title': video_title,
-				'ext': 'flv',
-				'format': 'flv',
-				'thumbnail': video_thumbnail,
-				'description': None,
-				'player_url': None}
-
-		return [info]
+		return [{
+			'id': video_id,
+			'url': video_url,
+			'uploader': None,
+			'upload_date': None,
+			'title': video_title,
+			'ext': 'flv',
+			'thumbnail': video_thumbnail,
+			'description': None,
+		}]
 
 
 class GooglePlusIE(InfoExtractor):
@@ -3376,6 +3408,4 @@ class GooglePlusIE(InfoExtractor):
 			'upload_date':	upload_date.decode('utf-8'),
 			'title':	video_title.decode('utf-8'),
 			'ext':		video_extension.decode('utf-8'),
-			'format':	u'NA',
-			'player_url':	None,
 		}]