X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2FInfoExtractors.py;h=8ea03ea0105792069d7bee50e525f079bb958a5b;hb=b49e75ff9a423eca11d71e07daa69150d3066288;hp=35ba6cc5c9d59752621178f568473f49a7357156;hpb=92b91c18780938283c505f5662c458e049bf3567;p=youtube-dl diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 35ba6cc5c..8ea03ea01 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -29,33 +29,35 @@ class InfoExtractor(object): """Information Extractor class. Information extractors are the classes that, given a URL, extract - information from the video (or videos) the URL refers to. This - information includes the real video URL, the video title and simplified - title, author and others. The information is stored in a dictionary - which is then passed to the FileDownloader. The FileDownloader - processes this information possibly downloading the video to the file - system, among other possible outcomes. The dictionaries must include - the following fields: - - id: Video identifier. - url: Final video URL. - uploader: Nickname of the video uploader. - title: Literal title. - ext: Video filename extension. - format: Video format. - player_url: SWF Player URL (may be None). - - The following fields are optional. Their primary purpose is to allow - youtube-dl to serve as the backend for a video search function, such - as the one in youtube2mp3. They are only used when their respective - forced printing functions are called: - - thumbnail: Full URL to a video thumbnail image. - description: One-line video description. + information about the video (or videos) the URL refers to. This + information includes the real video URL, the video title, author and + others. The information is stored in a dictionary which is then + passed to the FileDownloader. The FileDownloader processes this + information possibly downloading the video to the file system, among + other possible outcomes. 
+ + The dictionaries must include the following fields: + + id: Video identifier. + url: Final video URL. + uploader: Nickname of the video uploader. + upload_date: Video upload date (YYYYMMDD). + title: Video title, unescaped. + ext: Video filename extension. + + The following fields are optional: + + format: The video format, defaults to ext (used for --get-format) + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. + player_url: SWF Player URL (used for rtmpdump). Subclasses of this one should re-define the _real_initialize() and _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + + _real_extract() must return a *list* of information dictionaries as + described above. """ _ready = False @@ -475,6 +477,9 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') + video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'), + self._video_dimensions.get(format_param, '???')) + results.append({ 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), @@ -482,7 +487,7 @@ class YoutubeIE(InfoExtractor): 'upload_date': upload_date, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'format': video_format, 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'player_url': player_url, @@ -616,8 +621,6 @@ class MetacafeIE(InfoExtractor): 'upload_date': u'NA', 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -715,8 +718,6 @@ class DailymotionIE(InfoExtractor): 'upload_date': video_upload_date, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -810,8 +811,6 @@ class GoogleIE(InfoExtractor): 'upload_date': u'NA', 
'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -877,8 +876,6 @@ class PhotobucketIE(InfoExtractor): 'upload_date': u'NA', 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -1022,7 +1019,6 @@ class YahooIE(InfoExtractor): 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'thumbnail': video_thumbnail, - 'player_url': None, }] @@ -1136,7 +1132,6 @@ class VimeoIE(InfoExtractor): 'ext': video_extension, 'thumbnail': video_thumbnail, 'description': video_description, - 'player_url': None, }] @@ -1282,8 +1277,6 @@ class GenericIE(InfoExtractor): 'upload_date': u'NA', 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -1888,8 +1881,6 @@ class DepositFilesIE(InfoExtractor): 'upload_date': u'NA', 'title': file_title, 'ext': file_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -2095,7 +2086,6 @@ class FacebookIE(InfoExtractor): 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description.decode('utf-8'), - 'player_url': None, }) return results @@ -2243,8 +2233,6 @@ class MyVideoIE(InfoExtractor): 'upload_date': u'NA', 'title': video_title, 'ext': u'flv', - 'format': u'NA', - 'player_url': None, }] class ComedyCentralIE(InfoExtractor): @@ -2253,6 +2241,25 @@ class ComedyCentralIE(InfoExtractor): _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$' IE_NAME = u'comedycentral' + _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] + + _video_extensions = { + '3500': 'mp4', + '2200': 'mp4', + '1700': 'mp4', + '1200': 'mp4', + '750': 'mp4', + '400': 'mp4', + } + _video_dimensions = { + '3500': '1280x720', + '2200': '960x540', + '1700': 
'768x432', + '1200': '640x360', + '750': '512x288', + '400': '384x216', + } + def report_extraction(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) @@ -2265,6 +2272,13 @@ class ComedyCentralIE(InfoExtractor): def report_player_url(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + + def _print_formats(self, formats): + print('Available formats:') + for x in formats: + print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) + + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -2305,10 +2319,19 @@ class ComedyCentralIE(InfoExtractor): epTitle = mobj.group('episode') mMovieParams = re.findall('(?: