X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FFileDownloader.py;h=49f3a871261d3816ec0b537ae60644691dc52f19;hb=f008688520ad0fadcf80601ce193d352cc0d4bd3;hp=574863e7c9da066b1fc29741a83a58f2a2baaed4;hpb=5c01dd1e7374d1c84d19dab3e0172ee31a7f69e1;p=youtube-dl diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 574863e7c..49f3a8712 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -54,6 +54,7 @@ class FileDownloader(object): quiet: Do not print messages to stdout. forceurl: Force printing final URL. forcetitle: Force printing title. + forceid: Force printing ID. forcethumbnail: Force printing thumbnail URL. forcedescription: Force printing description. forcefilename: Force printing final filename. @@ -82,7 +83,6 @@ class FileDownloader(object): writeinfojson: Write the video description to a .info.json file writethumbnail: Write the thumbnail image to a file writesubtitles: Write the video subtitles to a file - onlysubtitles: Downloads only the subtitles of the video allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [sbv/srt] (default=srt) @@ -92,6 +92,7 @@ class FileDownloader(object): min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size daterange: A DateRange object, download only if the upload_date is in the range. + skip_download: Skip the actual download of the video file """ params = None @@ -435,47 +436,45 @@ class FileDownloader(object): return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) return None - def extract_info(self, url, download = True, ie_name = None): + def extract_info(self, url, download=True, ie_key=None, extra_info={}): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. + extra_info is a dict containing the extra values to add to each result ''' - suitable_found = False - #We copy the original list - ies = list(self._ies) - - if ie_name is not None: - #We put in the first place the given info extractor - first_ie = get_info_extractor(ie_name)() - first_ie.set_downloader(self) - ies.insert(0, first_ie) + if ie_key: + ie = get_info_extractor(ie_key)() + ie.set_downloader(self) + ies = [ie] + else: + ies = self._ies for ie in ies: - # Go to next InfoExtractor if not suitable if not ie.suitable(url): continue - # Warn if the _WORKING attribute is False if not ie.working(): - self.report_warning(u'the program functionality for this site has been marked as broken, ' - u'and will probably not work. If you want to go on, use the -i option.') + self.report_warning(u'The program functionality for this site has been marked as broken, ' + u'and will probably not work.') - # Suitable InfoExtractor found - suitable_found = True - - # Extract information from URL and process it try: - ie_results = ie.extract(url) - if ie_results is None: # Finished already (backwards compatibility; listformats and friends should be moved here) + ie_result = ie.extract(url) + if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) break - results = [] - for ie_result in ie_results: - if not 'extractor' in ie_result: - #The extractor has already been set somewhere else - ie_result['extractor'] = ie.IE_NAME - results.append(self.process_ie_result(ie_result, download)) - return results + if isinstance(ie_result, list): + # Backwards compatibility: old IE result format + for result in ie_result: + result.update(extra_info) + ie_result = { + '_type': 'compat_list', + 'entries': ie_result, + } + else: + ie_result.update(extra_info) + if 'extractor' not in ie_result: + ie_result['extractor'] = ie.IE_NAME + return self.process_ie_result(ie_result, download=download) except ExtractorError as de: # An error we somewhat expected self.report_error(compat_str(de), de.format_traceback()) break @@ -485,33 +484,36 @@ class FileDownloader(object): break else: raise - if not suitable_found: - self.report_error(u'no suitable InfoExtractor: %s' % url) + else: + self.report_error(u'no suitable InfoExtractor: %s' % url) - def process_ie_result(self, ie_result, download = True): + def process_ie_result(self, ie_result, download=True, extra_info={}): """ - Take the result of the ie and return a list of videos. - For url elements it will search the suitable ie and get the videos - For playlist elements it will process each of the elements of the 'entries' key - + Take the result of the ie(may be modified) and resolve all unresolved + references (URLs, playlist items). + It will also download the videos if 'download'. + Returns the resolved ie_result. """ - result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system + + result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': if 'playlist' not in ie_result: - #It isn't part of a playlist + # It isn't part of a playlist ie_result['playlist'] = None ie_result['playlist_index'] = None if download: - #Do the download: self.process_info(ie_result) return ie_result elif result_type == 'url': - #We get the video pointed by the url - result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0] - return result + # We have to add extra_info to the results because it may be + # contained in a playlist + return self.extract_info(ie_result['url'], + download, + ie_key=ie_result.get('ie_key'), + extra_info=extra_info) elif result_type == 'playlist': - #We process each entry in the playlist + # We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -533,23 +535,35 @@ class FileDownloader(object): for i,entry in enumerate(entries,1): self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) - entry_result = self.process_ie_result(entry, False) - entry_result['playlist'] = playlist - entry_result['playlist_index'] = i + playliststart - #We must do the download here to correctly set the 'playlist' key - if download: - self.process_info(entry_result) + extra = { + 'playlist': playlist, + 'playlist_index': i + playliststart, + } + entry_result = self.process_ie_result(entry, + download=download, + extra_info=extra) playlist_results.append(entry_result) - result = ie_result.copy() - result['entries'] = playlist_results - return result + ie_result['entries'] = playlist_results + return ie_result + elif result_type == 'compat_list': + def _fixup(r): + r.setdefault('extractor', ie_result['extractor']) + return r + ie_result['entries'] = [ + self.process_ie_result(_fixup(r), download=download) + for r in ie_result['entries'] + ] + return ie_result + else: + raise Exception('Invalid result type: %s' % result_type) def process_info(self, info_dict): - """Process a single dictionary returned by an InfoExtractor.""" + """Process a single resolved IE result.""" + assert info_dict.get('_type', 'video') == 'video' #We increment the download the download count here to match the previous behaviour. self.increment_downloads() - + info_dict['fulltitle'] = info_dict['title'] if len(info_dict['title']) > 200: info_dict['title'] = info_dict['title'][:197] + u'...' @@ -575,6 +589,8 @@ class FileDownloader(object): # Forced printings if self.params.get('forcetitle', False): compat_print(info_dict['title']) + if self.params.get('forceid', False): + compat_print(info_dict['id']) if self.params.get('forceurl', False): compat_print(info_dict['url']) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: @@ -595,7 +611,7 @@ class FileDownloader(object): try: dn = os.path.dirname(encodeFilename(filename)) - if dn != '' and not os.path.exists(dn): # dn is already encoded + if dn != '' and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: self.report_error(u'unable to create directory ' + compat_str(err)) @@ -628,8 +644,6 @@ class FileDownloader(object): except (OSError, IOError): self.report_error(u'Cannot write subtitles file ' + descfn) return - if self.params.get('onlysubtitles', False): - return if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: subtitles = info_dict['subtitles'] @@ -647,8 +661,6 @@ class FileDownloader(object): except (OSError, IOError): self.report_error(u'Cannot write subtitles file ' + descfn) return - if self.params.get('onlysubtitles', False): - return if self.params.get('writeinfojson', False): infofn = filename + u'.info.json' @@ -736,7 +748,7 @@ class FileDownloader(object): except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') - def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path): + def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -751,12 +763,15 @@ class FileDownloader(object): # the connection was interrumpted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename] + if self.params.get('verbose', False): basic_args[1] = '-v' if player_url is not None: basic_args += ['-W', player_url] if page_url is not None: basic_args += ['--pageUrl', page_url] if play_path is not None: basic_args += ['-y', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', url] args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: @@ -812,7 +827,8 @@ class FileDownloader(object): return self._download_with_rtmpdump(filename, url, info_dict.get('player_url', None), info_dict.get('page_url', None), - info_dict.get('play_path', None)) + info_dict.get('play_path', None), + info_dict.get('tc_url', None)) tmpfilename = self.temp_name(filename) stream = None