From: Philipp Hagemeister
Date: Sun, 25 Jan 2015 16:55:31 +0000 (+0100)
Subject: Merge remote-tracking branch 'David-Development/rtl2.py'
X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=d4f64cabf4ede444b390bb71b90ad4103ce572c0;hp=fe41ddbb285abccc3c4d7a3ebc1238c13ec72577

Merge remote-tracking branch 'David-Development/rtl2.py'
---
diff --git a/.travis.yml b/.travis.yml
index f14014414..fb34299fc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,9 @@ python:
   - "2.7"
   - "3.3"
   - "3.4"
+before_install:
+  - sudo apt-get update -qq
+  - sudo apt-get install -yqq rtmpdump
 script: nosetests test --verbose
 notifications:
   email:
diff --git a/AUTHORS b/AUTHORS
index b8bf3cb6f..8362b6d8a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -104,3 +104,4 @@ Ondřej Caletka
 Dinesh S
 Johan K. Jensen
 Yen Chi Hsuan
+Enam Mijbah Noor
diff --git a/test/helper.py b/test/helper.py
index c416f388c..27a68091f 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -140,7 +140,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     # Are checkable fields missing from the test case definition?
     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                           for key, value in got_dict.items()
-                          if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
     if missing_keys:
         def _repr(v):
diff --git a/test/test_utils.py b/test/test_utils.py
index bdd7f268a..ebec7986f 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -52,6 +52,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    render_table,
 )
@@ -434,5 +435,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg
 ...'''), '2.4.4')
         self.assertTrue(is_html(  # UTF-32-LE
             b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+    def test_render_table(self):
+        self.assertEqual(
+            render_table(
+                ['a', 'bcd'],
+                [[123, 4], [9999, 51]]),
+            'a    bcd\n'
+            '123  4\n'
+            '9999 51')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 54e732943..b772f87f1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -54,8 +54,10 @@ from .utils import (
     PostProcessingError,
     platform_name,
     preferredencoding,
+    render_table,
     SameFileError,
     sanitize_filename,
+    std_headers,
     subtitles_filename,
     takewhile_inclusive,
     UnavailableVideoError,
@@ -135,6 +137,7 @@ class YoutubeDL(object):
     nooverwrites:      Prevent overwriting files.
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
+    playlist_items:    Specific indices of playlist to download.
     playlistreverse:   Download playlist items in reverse order.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
@@ -144,6 +147,7 @@ class YoutubeDL(object): writeinfojson: Write the video description to a .info.json file writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file + write_all_thumbnails: Write all thumbnail formats to files writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video @@ -194,11 +198,12 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * filename: The final filename - * status: One of "downloading" and "finished" - - The dict may also have some of the following entries: + * status: One of "downloading" and "finished". + Check this first and ignore unknown values. + If status is one of "downloading" or "finished", the + following properties may also be present: + * filename: The final filename (always present) * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown * tmpfilename: The filename we're currently writing to @@ -220,12 +225,15 @@ class YoutubeDL(object): youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. external_downloader: Executable of the external downloader to call. + listformats: Print an overview of available video formats and exit. + list_thumbnails: Print a table of all thumbnails and exit. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, - noresizebuffer, retries, continuedl, noprogress, consoletitle + noresizebuffer, retries, continuedl, noprogress, consoletitle, + xattr_set_filesize. 
The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -698,24 +706,51 @@ class YoutubeDL(object): if playlistend == -1: playlistend = None + playlistitems_str = self.params.get('playlist_items', None) + playlistitems = None + if playlistitems_str is not None: + def iter_playlistitems(format): + for string_segment in format.split(','): + if '-' in string_segment: + start, end = string_segment.split('-') + for item in range(int(start), int(end) + 1): + yield int(item) + else: + yield int(string_segment) + playlistitems = iter_playlistitems(playlistitems_str) + ie_entries = ie_result['entries'] if isinstance(ie_entries, list): n_all_entries = len(ie_entries) - entries = ie_entries[playliststart:playlistend] + if playlistitems: + entries = [ie_entries[i - 1] for i in playlistitems] + else: + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) elif isinstance(ie_entries, PagedList): - entries = ie_entries.getslice( - playliststart, playlistend) + if playlistitems: + entries = [] + for item in playlistitems: + entries.extend(ie_entries.getslice( + item - 1, item + )) + else: + entries = ie_entries.getslice( + playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) else: # iterable - entries = list(itertools.islice( - ie_entries, playliststart, playlistend)) + if playlistitems: + entry_list = list(ie_entries) + entries = [entry_list[i - 1] for i in playlistitems] + else: + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % @@ -865,6 +900,42 @@ class YoutubeDL(object): return matches[-1] return None + def _calc_headers(self, info_dict): + res = std_headers.copy() + + add_headers = info_dict.get('http_headers') + if add_headers: + res.update(add_headers) + + cookies = self._calc_cookies(info_dict) + if cookies: + res['Cookie'] = cookies + + return res + + def _calc_cookies(self, info_dict): + class _PseudoRequest(object): + def __init__(self, url): + self.url = url + self.headers = {} + self.unverifiable = False + + def add_unredirected_header(self, k, v): + self.headers[k] = v + + def get_full_url(self): + return self.url + + def is_unverifiable(self): + return self.unverifiable + + def has_header(self, h): + return h in self.headers + + pr = _PseudoRequest(info_dict['url']) + self.cookiejar.add_cookie_header(pr) + return pr.headers.get('Cookie') + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -879,9 +950,14 @@ class YoutubeDL(object): info_dict['playlist_index'] = None thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('width'), t.get('height'), t.get('url'))) + t.get('preference'), t.get('width'), t.get('height'), + t.get('id'), t.get('url'))) for t in thumbnails: if 'width' in t and 'height' in t: t['resolution'] = '%dx%d' % (t['width'], t['height']) @@ -933,6 +1009,11 @@ class YoutubeDL(object): # Automatically determine file extension if missing if 'ext' not in format: format['ext'] = 
determine_ext(format['url']).lower() + # Add HTTP headers, so that external programs can use them from the + # json output + full_format_info = info_dict.copy() + full_format_info.update(format) + format['http_headers'] = self._calc_headers(full_format_info) format_limit = self.params.get('format_limit', None) if format_limit: @@ -948,9 +1029,12 @@ class YoutubeDL(object): # element in the 'formats' field in info_dict is info_dict itself, # wich can't be exported to json info_dict['formats'] = formats - if self.params.get('listformats', None): + if self.params.get('listformats'): self.list_formats(info_dict) return + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return req_format = self.params.get('format') if req_format is None: @@ -1157,25 +1241,7 @@ class YoutubeDL(object): self.report_error('Cannot write metadata to JSON file ' + infofn) return - if self.params.get('writethumbnail', False): - if info_dict.get('thumbnail') is not None: - thumb_format = determine_ext(info_dict['thumbnail'], 'jpg') - thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format - if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): - self.to_screen('[%s] %s: Thumbnail is already present' % - (info_dict['extractor'], info_dict['id'])) - else: - self.to_screen('[%s] %s: Downloading thumbnail ...' % - (info_dict['extractor'], info_dict['id'])) - try: - uf = self.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen('[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self.report_warning('Unable to download thumbnail "%s": %s' % - (info_dict['thumbnail'], compat_str(err))) + self._write_thumbnails(info_dict, filename) if not self.params.get('skip_download', False): try: @@ -1186,6 +1252,7 @@ class YoutubeDL(object): if self.params.get('verbose'): self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) + if info_dict.get('requested_formats') is not None: downloaded = [] success = True @@ -1458,8 +1525,26 @@ class YoutubeDL(object): header_line = line({ 'format_id': 'format code', 'ext': 'extension', 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) - self.to_screen('[info] Available formats for %s:\n%s\n%s' % - (info_dict['id'], header_line, '\n'.join(formats_s))) + self.to_screen( + '[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, '\n'.join(formats_s))) + + def list_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if not thumbnails: + tn_url = info_dict.get('thumbnail') + if tn_url: + thumbnails = [{'id': '0', 'url': tn_url}] + else: + self.to_screen( + '[info] No thumbnails present for %s' % info_dict['id']) + return + + self.to_screen( + '[info] Thumbnails for %s:' % info_dict['id']) + self.to_screen(render_table( + ['ID', 'width', 'height', 'URL'], + [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) def urlopen(self, req): """ Start an HTTP download """ @@ -1605,3 +1690,39 @@ class YoutubeDL(object): if encoding is None: encoding = preferredencoding() return encoding + + def _write_thumbnails(self, info_dict, filename): + if self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') + if thumbnails: + thumbnails = [thumbnails[-1]] + 
elif self.params.get('write_all_thumbnails', False): + thumbnails = info_dict.get('thumbnails') + else: + return + + if not thumbnails: + # No thumbnails present, so return immediately + return + + for t in thumbnails: + thumb_ext = determine_ext(t['url'], 'jpg') + suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' + thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' + thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + + if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): + self.to_screen('[%s] %s: Thumbnail %sis already present' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + else: + self.to_screen('[%s] %s: Downloading thumbnail %s...' % + (info_dict['extractor'], info_dict['id'], thumb_display_id)) + try: + uf = self.urlopen(t['url']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen('[%s] %s: Writing thumbnail %sto: %s' % + (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning('Unable to download thumbnail "%s": %s' % + (t['url'], compat_str(err))) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3fc7dc5c2..71d2c6f35 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -143,10 +143,13 @@ def _real_main(argv=None): parser.error('invalid max_filesize specified') opts.max_filesize = numeric_limit if opts.retries is not None: - try: - opts.retries = int(opts.retries) - except (TypeError, ValueError): - parser.error('invalid retry count specified') + if opts.retries in ('inf', 'infinite'): + opts_retries = float('inf') + else: + try: + opts_retries = int(opts.retries) + except (TypeError, ValueError): + parser.error('invalid retry count specified') if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -238,6 +241,12 @@ def _real_main(argv=None): 'verboseOutput': opts.verbose, 'exec_cmd': opts.exec_cmd, }) + if opts.xattr_set_filesize: + try: + import xattr + xattr # Confuse flake8 + except ImportError: + parser.error('setting filesize xattr requested but python-xattr is not available') ydl_opts = { 'usenetrc': opts.usenetrc, @@ -268,7 +277,7 @@ def _real_main(argv=None): 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, + 'retries': opts_retries, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, @@ -286,6 +295,7 @@ def _real_main(argv=None): 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'writethumbnail': opts.writethumbnail, + 'write_all_thumbnails': opts.write_all_thumbnails, 'writesubtitles': opts.writesubtitles, 'writeautomaticsub': opts.writeautomaticsub, 'allsubtitles': opts.allsubtitles, @@ -331,6 +341,9 @@ def _real_main(argv=None): 'call_home': opts.call_home, 'sleep_interval': opts.sleep_interval, 'external_downloader': opts.external_downloader, + 'list_thumbnails': opts.list_thumbnails, + 'playlist_items': opts.playlist_items, + 'xattr_set_filesize': opts.xattr_set_filesize, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index c35c42c1d..7bb3a948d 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -25,21 +25,23 @@ class 
FileDownloader(object): Available options: - verbose: Print additional info to stdout. - quiet: Do not print messages to stdout. - ratelimit: Download speed limit, in bytes/sec. - retries: Number of times to retry for HTTP error 5xx - buffersize: Size of download buffer in bytes. - noresizebuffer: Do not automatically resize the download buffer. - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - test: Download only first bytes to test the downloader. - min_filesize: Skip files smaller than this size - max_filesize: Skip files larger than this size + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. + (experimenatal) Subclasses of this one must re-define the real_download method. 
""" diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 7ebe40096..af9fdba75 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -7,7 +7,6 @@ import sys from .common import FileDownloader from ..utils import ( encodeFilename, - std_headers, ) @@ -46,42 +45,6 @@ class ExternalFD(FileDownloader): def supports(cls, info_dict): return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') - def _calc_headers(self, info_dict): - res = std_headers.copy() - - ua = info_dict.get('user_agent') - if ua is not None: - res['User-Agent'] = ua - - cookies = self._calc_cookies(info_dict) - if cookies: - res['Cookie'] = cookies - - return res - - def _calc_cookies(self, info_dict): - class _PseudoRequest(object): - def __init__(self, url): - self.url = url - self.headers = {} - self.unverifiable = False - - def add_unredirected_header(self, k, v): - self.headers[k] = v - - def get_full_url(self): - return self.url - - def is_unverifiable(self): - return self.unverifiable - - def has_header(self, h): - return h in self.headers - - pr = _PseudoRequest(info_dict['url']) - self.ydl.cookiejar.add_cookie_header(pr) - return pr.headers.get('Cookie') - def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = self._make_cmd(tmpfilename, info_dict) @@ -107,7 +70,7 @@ class ExternalFD(FileDownloader): class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] - for key, val in self._calc_headers(info_dict).items(): + for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--', info_dict['url']] return cmd @@ -116,7 +79,7 @@ class CurlFD(ExternalFD): class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] - for key, val in self._calc_headers(info_dict).items(): + for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--', info_dict['url']] return cmd @@ -131,7 +94,7 @@ class Aria2cFD(ExternalFD): if dn: cmd += ['--dir', dn] cmd += ['--out', os.path.basename(tmpfilename)] - for key, val in self._calc_headers(info_dict).items(): + for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--', info_dict['url']] return cmd diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index e68f20c9f..8a1d578d5 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -24,10 +24,6 @@ class HttpFD(FileDownloader): # Do not include the Accept-Encoding header headers = {'Youtubedl-no-compression': 'True'} - if 'user_agent' in info_dict: - headers['Youtubedl-user-agent'] = info_dict['user_agent'] - if 'http_referer' in info_dict: - headers['Referer'] = info_dict['http_referer'] add_headers = info_dict.get('http_headers') if add_headers: headers.update(add_headers) @@ -161,6 +157,14 @@ class HttpFD(FileDownloader): except (OSError, IOError) as err: self.report_error('unable to open for writing: %s' % str(err)) return False + + if self.params.get('xattr_set_filesize', False) and data_len is not None: + try: + import xattr + xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + except(OSError, IOError, ImportError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) + try: stream.write(data_block) except (IOError, OSError) as err: diff --git 
a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 7cd0482c7..70621946d 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, 'uploader_id': uploader_id, - 'user_agent': 'QuickTime compatible (youtube-dl)', + 'http_headers': { + 'User-Agent': 'QuickTime compatible (youtube-dl)', + }, }) return { diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index f42862be3..f016368fa 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -95,7 +95,7 @@ class AtresPlayerIE(SubtitlesInfoExtractor): for fmt in ['windows', 'android_tablet']: request = compat_urllib_request.Request( self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) - request.add_header('Youtubedl-user-agent', self._USER_AGENT) + request.add_header('User-Agent', self._USER_AGENT) fmt_json = self._download_json( request, video_id, 'Downloading %s video JSON' % fmt) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 8bfe50214..693ba22c6 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor): # Album playlist ripped from fakeshoredrive with no metadata { 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project', + 'info_dict': { + 'title': 'PPP (Pistol P Project)', + 'id': '837572', + }, 'playlist': [{ 'info_dict': { - 'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', - 'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', + 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)', + 'id': '837577', 'ext': 'mp3', + 'uploader': 'Lil Herb a.k.a. G Herbo', } }], 'params': { - 'playliststart': 8, - 'playlistend': 8, + 'playliststart': 9, + 'playlistend': 9, } } ] diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 14b814120..436cc5155 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor): # For some weird reason, blip.tv serves a video instead of subtitles # when we request with a common UA req = compat_urllib_request.Request(url) - req.add_header('Youtubedl-user-agent', 'youtube-dl') + req.add_header('User-Agent', 'youtube-dl') return self._download_webpage(req, None, note=False) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 03f3f18c8..7b7a832dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,7 +108,6 @@ class InfoExtractor(object): (quality takes higher priority) -1 for default (order by other properties), -2 or smaller for less than default. - * http_referer HTTP Referer header value to set. * http_method HTTP method to use for the download. * http_headers A dictionary of additional HTTP headers to add to the request. 
@@ -130,7 +129,9 @@ class InfoExtractor(object): something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID * "url" + * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) * "resolution" (optional, string "{width}x{height"}, diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 81ceace53..1ccc1a964 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -5,6 +5,7 @@ import hashlib from .common import InfoExtractor from ..compat import ( + compat_urllib_parse, compat_urllib_request, compat_urlparse, ) @@ -16,7 +17,8 @@ from ..utils import ( class FC2IE(InfoExtractor): _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P[^/]+)' IE_NAME = 'fc2' - _TEST = { + _NETRC_MACHINE = 'fc2' + _TESTS = [{ 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', 'md5': 'a6ebe8ebe0396518689d963774a54eb7', 'info_dict': { @@ -24,12 +26,57 @@ class FC2IE(InfoExtractor): 'ext': 'flv', 'title': 'Boxing again with Puff', }, - } + }, { + 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/', + 'info_dict': { + 'id': '20150125cEva0hDn', + 'ext': 'mp4', + }, + 'params': { + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', + 'skip': 'requires actual password' + } + }] + + def _login(self): + (username, password) = self._get_login_info() + if username is None or password is None: + return False + + # Log in + login_form_strs = { + 'email': username, + 'password': password, + 'done': 'video', + 'Submit': ' Login ', + } + + # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode + # chokes on unicode + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) + login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') + request = compat_urllib_request.Request( + 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) + + login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') + if 'mode=redirect&login=done' not in login_results: + self.report_warning('unable to log in: bad username or password') + return False + + # this is also needed + login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done') + self._download_webpage( + login_redir, None, note='Login redirect', errnote='Login redirect failed') + + return True def _real_extract(self, url): video_id = self._match_id(url) + self._login() webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) @@ -46,7 +93,12 @@ class FC2IE(InfoExtractor): info = compat_urlparse.parse_qs(info_webpage) if 'err_code' in info: - raise ExtractorError('Error code: %s' % info['err_code'][0]) + # most of the time we can still download wideo even if err_code is 403 or 602 + self.report_warning( + 'Error code was: %s... but still trying' % info['err_code'][0]) + + if 'filepath' not in info: + raise ExtractorError('Cannot download file. 
Are you logged in?') video_url = info['filepath'][0] + '?mid=' + info['mid'][0] title_info = info.get('title') diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 68e2db943..0fb29de75 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P[0-9]+)\.aspx' _TEST = { 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player', + 'md5': '6269e8626fa1a891bf5369b386ae996a', 'info_dict': { 'id': '1165642', 'ext': 'mp4', @@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor): 'upload_date': '20141120', 'duration': 3960, }, - 'params': { - 'skip_download': 'rtmpdump required', - } } def _real_extract(self, url): diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index 6f3d2345b..e46954b47 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -2,18 +2,17 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor from ..utils import ( int_or_none, - unescapeHTML, + js_to_json, ) class KrasViewIE(InfoExtractor): IE_DESC = 'Красвью' - _VALID_URL = r'https?://krasview\.ru/video/(?P\d+)' + _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P\d+)' _TEST = { 'url': 'http://krasview.ru/video/512228', @@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - flashvars = json.loads(self._search_regex( - r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars')) + flashvars = json.loads(js_to_json(self._search_regex( + r'video_Init\(({.+?})', webpage, 'flashvars'))) video_url = flashvars['url'] - title = unescapeHTML(flashvars['title']) - description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None)) - thumbnail = flashvars['image'] - duration = int(flashvars['duration']) - filesize = int(flashvars['size']) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage, default=None) + thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage) + duration = int_or_none(flashvars.get('duration')) width = int_or_none(self._og_search_property('video:width', webpage, 'video width')) height = int_or_none(self._og_search_property('video:height', webpage, 'video height')) @@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': duration, - 'filesize': filesize, 'width': width, 'height': height, } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5ebc78033..22a726327 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor): webpage_url = self._MOBILE_TEMPLATE % mtvn_id req = compat_urllib_request.Request(webpage_url) # Otherwise we get a webpage that would execute some javascript - req.add_header('Youtubedl-user-agent', 'curl/7') + req.add_header('User-Agent', 'curl/7') webpage = self._download_webpage(req, mtvn_id, 'Downloading mobile page') metrics_url = unescapeHTML(self._search_regex(r'[\da-z_-]+)\.html' _TEST = { 'url': 'http://ubu.com/film/her_noise.html', - 'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9', + 'md5': '138d5652618bf0f03878978db9bef1ee', 'info_dict': { 'id': 
'her_noise', - 'ext': 'mp4', + 'ext': 'm4v', 'title': 'Her Noise - The Making Of (2007)', 'duration': 3600, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex( r'.+?Film & Video: ([^<]+)', webpage, 'title') duration = int_or_none(self._html_search_regex( - r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None)) - if duration: - duration *= 60 + r'Duration: (\d+) minutes', webpage, 'duration', fatal=False), + invscale=60) formats = [] - FORMAT_REGEXES = [ - ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"], - ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'] + ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"), + ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'), ] - + preference = qualities([fid for fid, _ in FORMAT_REGEXES]) for format_id, format_regex in FORMAT_REGEXES: m = re.search(format_regex, webpage) if m: formats.append({ 'url': m.group(1), 'format_id': format_id, + 'preference': preference(format_id), }) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index fc6e05fe0..273030316 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor): 'title': title, 'formats': formats, 'thumbnail': thumbnail, - 'http_referer': iframe_url, + 'http_headers': { + 'Referer': iframe_url, + }, } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 45466e31b..313b9c15d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, } diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b38b8349f..dbc6f5528 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -200,6 +200,10 @@ def parseOpts(overrideArguments=None): '--playlist-end', dest='playlistend', metavar='NUMBER', default=None, type=int, help='playlist video to end at (default is last)') + selection.add_option( + '--playlist-items', + dest='playlist_items', metavar='ITEM_SPEC', default=None, + help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. 
You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', @@ -373,7 +377,7 @@ def parseOpts(overrideArguments=None): downloader.add_option( '-R', '--retries', dest='retries', metavar='RETRIES', default=10, - help='number of retries (default is %default)') + help='number of retries (default is %default), or "infinite".') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', @@ -390,6 +394,10 @@ def parseOpts(overrideArguments=None): '--playlist-reverse', action='store_true', help='Download playlist videos in reverse order') + downloader.add_option( + '--xattr-set-filesize', + dest='xattr_set_filesize', action='store_true', + help='(experimental) set file xattribute ytdl.filesize with expected filesize') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', @@ -614,10 +622,6 @@ def parseOpts(overrideArguments=None): '--write-annotations', action='store_true', dest='writeannotations', default=False, help='write video annotations to a .annotation file') - filesystem.add_option( - '--write-thumbnail', - action='store_true', dest='writethumbnail', default=False, - help='write thumbnail image to disk') filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', @@ -637,6 +641,20 @@ def parseOpts(overrideArguments=None): action='store_true', dest='rm_cachedir', help='Delete all filesystem cache files') + thumbnail = optparse.OptionGroup(parser, 'Thumbnail images') + thumbnail.add_option( + '--write-thumbnail', + action='store_true', dest='writethumbnail', default=False, + help='write thumbnail image to disk') + thumbnail.add_option( + '--write-all-thumbnails', + action='store_true', dest='write_all_thumbnails', default=False, + help='write all thumbnail image formats to disk') + thumbnail.add_option( + '--list-thumbnails', + action='store_true', dest='list_thumbnails', default=False, + help='Simulate and list all available thumbnail formats') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') postproc.add_option( '-x', '--extract-audio', @@ -702,6 +720,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(selection) parser.add_option_group(downloader) parser.add_option_group(filesystem) + parser.add_option_group(thumbnail) parser.add_option_group(verbosity) parser.add_option_group(workarounds) parser.add_option_group(video_format) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2970d02a1..b8c52af74 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -606,11 +606,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): if 'Accept-encoding' in req.headers: del req.headers['Accept-encoding'] del req.headers['Youtubedl-no-compression'] - if 'Youtubedl-user-agent' in req.headers: - if 'User-agent' in req.headers: - del req.headers['User-agent'] - req.headers['User-agent'] = req.headers['Youtubedl-user-agent'] - del req.headers['Youtubedl-user-agent'] if sys.version_info < (2, 7) and '#' in req.get_full_url(): # Python 2.6 is brain-dead when it comes to fragments @@ -1664,3 +1659,11 @@ def determine_protocol(info_dict): return 'f4m' return compat_urllib_parse_urlparse(url).scheme + + +def render_table(header_row, data): + """ Render a list of rows, each as a list of values """ + table = [header_row] + data + max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] + format_str = ' 
'.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' + return '\n'.join(format_str % tuple(row) for row in table)
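A minimal sketch of how the new --playlist-items spec is expanded, adapted from the iter_playlistitems generator this patch adds to YoutubeDL.process_ie_result; the resulting 1-based indices are then applied as ie_entries[i - 1]. The spec string used here is illustrative only.

    # Expand a --playlist-items spec such as "1-3,7,10-13" into 1-based indices.
    def iter_playlistitems(spec):
        for string_segment in spec.split(','):
            if '-' in string_segment:
                start, end = string_segment.split('-')
                for item in range(int(start), int(end) + 1):
                    yield item
            else:
                yield int(string_segment)

    print(list(iter_playlistitems('1-3,7,10-13')))
    # [1, 2, 3, 7, 10, 11, 12, 13]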
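The per-format 'http_headers' dict that YoutubeDL._calc_headers now stores on every format is consumed verbatim by the external downloaders; a sketch of the resulting curl invocation, following the updated CurlFD._make_cmd (the info_dict values and the output filename below are illustrative, not taken from the patch):

    # Build a curl command line from a format's http_headers, as CurlFD does.
    info_dict = {
        'url': 'http://example.com/video.mp4',
        'http_headers': {
            'User-Agent': 'QuickTime compatible (youtube-dl)',
            'Referer': 'http://example.com/player.html',
        },
    }

    cmd = ['curl', '-o', 'video.mp4.part']
    for key, val in info_dict['http_headers'].items():
        cmd += ['--header', '%s: %s' % (key, val)]
    cmd += ['--', info_dict['url']]
    print(cmd)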
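For reference, the new render_table helper in standalone form (compat_str swapped for str so it runs outside youtube-dl), fed the same data as the new test_render_table case:

    def render_table(header_row, data):
        """ Render a list of rows, each as a list of values """
        table = [header_row] + data
        max_lens = [max(len(str(v)) for v in col) for col in zip(*table)]
        format_str = ' '.join('%-' + str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
        return '\n'.join(format_str % tuple(row) for row in table)

    print(render_table(['a', 'bcd'], [[123, 4], [9999, 51]]))
    # a    bcd
    # 123  4
    # 9999 51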