From: remitamine Date: Wed, 16 Mar 2016 12:16:27 +0000 (+0100) Subject: Merge pull request #8092 from bpfoley/twitter-thumbnail X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=83548824c29ccdf53a4659260aa3898939833882;hp=8bb56eeeea8154f811076c0a9093203fab224003 Merge pull request #8092 from bpfoley/twitter-thumbnail [utils] Add extract_attributes for extracting html tag attributes --- diff --git a/.gitignore b/.gitignore index 0422adf44..26dbde73d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.pyc *.pyo +*.class *~ *.DS_Store wine-py2exe/ @@ -32,4 +33,4 @@ test/testdata .tox youtube-dl.zsh .idea -.idea/* \ No newline at end of file +.idea/* diff --git a/AUTHORS b/AUTHORS index b51e23f2d..aa48cd5a6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -161,3 +161,5 @@ Jens Wille Robin Houtevelts Patrick Griffis Aidan Rowe +mutantmonkey +Ben Congdon diff --git a/Makefile b/Makefile index cb449b7e6..e98806791 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete + find . -name "*.class" -delete PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin @@ -44,7 +45,7 @@ test: ot: offlinetest offlinetest: codetest - nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py tar: youtube-dl.tar.gz diff --git a/README.md b/README.md index d66804a7a..68db546ef 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like. on Windows) --flat-playlist Do not extract the videos of a playlist, only list them. + --mark-watched Mark videos watched (YouTube only) + --no-mark-watched Do not mark videos watched (YouTube only) --no-color Do not emit color codes in output ## Network Options: @@ -179,7 +181,7 @@ which means you can modify it, redistribute it or use it however you like. to play it) --external-downloader COMMAND Use the specified external downloader. Currently supports - aria2c,axel,curl,httpie,wget + aria2c,avconv,axel,curl,ffmpeg,httpie,wget --external-downloader-args ARGS Give these arguments to the external downloader diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 43403233d..a6dcc2576 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -54,6 +54,7 @@ - **AtresPlayer** - **ATTTechChannel** - **AudiMedia** + - **AudioBoom** - **audiomack** - **audiomack:album** - **Azubu** @@ -167,6 +168,8 @@ - **Dump** - **Dumpert** - **dvtv**: http://video.aktualne.cz/ + - **dw** + - **dw:article** - **EaglePlatform** - **EbaumsWorld** - **EchoMsk** @@ -190,10 +193,10 @@ - **ExpoTV** - **ExtremeTube** - **facebook** - - **facebook:post** - **faz.net** - **fc2** - **Fczenit** + - **features.aol.com** - **fernsehkritik.tv** - **Firstpost** - **FiveTV** @@ -293,6 +296,7 @@ - **kontrtube**: KontrTube.ru - Труба зовёт - **KrasView**: Красвью - **Ku6** + - **KUSI** - **kuwo:album**: 酷我音乐 - 专辑 - **kuwo:category**: 酷我音乐 - 分类 - **kuwo:chart**: 酷我音乐 - 排行榜 @@ -301,12 +305,11 @@ - **kuwo:song**: 酷我音乐 - **la7.tv** - **Laola1Tv** + - **Le**: 乐视网 - **Lecture2Go** - **Lemonde** - - **Letv**: 乐视网 + - **LePlaylist** - **LetvCloud**: 乐视云 - - **LetvPlaylist** - - **LetvTv** - **Libsyn** - **life:embed** - **lifenews**: LIFE | NEWS @@ -324,6 +327,7 @@ - **m6** - **macgamestore**: MacGameStore trailers - **mailru**: Видео@Mail.Ru + - **MakersChannel** - **MakerTV** - **Malemotion** - **MatchTV** @@ -334,6 +338,7 @@ - **Mgoon** - **Minhateca** - **MinistryGrid** + - **Minoto** - **miomio.tv** - **MiTele**: mitele.es - **mixcloud** @@ -421,6 +426,7 @@ - **Npr** - **NRK** - **NRKPlaylist** + - **NRKSkole**: NRK Skole - **NRKTV**: NRK TV and NRK Radio - **ntv.ru** - **Nuvid** @@ -669,6 +675,7 @@ - **UDNEmbed**: 聯合影音 - **Unistra** - **Urort**: NRK P3 Urørt + - **USAToday** - **ustream** - **ustream:channel** - **Ustudio** @@ -682,6 +689,7 @@ - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **Vice** + - **ViceShow** - **Viddler** - **video.google:search**: Google Video search - **video.mit.edu** @@ -709,6 +717,7 @@ - **vimeo:channel** - **vimeo:group** - **vimeo:likes**: Vimeo user likes + - **vimeo:ondemand** - **vimeo:review**: Review pages on vimeo - **vimeo:user** - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication) diff --git a/test/helper.py b/test/helper.py index bdd7acca4..f2d878212 100644 --- a/test/helper.py +++ b/test/helper.py @@ -11,8 +11,11 @@ import sys import youtube_dl.extractor from youtube_dl import YoutubeDL -from youtube_dl.utils import ( +from youtube_dl.compat import ( + compat_os_name, compat_str, +) +from youtube_dl.utils import ( preferredencoding, write_string, ) @@ -42,7 +45,7 @@ def report_warning(message): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored ''' - if sys.stderr.isatty() and os.name != 'nt': + if sys.stderr.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 59f7ab49d..efbee3b71 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -502,6 +502,9 @@ class TestYoutubeDL(unittest.TestCase): assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, }), '^\s*10k$') + assertRegexpMatches(self, ydl._format_note({ + 'fps': 30, + }), '^30fps$') def test_postprocessors(self): filename = 'post-processor-testfile.mp4' diff --git a/test/test_http.py b/test/test_http.py index f2e305b6f..fc59b1aed 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -52,7 +52,12 @@ class TestHTTP(unittest.TestCase): ('localhost', 0), HTTPTestRequestHandler) self.httpd.socket = ssl.wrap_socket( self.httpd.socket, certfile=certfn, server_side=True) - self.port = self.httpd.socket.getsockname()[1] + if os.name == 'java': + # In Jython SSLSocket is not a subclass of socket.socket + sock = self.httpd.socket.sock + else: + sock = self.httpd.socket + self.port = sock.getsockname()[1] self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() diff --git a/test/test_utils.py b/test/test_utils.py index cb85e18f0..5a0109977 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -42,6 +42,7 @@ from youtube_dl.utils import ( orderedSet, parse_duration, parse_filesize, + parse_count, parse_iso8601, read_batch_urls, sanitize_filename, @@ -62,6 +63,7 @@ from youtube_dl.utils import ( lowercase_escape, url_basename, urlencode_postdata, + update_url_query, version_tuple, xpath_with_ns, xpath_element, @@ -78,6 +80,8 @@ from youtube_dl.utils import ( from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, + compat_urlparse, + compat_parse_qs, ) @@ -456,6 +460,40 @@ class TestUtil(unittest.TestCase): data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) self.assertTrue(isinstance(data, bytes)) + def test_update_url_query(self): + def query_dict(url): + return compat_parse_qs(compat_urlparse.urlparse(url).query) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), + query_dict('http://example.com/path?quality=HD&format=mp4')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), + query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'fields': 'id,formats,subtitles'})), + query_dict('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), + query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path?manifest=f4m', {'manifest': []})), + query_dict('http://example.com/path')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), + query_dict('http://example.com/path?system=LINUX')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'fields': b'id,formats,subtitles'})), + query_dict('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'width': 1080, 'height': 720})), + query_dict('http://example.com/path?width=1080&height=720')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'bitrate': 5020.43})), + query_dict('http://example.com/path?bitrate=5020.43')) + self.assertEqual(query_dict(update_url_query( + 'http://example.com/path', {'test': '第二行тест'})), + query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + def test_dict_get(self): FALSE_VALUES = { 'none': None, @@ -656,6 +694,15 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) + def test_parse_count(self): + self.assertEqual(parse_count(None), None) + self.assertEqual(parse_count(''), None) + self.assertEqual(parse_count('0'), 0) + self.assertEqual(parse_count('1000'), 1000) + self.assertEqual(parse_count('1.000'), 1000) + self.assertEqual(parse_count('1.1k'), 1100) + self.assertEqual(parse_count('1.1kk'), 1100000) + def test_version_tuple(self): self.assertEqual(version_tuple('1'), (1,)) self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8f3a8b9e3..8c651cd52 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,9 +24,6 @@ import time import tokenize import traceback -if os.name == 'nt': - import ctypes - from .compat import ( compat_basestring, compat_cookiejar, @@ -34,6 +31,7 @@ from .compat import ( compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_os_name, compat_str, compat_tokenize_tokenize, compat_urllib_error, @@ -87,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( + FFmpegFixupM3u8PP, FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegMergerPP, @@ -95,6 +94,9 @@ from .postprocessor import ( ) from .version import __version__ +if compat_os_name == 'nt': + import ctypes + class YoutubeDL(object): """YoutubeDL class. @@ -450,7 +452,7 @@ class YoutubeDL(object): def to_console_title(self, message): if not self.params.get('consoletitle', False): return - if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) @@ -521,7 +523,7 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' @@ -533,7 +535,7 @@ class YoutubeDL(object): Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': + if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;31mERROR:\033[0m' else: _msg_header = 'ERROR:' @@ -566,7 +568,7 @@ class YoutubeDL(object): elif template_dict.get('height'): template_dict['resolution'] = '%sp' % template_dict['height'] elif template_dict.get('width'): - template_dict['resolution'] = '?x%d' % template_dict['width'] + template_dict['resolution'] = '%dx?' % template_dict['width'] sanitize = lambda k, v: sanitize_filename( compat_str(v), @@ -1232,6 +1234,10 @@ class YoutubeDL(object): if t.get('id') is None: t['id'] = '%d' % i + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + return + if thumbnails and 'thumbnail' not in info_dict: info_dict['thumbnail'] = thumbnails[-1]['url'] @@ -1333,9 +1339,6 @@ class YoutubeDL(object): if self.params.get('listformats'): self.list_formats(info_dict) return - if self.params.get('list_thumbnails'): - self.list_thumbnails(info_dict) - return req_format = self.params.get('format') if req_format is None: @@ -1637,6 +1640,8 @@ class YoutubeDL(object): if fixup_policy is None: fixup_policy = 'detect_or_warn' + INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.' + stretched_ratio = info_dict.get('stretched_ratio') if stretched_ratio is not None and stretched_ratio != 1: if fixup_policy == 'warn': @@ -1649,15 +1654,18 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(stretched_pp) else: self.report_warning( - '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'], stretched_ratio)) + '%s: Non-uniform pixel ratio (%s). %s' + % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') - if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash': + if (info_dict.get('requested_formats') is None and + info_dict.get('container') == 'm4a_dash'): if fixup_policy == 'warn': - self.report_warning('%s: writing DASH m4a. Only some players support this container.' % ( - info_dict['id'])) + self.report_warning( + '%s: writing DASH m4a. ' + 'Only some players support this container.' + % info_dict['id']) elif fixup_policy == 'detect_or_warn': fixup_pp = FFmpegFixupM4aPP(self) if fixup_pp.available: @@ -1665,8 +1673,27 @@ class YoutubeDL(object): info_dict['__postprocessors'].append(fixup_pp) else: self.report_warning( - '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % ( - info_dict['id'])) + '%s: writing DASH m4a. ' + 'Only some players support this container. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) + else: + assert fixup_policy in ('ignore', 'never') + + if (info_dict.get('protocol') == 'm3u8_native' or + info_dict.get('protocol') == 'm3u8' and + self.params.get('hls_prefer_native')): + if fixup_policy == 'warn': + self.report_warning('%s: malformated aac bitstream.' % ( + info_dict['id'])) + elif fixup_policy == 'detect_or_warn': + fixup_pp = FFmpegFixupM3u8PP(self) + if fixup_pp.available: + info_dict.setdefault('__postprocessors', []) + info_dict['__postprocessors'].append(fixup_pp) + else: + self.report_warning( + '%s: malformated aac bitstream. %s' + % (info_dict['id'], INSTALL_FFMPEG_MESSAGE)) else: assert fixup_policy in ('ignore', 'never') @@ -1830,7 +1857,9 @@ class YoutubeDL(object): if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('fps') is not None: - res += ', %sfps' % fdict['fps'] + if res: + res += ', ' + res += '%sfps' % fdict['fps'] if fdict.get('acodec') is not None: if res: res += ', ' @@ -1873,13 +1902,8 @@ class YoutubeDL(object): def list_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if not thumbnails: - tn_url = info_dict.get('thumbnail') - if tn_url: - thumbnails = [{'id': '0', 'url': tn_url}] - else: - self.to_screen( - '[info] No thumbnails present for %s' % info_dict['id']) - return + self.to_screen('[info] No thumbnails present for %s' % info_dict['id']) + return self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7b9afc36d..74702786a 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -331,6 +331,9 @@ def compat_ord(c): return ord(c) +compat_os_name = os._name if os.name == 'java' else os.name + + if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser @@ -351,7 +354,7 @@ else: # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib # for different platforms with correct environment variables decoding. - if os.name == 'posix': + if compat_os_name == 'posix': def compat_expanduser(path): """Expand ~ and ~user constructions. If user or $HOME is unknown, do nothing.""" @@ -375,7 +378,7 @@ else: userhome = pwent.pw_dir userhome = userhome.rstrip('/') return (userhome + path[i:]) or '/' - elif os.name == 'nt' or os.name == 'ce': + elif compat_os_name == 'nt' or compat_os_name == 'ce': def compat_expanduser(path): """Expand ~ and ~user constructs. @@ -562,6 +565,7 @@ __all__ = [ 'compat_itertools_count', 'compat_kwargs', 'compat_ord', + 'compat_os_name', 'compat_parse_qs', 'compat_print', 'compat_shlex_split', diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index dccc59212..73b34fdae 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -1,14 +1,16 @@ from __future__ import unicode_literals from .common import FileDownloader -from .external import get_external_downloader from .f4m import F4mFD from .hls import HlsFD -from .hls import NativeHlsFD from .http import HttpFD -from .rtsp import RtspFD from .rtmp import RtmpFD from .dash import DashSegmentsFD +from .rtsp import RtspFD +from .external import ( + get_external_downloader, + FFmpegFD, +) from ..utils import ( determine_protocol, @@ -16,8 +18,8 @@ from ..utils import ( PROTOCOL_MAP = { 'rtmp': RtmpFD, - 'm3u8_native': NativeHlsFD, - 'm3u8': HlsFD, + 'm3u8_native': HlsFD, + 'm3u8': FFmpegFD, 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, @@ -30,14 +32,17 @@ def get_suitable_downloader(info_dict, params={}): protocol = determine_protocol(info_dict) info_dict['protocol'] = protocol + # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): + # return FFmpegFD + external_downloader = params.get('external_downloader') if external_downloader is not None: ed = get_external_downloader(external_downloader) - if ed.supports(info_dict): + if ed.can_download(info_dict): return ed if protocol == 'm3u8' and params.get('hls_prefer_native'): - return NativeHlsFD + return HlsFD return PROTOCOL_MAP.get(protocol, HttpFD) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 2d5154051..f39db58f6 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -5,6 +5,7 @@ import re import sys import time +from ..compat import compat_os_name from ..utils import ( encodeFilename, error_to_compat_str, @@ -219,7 +220,7 @@ class FileDownloader(object): if self.params.get('progress_with_newline', False): self.to_screen(fullmsg) else: - if os.name == 'nt': + if compat_os_name == 'nt': prev_len = getattr(self, '_report_progress_prev_line_length', 0) if prev_len > len(fullmsg): diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 2bc011266..30277dc20 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -2,8 +2,11 @@ from __future__ import unicode_literals import os.path import subprocess +import sys +import re from .common import FileDownloader +from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, cli_valueless_option, @@ -11,6 +14,8 @@ from ..utils import ( cli_configuration_args, encodeFilename, encodeArgument, + handle_youtubedl_headers, + check_executable, ) @@ -45,10 +50,18 @@ class ExternalFD(FileDownloader): def exe(self): return self.params.get('external_downloader') + @classmethod + def available(cls): + return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT]) + @classmethod def supports(cls, info_dict): return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') + @classmethod + def can_download(cls, info_dict): + return cls.available() and cls.supports(info_dict) + def _option(self, command_option, param): return cli_option(self.params, command_option, param) @@ -76,6 +89,8 @@ class ExternalFD(FileDownloader): class CurlFD(ExternalFD): + AVAILABLE_OPT = '-V' + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): @@ -89,6 +104,8 @@ class CurlFD(ExternalFD): class AxelFD(ExternalFD): + AVAILABLE_OPT = '-V' + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): @@ -99,6 +116,8 @@ class AxelFD(ExternalFD): class WgetFD(ExternalFD): + AVAILABLE_OPT = '--version' + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): @@ -112,6 +131,8 @@ class WgetFD(ExternalFD): class Aria2cFD(ExternalFD): + AVAILABLE_OPT = '-v' + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c'] cmd += self._configuration_args([ @@ -130,12 +151,112 @@ class Aria2cFD(ExternalFD): class HttpieFD(ExternalFD): + @classmethod + def available(cls): + return check_executable('http', ['--version']) + def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] for key, val in info_dict['http_headers'].items(): cmd += ['%s:%s' % (key, val)] return cmd + +class FFmpegFD(ExternalFD): + @classmethod + def supports(cls, info_dict): + return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') + + @classmethod + def available(cls): + return FFmpegPostProcessor().available + + def _call_downloader(self, tmpfilename, info_dict): + url = info_dict['url'] + ffpp = FFmpegPostProcessor(downloader=self) + if not ffpp.available: + self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + return False + ffpp.check_version() + + args = [ffpp.executable, '-y'] + + args += self._configuration_args() + + # start_time = info_dict.get('start_time') or 0 + # if start_time: + # args += ['-ss', compat_str(start_time)] + # end_time = info_dict.get('end_time') + # if end_time: + # args += ['-t', compat_str(end_time - start_time)] + + if info_dict['http_headers'] and re.match(r'^https?://', url): + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. + headers = handle_youtubedl_headers(info_dict['http_headers']) + args += [ + '-headers', + ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + + protocol = info_dict.get('protocol') + + if protocol == 'rtmp': + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + if player_url is not None: + args += ['-rtmp_swfverify', player_url] + if page_url is not None: + args += ['-rtmp_pageurl', page_url] + if app is not None: + args += ['-rtmp_app', app] + if play_path is not None: + args += ['-rtmp_playpath', play_path] + if tc_url is not None: + args += ['-rtmp_tcurl', tc_url] + if flash_version is not None: + args += ['-rtmp_flashver', flash_version] + if live: + args += ['-rtmp_live', 'live'] + + args += ['-i', url, '-c', 'copy'] + if protocol == 'm3u8': + if self.params.get('hls_use_mpegts', False): + args += ['-f', 'mpegts'] + else: + args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + elif protocol == 'rtmp': + args += ['-f', 'flv'] + else: + args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])] + + args = [encodeArgument(opt) for opt in args] + args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) + + self._debug_cmd(args) + + proc = subprocess.Popen(args, stdin=subprocess.PIPE) + try: + retval = proc.wait() + except KeyboardInterrupt: + # subprocces.run would send the SIGKILL signal to ffmpeg and the + # mp4 file couldn't be played, but if we ask ffmpeg to quit it + # produces a file that is playable (this is mostly useful for live + # streams). Note that Windows is not affected and produces playable + # files (see https://github.com/rg3/youtube-dl/issues/8300). + if sys.platform != 'win32': + proc.communicate(b'q') + raise + return retval + + +class AVconvFD(FFmpegFD): + pass + _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 5bc99492b..a5bae9669 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -99,7 +99,8 @@ class FragmentFD(FileDownloader): state['eta'] = self.calc_eta( start, time_now, estimated_size, state['downloaded_bytes']) - state['speed'] = s.get('speed') + state['speed'] = s.get('speed') or ctx.get('speed') + ctx['speed'] = state['speed'] ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 2a775bf00..a01dac031 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -1,87 +1,19 @@ from __future__ import unicode_literals -import os +import os.path import re -import subprocess -import sys -from .common import FileDownloader from .fragment import FragmentFD from ..compat import compat_urlparse -from ..postprocessor.ffmpeg import FFmpegPostProcessor from ..utils import ( - encodeArgument, encodeFilename, sanitize_open, - handle_youtubedl_headers, ) -class HlsFD(FileDownloader): - def real_download(self, filename, info_dict): - url = info_dict['url'] - self.report_destination(filename) - tmpfilename = self.temp_name(filename) - - ffpp = FFmpegPostProcessor(downloader=self) - if not ffpp.available: - self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') - return False - ffpp.check_version() - - args = [ffpp.executable, '-y'] - - if info_dict['http_headers'] and re.match(r'^https?://', url): - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - headers = handle_youtubedl_headers(info_dict['http_headers']) - args += [ - '-headers', - ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] - - args += ['-i', url, '-c', 'copy'] - if self.params.get('hls_use_mpegts', False): - args += ['-f', 'mpegts'] - else: - args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - - args = [encodeArgument(opt) for opt in args] - args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) - - self._debug_cmd(args) - - proc = subprocess.Popen(args, stdin=subprocess.PIPE) - try: - retval = proc.wait() - except KeyboardInterrupt: - # subprocces.run would send the SIGKILL signal to ffmpeg and the - # mp4 file couldn't be played, but if we ask ffmpeg to quit it - # produces a file that is playable (this is mostly useful for live - # streams). Note that Windows is not affected and produces playable - # files (see https://github.com/rg3/youtube-dl/issues/8300). - if sys.platform != 'win32': - proc.communicate(b'q') - raise - if retval == 0: - fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) - self.try_rename(tmpfilename, filename) - self._hook_progress({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, - 'filename': filename, - 'status': 'finished', - }) - return True - else: - self.to_stderr('\n') - self.report_error('%s exited with code %d' % (ffpp.basename, retval)) - return False - - -class NativeHlsFD(FragmentFD): - """ A more limited implementation that does not require ffmpeg """ +class HlsFD(FragmentFD): + """ A limited implementation that does not require ffmpeg """ FD_NAME = 'hlsnative' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 037654a23..9502d07a4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE from .anysex import AnySexIE -from .aol import AolIE +from .aol import ( + AolIE, + AolFeaturesIE, +) from .allocine import AllocineIE from .aparat import AparatIE from .appleconnect import AppleConnectIE @@ -51,6 +54,7 @@ from .arte import ( from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .audimedia import AudiMediaIE +from .audioboom import AudioBoomIE from .audiomack import AudiomackIE, AudiomackAlbumIE from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE @@ -185,6 +189,10 @@ from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .dropbox import DropboxIE +from .dw import ( + DWIE, + DWArticleIE, +) from .eagleplatform import EaglePlatformIE from .ebaumsworld import EbaumsWorldIE from .echomsk import EchoMskIE @@ -209,10 +217,7 @@ from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE -from .facebook import ( - FacebookIE, - FacebookPostIE, -) +from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE from .fczenit import FczenitIE @@ -340,6 +345,7 @@ from .konserthusetplay import KonserthusetPlayIE from .kontrtube import KontrTubeIE from .krasview import KrasViewIE from .ku6 import Ku6IE +from .kusi import KUSIIE from .kuwo import ( KuwoIE, KuwoAlbumIE, @@ -383,6 +389,7 @@ from .lynda import ( from .m6 import M6IE from .macgamestore import MacGameStoreIE from .mailru import MailRuIE +from .makerschannel import MakersChannelIE from .makertv import MakerTVIE from .malemotion import MalemotionIE from .matchtv import MatchTVIE @@ -392,6 +399,7 @@ from .metacritic import MetacriticIE from .mgoon import MgoonIE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE +from .minoto import MinotoIE from .miomio import MioMioIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE @@ -590,6 +598,7 @@ from .regiotv import RegioTVIE from .restudy import RestudyIE from .reverbnation import ReverbNationIE from .revision3 import Revision3IE +from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE @@ -728,6 +737,7 @@ from .theplatform import ( ThePlatformFeedIE, ) from .thesixtyone import TheSixtyOneIE +from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .tinypic import TinyPicIE @@ -774,6 +784,7 @@ from .tv2 import ( TV2IE, TV2ArticleIE, ) +from .tv3 import TV3IE from .tv4 import TV4IE from .tvc import ( TVCIE, @@ -813,6 +824,7 @@ from .udn import UDNEmbedIE from .digiteka import DigitekaIE from .unistra import UnistraIE from .urort import UrortIE +from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE from .ustudio import UstudioIE from .varzesh3 import Varzesh3IE @@ -828,7 +840,10 @@ from .vgtv import ( VGTVIE, ) from .vh1 import VH1IE -from .vice import ViceIE +from .vice import ( + ViceIE, + ViceShowIE, +) from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE @@ -855,6 +870,7 @@ from .vimeo import ( VimeoChannelIE, VimeoGroupsIE, VimeoLikesIE, + VimeoOndemandIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index 5b2c0dc9a..cddcaa489 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Slum - Episode 1: Deliverance', 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', - 'uploader': 'Al Jazeera English', + 'uploader_id': '665003303001', + 'timestamp': 1411116829, + 'upload_date': '20140919', }, - 'add_ie': ['BrightcoveLegacy'], + 'add_ie': ['BrightcoveNew'], 'skip': 'Not accessible from Travis CI server', } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s' def _real_extract(self, url): program_name = self._match_id(url) webpage = self._download_webpage(url, program_name) brightcove_id = self._search_regex( r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id') - - return { - '_type': 'url', - 'url': ( - 'brightcove:' - 'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc' - '&%40videoPlayer={0}'.format(brightcove_id) - ), - 'ie_key': 'BrightcoveLegacy', - } + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index b51eafc45..b761b2cc4 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -1,24 +1,11 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'''(?x) - (?: - aol-video:| - http://on\.aol\.com/ - (?: - video/.*-| - playlist/(?P[^/?#]+?)-(?P[0-9]+)[?#].*_videoid= - ) - ) - (?P[0-9]+) - (?:$|\?) - ''' + _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P[0-9]+)(?:$|\?)' _TESTS = [{ 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', @@ -29,42 +16,31 @@ class AolIE(InfoExtractor): 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam', }, 'add_ie': ['FiveMin'], - }, { - 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316', - 'info_dict': { - 'id': '152147', - 'title': 'Brace Yourself - Today\'s Weirdest News', - }, - 'playlist_mincount': 10, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - playlist_id = mobj.group('playlist_id') - if not playlist_id or self._downloader.params.get('noplaylist'): - return self.url_result('5min:%s' % video_id) + video_id = self._match_id(url) + return self.url_result('5min:%s' % video_id) - self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) - webpage = self._download_webpage(url, playlist_id) - title = self._html_search_regex( - r'

(.+?)

', webpage, 'title') - playlist_html = self._search_regex( - r"(?s)(.*?)", webpage, - 'playlist HTML') - entries = [{ - '_type': 'url', - 'url': 'aol-video:%s' % m.group('id'), - 'ie_key': 'Aol', - } for m in re.finditer( - r"[0-9]+)'\s+class='video-thumb'>", - playlist_html)] +class AolFeaturesIE(InfoExtractor): + IE_NAME = 'features.aol.com' + _VALID_URL = r'http://features\.aol\.com/video/(?P[^/?#]+)' - return { - '_type': 'playlist', - 'id': playlist_id, - 'display_id': mobj.group('playlist_display_id'), - 'title': title, - 'entries': entries, - } + _TESTS = [{ + 'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts', + 'md5': '7db483bb0c09c85e241f84a34238cc75', + 'info_dict': { + 'id': '519507715', + 'ext': 'mp4', + 'title': 'What To Watch - February 17, 2016', + }, + 'add_ie': ['FiveMin'], + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + return self.url_result(self._search_regex( + r'