From: Sergey M․ Date: Thu, 14 May 2015 09:18:58 +0000 (+0600) Subject: Merge branch 'best-fallback-on-outdated-avconv' of https://github.com/dstftw/youtube... X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=98b8ec8616611194dee77dc6ad88303bae72ce6d;hp=97fcf1bbd07ae0c5b6e530dcf2623d199452a76c;p=youtube-dl Merge branch 'best-fallback-on-outdated-avconv' of https://github.com/dstftw/youtube-dl into dstftw-best-fallback-on-outdated-avconv Conflicts: youtube_dl/YoutubeDL.py --- diff --git a/test/test_utils.py b/test/test_utils.py index 86b110a7d..b40107037 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -600,7 +600,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')

The following line contains Chinese characters and special symbols

第二行
♪♪

-

Third
Line

+

Third
Line

''' diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 00f86b342..4c8196d08 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1085,8 +1085,8 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: req_format_list = [] - if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' - and info_dict['extractor'] in ['youtube', 'ted']): + if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and + info_dict['extractor'] in ['youtube', 'ted']): merger = FFmpegMergerPP(self) if merger.available and merger.can_merge(): req_format_list.append('bestvideo+bestaudio') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9cc9f851f..ace17857c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -240,7 +240,13 @@ def _real_main(argv=None): if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: - postprocessors.append({'key': 'EmbedThumbnail'}) + already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails + postprocessors.append({ + 'key': 'EmbedThumbnail', + 'already_have_thumbnail': already_have_thumbnail + }) + if not already_have_thumbnail: + opts.writethumbnail = True # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. if opts.exec_cmd: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e808f2734..de19dfd7a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -489,8 +489,9 @@ from .soundgasm import ( ) from .southpark import ( SouthParkIE, - SouthParkEsIE, SouthParkDeIE, + SouthParkDkIE, + SouthParkEsIE, SouthParkNlIE ) from .space import SpaceIE @@ -543,7 +544,10 @@ from .thesixtyone import TheSixtyOneIE from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE -from .tmz import TMZIE +from .tmz import ( + TMZIE, + TMZArticleIE, +) from .tnaflix import TNAFlixIE from .thvideo import ( THVideoIE, diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 6252be05b..3b2de517e 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor): 'ext': 'mp4', 'description': 'md5:5438d33774b6bdc662f9485a340401cc', 'title': 'Season 5 Episode 5', - 'thumbnail': 're:^https?://.*promo.*' + 'thumbnail': 're:^https?://.*\.jpg$' }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 1b14471e5..699b4f7d0 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -25,14 +25,14 @@ class CanalplusIE(InfoExtractor): } _TESTS = [{ - 'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', - 'md5': '3db39fb48b9685438ecf33a1078023e4', + 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', + 'md5': 'b3481d7ca972f61e37420798d0a9d934', 'info_dict': { - 'id': '922470', + 'id': '1263092', 'ext': 'flv', - 'title': 'Zapping - 26/08/13', - 'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', - 'upload_date': '20130826', + 'title': 'Le Zapping - 13/05/15', + 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', + 'upload_date': '20150513', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', @@ -56,7 +56,7 @@ class CanalplusIE(InfoExtractor): 'skip': 'videos get deleted after a while', }, { 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': '65aa83ad62fe107ce29e564bb8712580', + 'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4', 'info_dict': { 'id': '1213714', 'ext': 'flv', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 981e34bc7..65bb77086 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1072,9 +1072,6 @@ class InfoExtractor(object): def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") - def _subtitles_timecode(self, seconds): - return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) - class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index 9c594b757..999fb5620 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -26,7 +26,7 @@ class DumpertIE(InfoExtractor): video_id = self._match_id(url) req = compat_urllib_request.Request(url) - req.add_header('Cookie', 'nsfw=1') + req.add_header('Cookie', 'nsfw=1; cpc=10') webpage = self._download_webpage(req, video_id) files_base64 = self._search_regex( diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py index 2bb078036..4597d1b96 100644 --- a/youtube_dl/extractor/kanalplay.py +++ b/youtube_dl/extractor/kanalplay.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, + srt_subtitles_timecode, ) @@ -39,8 +40,8 @@ class KanalPlayIE(InfoExtractor): '%s\r\n%s --> %s\r\n%s' % ( num, - self._subtitles_timecode(item['startMillis'] / 1000.0), - self._subtitles_timecode(item['endMillis'] / 1000.0), + srt_subtitles_timecode(item['startMillis'] / 1000.0), + srt_subtitles_timecode(item['endMillis'] / 1000.0), item['text'], ) for num, item in enumerate(subs, 1)) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index e91d3a248..cc70c2950 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, @@ -200,20 +199,10 @@ class NRKTVIE(InfoExtractor): url = "%s%s" % (baseurl, subtitlesurl) self._debug_print('%s: Subtitle url: %s' % (video_id, url)) captions = self._download_xml( - url, video_id, 'Downloading subtitles', - transform_source=lambda s: s.replace(r'
', '\r\n')) + url, video_id, 'Downloading subtitles') lang = captions.get('lang', 'no') - ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) - srt = '' - for pos, p in enumerate(ps): - begin = parse_duration(p.get('begin')) - duration = parse_duration(p.get('dur')) - starttime = self._subtitles_timecode(begin) - endtime = self._subtitles_timecode(begin + duration) - srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text) return {lang: [ {'ext': 'ttml', 'url': url}, - {'ext': 'srt', 'data': srt}, ]} def _extract_f4m(self, manifest_url, video_id): diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 6ffbe3863..7f254b867 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -89,7 +89,7 @@ class NYTimesIE(NYTimesBaseIE): class NYTimesArticleIE(NYTimesBaseIE): - _VALID_URL = r'https?://(?:www)?\.nytimes\.com/(.(?[^.]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?[^.]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0', 'md5': 'e2076d58b4da18e6a001d53fd56db3c9', diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 155d0ee6a..fbc521d1a 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -6,6 +6,7 @@ from ..utils import ( unified_strdate, int_or_none, qualities, + unescapeHTML, ) @@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor): webpage = self._download_webpage(url, video_id) player = self._parse_json( - self._search_regex( - r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'), + unescapeHTML(self._search_regex( + r'data-attributes="([^"]+)"', webpage, 'player')), video_id) metadata = self._parse_json(player['flashvars']['metadata'], video_id) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 59e31198c..7fb165a87 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -57,3 +57,14 @@ class SouthParkNlIE(SouthParkIE): 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', 'playlist_count': 4, }] + + +class SouthParkDkIE(SouthParkIE): + IE_NAME = 'southparkstudios.dk' + _VALID_URL = r'https?://(?:www\.)?(?Psouthparkstudios\.dk/(?:clips|full-episodes)/(?P.+?)(\?|#|$))' + _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' + + _TESTS = [{ + 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', + 'playlist_count': 4, + }] diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py index c5c6fdc51..7dbe68b5c 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -30,3 +30,31 @@ class TMZIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._html_search_meta('ThumbURL', webpage), } + + +class TMZArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P[^/]+)/?' + _TEST = { + 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', + 'md5': 'e482a414a38db73087450e3a6ce69d00', + 'info_dict': { + 'id': '0_6snoelag', + 'ext': 'mp4', + 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', + 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + embedded_video_info_str = self._html_search_regex( + r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info') + + embedded_video_info = self._parse_json( + embedded_video_info_str, video_id, + transform_source=lambda s: s.replace('\\', '')) + + return self.url_result( + 'http://www.tmz.com/videos/%s/' % embedded_video_info['id']) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 65c459fad..c733a48fa 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -75,7 +75,7 @@ class VineIE(InfoExtractor): return { 'id': video_id, 'title': self._og_search_title(webpage), - 'alt_title': self._og_search_description(webpage), + 'alt_title': self._og_search_description(webpage, default=None), 'description': data['description'], 'thumbnail': data['thumbnailUrl'], 'upload_date': unified_strdate(data['created']), diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index 1afbe68ed..7dc1e2f2b 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -4,12 +4,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class ZingMp3BaseInfoExtractor(InfoExtractor): - @staticmethod - def _extract_item(item): + def _extract_item(self, item): + error_message = item.find('./errormessage').text + if error_message: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), + expected=True) + title = item.find('./title').text.strip() source = item.find('./source').text extension = item.attrib['type'] diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 4868a42fd..8f825f785 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -7,12 +7,9 @@ import subprocess from .ffmpeg import FFmpegPostProcessor -from ..compat import ( - compat_urlretrieve, -) from ..utils import ( - determine_ext, check_executable, + encodeArgument, encodeFilename, PostProcessingError, prepend_extension, @@ -25,26 +22,30 @@ class EmbedThumbnailPPError(PostProcessingError): class EmbedThumbnailPP(FFmpegPostProcessor): + def __init__(self, downloader=None, already_have_thumbnail=False): + super(EmbedThumbnailPP, self).__init__(downloader) + self._already_have_thumbnail = already_have_thumbnail + def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - temp_thumbnail = filename + '.' + determine_ext(info['thumbnail']) - if not info.get('thumbnail'): + if not info.get('thumbnails'): raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.') - compat_urlretrieve(info['thumbnail'], temp_thumbnail) + thumbnail_filename = info['thumbnails'][-1]['filename'] if info['ext'] == 'mp3': options = [ - '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', + '-c', 'copy', '-map', '0', '-map', '1', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"'] self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) + self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) - os.remove(encodeFilename(temp_thumbnail)) + if not self._already_have_thumbnail: + os.remove(encodeFilename(thumbnail_filename)) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) @@ -52,7 +53,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if not check_executable('AtomicParsley', ['-v']): raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.') - cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename] + cmd = [encodeFilename('AtomicParsley', True), + encodeFilename(filename, True), + encodeArgument('--artwork'), + encodeFilename(thumbnail_filename, True), + encodeArgument('-o'), + encodeFilename(temp_filename, True)] self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename) @@ -66,7 +72,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): msg = stderr.decode('utf-8', 'replace').strip() raise EmbedThumbnailPPError(msg) - os.remove(encodeFilename(temp_thumbnail)) + if not self._already_have_thumbnail: + os.remove(encodeFilename(thumbnail_filename)) # for formats that don't support thumbnails (like 3gp) AtomicParsley # won't create to the temporary file if b'No changes' in stdout: diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index 93d0abcf6..7d88e1308 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -3,18 +3,34 @@ from __future__ import unicode_literals import os import subprocess import sys +import errno from .common import PostProcessor -from ..compat import ( - subprocess_check_output -) from ..utils import ( check_executable, hyphenate_date, version_tuple, + PostProcessingError, + encodeArgument, + encodeFilename, ) +class XAttrMetadataError(PostProcessingError): + def __init__(self, code=None, msg='Unknown error'): + super(XAttrMetadataError, self).__init__(msg) + self.code = code + + # Parsing code and msg + if (self.code in (errno.ENOSPC, errno.EDQUOT) or + 'No space left' in self.msg or 'Disk quota excedded' in self.msg): + self.reason = 'NO_SPACE' + elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: + self.reason = 'VALUE_TOO_LONG' + else: + self.reason = 'NOT_SUPPORTED' + + class XAttrMetadataPP(PostProcessor): # @@ -51,7 +67,10 @@ class XAttrMetadataPP(PostProcessor): raise ImportError def write_xattr(path, key, value): - return xattr.setxattr(path, key, value) + try: + xattr.set(path, key, value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) except ImportError: if os.name == 'nt': @@ -62,8 +81,11 @@ class XAttrMetadataPP(PostProcessor): assert os.path.exists(path) ads_fn = path + ":" + key - with open(ads_fn, "wb") as f: - f.write(value) + try: + with open(ads_fn, "wb") as f: + f.write(value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) else: user_has_setfattr = check_executable("setfattr", ['--version']) user_has_xattr = check_executable("xattr", ['-h']) @@ -71,12 +93,27 @@ class XAttrMetadataPP(PostProcessor): if user_has_setfattr or user_has_xattr: def write_xattr(path, key, value): + value = value.decode('utf-8') if user_has_setfattr: - cmd = ['setfattr', '-n', key, '-v', value, path] + executable = 'setfattr' + opts = ['-n', key, '-v', value] elif user_has_xattr: - cmd = ['xattr', '-w', key, value, path] - - subprocess_check_output(cmd) + executable = 'xattr' + opts = ['-w', key, value] + + cmd = ([encodeFilename(executable, True)] + + [encodeArgument(o) for o in opts] + + [encodeFilename(path, True)]) + + try: + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + stdout, stderr = p.communicate() + stderr = stderr.decode('utf-8', 'replace') + if p.returncode != 0: + raise XAttrMetadataError(p.returncode, stderr) else: # On Unix, and can't find pyxattr, setfattr, or xattr. @@ -121,6 +158,19 @@ class XAttrMetadataPP(PostProcessor): return [], info - except (subprocess.CalledProcessError, OSError): - self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)") + except XAttrMetadataError as e: + if e.reason == 'NO_SPACE': + self._downloader.report_warning( + 'There\'s no disk space left or disk quota exceeded. ' + + 'Extended attributes are not written.') + elif e.reason == 'VALUE_TOO_LONG': + self._downloader.report_warning( + 'Unable to write extended attributes due to too long values.') + else: + msg = 'This filesystem doesn\'t support extended attributes. ' + if os.name == 'nt': + msg += 'You need to use NTFS.' + else: + msg += '(You may have to enable them in your /etc/fstab)' + self._downloader.report_error(msg) return [], info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d73efcf25..ed9ed9ed6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1835,12 +1835,8 @@ def parse_dfxp_time_expr(time_expr): return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) -def format_srt_time(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - millisecs = (secs - int(secs)) * 1000 - secs = int(secs) - return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs) +def srt_subtitles_timecode(seconds): + return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) def dfxp2srt(dfxp_data): @@ -1866,10 +1862,14 @@ def dfxp2srt(dfxp_data): paras = dfxp.findall(_x('.//ttml:p')) for para, index in zip(paras, itertools.count(1)): + begin_time = parse_dfxp_time_expr(para.attrib['begin']) + end_time = parse_dfxp_time_expr(para.attrib.get('end')) + if not end_time: + end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur']) out.append('%d\n%s --> %s\n%s\n\n' % ( index, - format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))), - format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))), + srt_subtitles_timecode(begin_time), + srt_subtitles_timecode(end_time), parse_node(para))) return ''.join(out)