From: Sergey M․ Date: Sun, 8 Feb 2015 16:46:43 +0000 (+0600) Subject: Merge branch 'gamekings' of https://github.com/robin007bond/youtube-dl into robin007b... X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=3bd4bffb1ca5f44f7222355579e1f23ffd2bd31f;hp=c36b09a5026172c1ca452038fffccd68b14c528c;p=youtube-dl Merge branch 'gamekings' of https://github.com/robin007bond/youtube-dl into robin007bond-gamekings --- diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b188be636..2d8f9c316 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -441,6 +441,7 @@ - **tvp.pl** - **tvp.pl:Series** - **TVPlay**: TV3Play and related services + - **Tweakers** - **twitch:bookmarks** - **twitch:chapter** - **twitch:past_broadcasts** diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 678b9f7d1..b1cd6a69f 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -13,6 +13,7 @@ import copy from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE +from youtube_dl.postprocessor.common import PostProcessor class YDL(FakeYDL): @@ -370,5 +371,35 @@ class TestFormatSelection(unittest.TestCase): 'vbr': 10, }), '^\s*10k$') + def test_postprocessors(self): + filename = 'post-processor-testfile.mp4' + audiofile = filename + '.mp3' + + class SimplePP(PostProcessor): + def run(self, info): + with open(audiofile, 'wt') as f: + f.write('EXAMPLE') + info['filepath'] + return False, info + + def run_pp(params): + with open(filename, 'wt') as f: + f.write('EXAMPLE') + ydl = YoutubeDL(params) + ydl.add_post_processor(SimplePP()) + ydl.post_process(filename, {'filepath': filename}) + + run_pp({'keepvideo': True}) + self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + os.unlink(filename) + os.unlink(audiofile) + + run_pp({'keepvideo': False}) + self.assertFalse(os.path.exists(filename), '%s exists' % filename) + self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + os.unlink(audiofile) + + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1730df4cd..633e3d8a1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -938,6 +938,9 @@ class YoutubeDL(object): def has_header(self, h): return h in self.headers + def get_header(self, h, default=None): + return self.headers.get(h, default) + pr = _PseudoRequest(info_dict['url']) self.cookiejar.add_cookie_header(pr) return pr.headers.get('Cookie') @@ -1076,7 +1079,8 @@ class YoutubeDL(object): else self.params['merge_output_format']) selected_format = { 'requested_formats': formats_info, - 'format': rf, + 'format': '%s+%s' % (formats_info[0].get('format'), + formats_info[1].get('format')), 'format_id': '%s+%s' % (formats_info[0].get('format_id'), formats_info[1].get('format_id')), 'width': formats_info[0].get('width'), diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 047f7002a..0d7a120bc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE from .adobetv import AdobeTVIE from .adultswim import AdultSwimIE +from .aftenposten import AftenpostenIE from .aftonbladet import AftonbladetIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py new file mode 100644 index 000000000..2b257ede7 --- /dev/null +++ b/youtube_dl/extractor/aftenposten.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + xpath_with_ns, + xpath_text, + find_xpath_attr, +) + + +class AftenpostenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P[^/]+)-\d+\.html' + + _TEST = { + 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish', + 'md5': 'fd828cd29774a729bf4d4425fe192972', + 'info_dict': { + 'id': '21039', + 'ext': 'mov', + 'title': 'TRAILER: "Sweatshop" - I can´t take any more', + 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', + 'timestamp': 1416927969, + 'upload_date': '20141125', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-xs-id="(\d+)"', webpage, 'video id') + + data = self._download_xml( + 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) + + NS_MAP = { + 'atom': 'http://www.w3.org/2005/Atom', + 'xt': 'http://xstream.dk/', + 'media': 'http://search.yahoo.com/mrss/', + } + + entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) + + title = xpath_text( + entry, xpath_with_ns('./atom:title', NS_MAP), 'title') + description = xpath_text( + entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') + timestamp = parse_iso8601(xpath_text( + entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) + + formats = [] + media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) + for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): + media_url = media_content.get('url') + if not media_url: + continue + tbr = int_or_none(media_content.get('bitrate')) + mobj = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', media_url) + if mobj: + formats.append({ + 'url': mobj.group('url'), + 'play_path': 'mp4:%s' % mobj.group('playpath'), + 'app': mobj.group('app'), + 'ext': 'flv', + 'tbr': tbr, + 'format_id': 'rtmp-%d' % tbr, + }) + else: + formats.append({ + 'url': media_url, + 'tbr': tbr, + }) + self._sort_formats(formats) + + link = find_xpath_attr( + entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') + if link is not None: + formats.append({ + 'url': link.get('href'), + 'format_id': link.get('rel'), + }) + + thumbnails = [{ + 'url': splash.get('url'), + 'width': int_or_none(splash.get('width')), + 'height': int_or_none(splash.get('height')), + } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 15006336f..63429780e 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -20,6 +20,7 @@ class AparatIE(InfoExtractor): 'id': 'wP8On', 'ext': 'mp4', 'title': 'تیم گلکسی 11 - زومیت', + 'age_limit': 0, }, # 'skip': 'Extremely unreliable', } @@ -34,7 +35,8 @@ class AparatIE(InfoExtractor): video_id + '/vt/frame') webpage = self._download_webpage(embed_url, video_id) - video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) + video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( + r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] for i, video_url in enumerate(video_urls): req = HEADRequest(video_url) res = self._request_webpage( @@ -46,7 +48,7 @@ class AparatIE(InfoExtractor): title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') thumbnail = self._search_regex( - r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) + r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) return { 'id': video_id, @@ -54,4 +56,5 @@ class AparatIE(InfoExtractor): 'url': video_url, 'ext': 'mp4', 'thumbnail': thumbnail, + 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 602601b24..2f5ba7aee 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -656,6 +656,21 @@ class InfoExtractor(object): } return RATING_TABLE.get(rating.lower(), None) + def _family_friendly_search(self, html): + # See http://schema.org/VideoObj + family_friendly = self._html_search_meta('isFamilyFriendly', html) + + if not family_friendly: + return None + + RATING_TABLE = { + '1': 0, + 'true': 0, + '0': 18, + 'false': 18, + } + return RATING_TABLE.get(family_friendly.lower(), None) + def _twitter_search_player(self, html): return self._html_search_meta('twitter:player', html, 'twitter card player') @@ -707,9 +722,9 @@ class InfoExtractor(object): f.get('quality') if f.get('quality') is not None else -1, f.get('tbr') if f.get('tbr') is not None else -1, f.get('vbr') if f.get('vbr') is not None else -1, - ext_preference, f.get('height') if f.get('height') is not None else -1, f.get('width') if f.get('width') is not None else -1, + ext_preference, f.get('abr') if f.get('abr') is not None else -1, audio_ext_preference, f.get('fps') if f.get('fps') is not None else -1, @@ -765,7 +780,7 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) - def _extract_f4m_formats(self, manifest_url, video_id): + def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest') @@ -778,26 +793,28 @@ class InfoExtractor(object): media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') for i, media_el in enumerate(media_nodes): if manifest_version == '2.0': - manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href') + manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + + (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) - format_id = 'f4m-%d' % (i if tbr is None else tbr) formats.append({ - 'format_id': format_id, + 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), 'url': manifest_url, 'ext': 'flv', 'tbr': tbr, 'width': int_or_none(media_el.attrib.get('width')), 'height': int_or_none(media_el.attrib.get('height')), + 'preference': preference, }) self._sort_formats(formats) return formats def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None): + entry_protocol='m3u8', preference=None, + m3u8_id=None): formats = [{ - 'format_id': 'm3u8-meta', + 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])), 'url': m3u8_url, 'ext': ext, 'protocol': 'm3u8', @@ -833,9 +850,8 @@ class InfoExtractor(object): formats.append({'url': format_url(line)}) continue tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) - f = { - 'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)), + 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])), 'url': format_url(line.strip()), 'tbr': tbr, 'ext': ext, diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index b116d251d..1d9166455 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor): duration = parse_duration(self._html_search_regex( r'\s*-?\s*(.*?)', webpage, 'duration', fatal=False)) - family_friendly = self._html_search_meta( - 'isFamilyFriendly', webpage, default='false') flashvars = compat_parse_qs(self._html_search_regex( r'[0-9]+)/(?P[^/?#]+)/?' _TESTS = [{ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', + 'md5': 'e736ce0c665e459ddb818546220b4ef8', 'info_dict': { 'id': 'e174042', 'ext': 'mp3', @@ -18,9 +18,6 @@ class RTPIE(InfoExtractor): 'description': 'As paixões musicais de António Cartaxo e António Macedo', 'thumbnail': 're:^https?://.*\.jpg', }, - 'params': { - 'skip_download': True, # RTMP download - }, }, { 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'only_matching': True, @@ -37,20 +34,48 @@ class RTPIE(InfoExtractor): player_config = self._search_regex( r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config') - config = json.loads(js_to_json(player_config)) + config = self._parse_json(player_config, video_id) path, ext = config.get('file').rsplit('.', 1) formats = [{ + 'format_id': 'rtmp', + 'ext': ext, + 'vcodec': config.get('type') == 'audio' and 'none' or None, + 'preference': -2, + 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), 'app': config.get('application'), 'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path), 'page_url': url, - 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config), 'rtmp_live': config.get('live', False), - 'ext': ext, - 'vcodec': config.get('type') == 'audio' and 'none' or None, 'player_url': 'http://programas.rtp.pt/play/player.swf?v3', + 'rtmp_real_time': True, }] + # Construct regular HTTP download URLs + replacements = { + 'audio': { + 'format_id': 'mp3', + 'pattern': r'^nas2\.share/wavrss/', + 'repl': 'http://rsspod.rtp.pt/podcasts/', + 'vcodec': 'none', + }, + 'video': { + 'format_id': 'mp4_h264', + 'pattern': r'^nas2\.share/h264/', + 'repl': 'http://rsspod.rtp.pt/videocasts/', + 'vcodec': 'h264', + }, + } + r = replacements[config['type']] + if re.match(r['pattern'], config['file']) is not None: + formats.append({ + 'format_id': r['format_id'], + 'url': re.sub(r['pattern'], r['repl'], config['file']), + 'vcodec': r['vcodec'], + }) + + self._sort_formats(formats) + return { 'id': video_id, 'title': title, diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index 5e84c1098..d0981115d 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -6,12 +6,14 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, + compat_urllib_parse_urlparse, ) from ..utils import ( int_or_none, parse_duration, parse_iso8601, unescapeHTML, + xpath_text, ) @@ -159,11 +161,27 @@ class RTSIE(InfoExtractor): return int_or_none(self._search_regex( r'-([0-9]+)k\.', url, 'bitrate', default=None)) - formats = [{ - 'format_id': fid, - 'url': furl, - 'tbr': extract_bitrate(furl), - } for fid, furl in info['streams'].items()] + formats = [] + for format_id, format_url in info['streams'].items(): + if format_url.endswith('.f4m'): + token = self._download_xml( + 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path, + video_id, 'Downloading %s token' % format_id) + auth_params = xpath_text(token, './/authparams', 'auth params') + if not auth_params: + continue + formats.extend(self._extract_f4m_formats( + '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params), + video_id, f4m_id=format_id)) + elif format_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id)) + else: + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'tbr': extract_bitrate(format_url), + }) if 'media' in info: formats.extend([{ diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py deleted file mode 100644 index feef33e27..000000000 --- a/youtube_dl/extractor/soulanime.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - HEADRequest, - urlhandle_detect_ext, -) - - -class SoulAnimeWatchingIE(InfoExtractor): - IE_NAME = "soulanime:watching" - IE_DESC = "SoulAnime video" - _TEST = { - 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', - 'md5': '05fae04abf72298098b528e98abf4298', - 'info_dict': { - 'id': 'seirei-tsukai-no-blade-dance-episode-9', - 'ext': 'mp4', - 'title': 'seirei-tsukai-no-blade-dance-episode-9', - 'description': 'seirei-tsukai-no-blade-dance-episode-9' - } - } - _VALID_URL = r'http://[w.]*soul-anime\.(?P[^/]+)/watch[^/]*/(?P[^/]+)' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - domain = mobj.group('domain') - - page = self._download_webpage(url, video_id) - - video_url_encoded = self._html_search_regex( - r'
[^<]*[^/]+)/anime./(?P[^/]+)' - - _EPISODE_REGEX = r'' - - _TEST = { - 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', - 'info_dict': { - 'id': 'black-rock-shooter-tv' - }, - 'playlist_count': 8 - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - series_id = mobj.group('id') - domain = mobj.group('domain') - - pattern = re.compile(self._EPISODE_REGEX) - - page = self._download_webpage(url, series_id, "Downloading series page") - mobj = pattern.findall(page) - - entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj] - - return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 18a823719..e85d452a3 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -15,7 +15,8 @@ class TeamcocoIE(InfoExtractor): 'id': '80187', 'ext': 'mp4', 'title': 'Conan Becomes A Mary Kay Beauty Consultant', - 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' + 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', + 'age_limit': 0, } }, { 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', @@ -24,7 +25,8 @@ class TeamcocoIE(InfoExtractor): 'id': '19705', 'ext': 'mp4', "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", - "title": "Louis C.K. Interview Pt. 1 11/3/11" + "title": "Louis C.K. Interview Pt. 1 11/3/11", + 'age_limit': 0, } } ] @@ -83,4 +85,5 @@ class TeamcocoIE(InfoExtractor): 'title': self._og_search_title(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), + 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index ba65996dc..102362b29 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,6 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( float_or_none, @@ -11,7 +13,7 @@ from ..utils import ( class TvigleIE(InfoExtractor): IE_NAME = 'tvigle' IE_DESC = 'Интернет-телевидение Tvigle.ru' - _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P[^/]+)/$' + _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P[^/]+)/$|cloud\.tvigle\.ru/video/(?P\d+))' _TESTS = [ { @@ -38,16 +40,22 @@ class TvigleIE(InfoExtractor): 'duration': 186.080, 'age_limit': 0, }, - }, + }, { + 'url': 'https://cloud.tvigle.ru/video/5267604/', + 'only_matching': True, + } ] def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - video_id = self._html_search_regex( - r'
  • ', webpage, 'video id') + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex( + r'
  • ', + webpage, 'video id') video_data = self._download_json( 'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index e332d4694..c80ec15cf 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -1,35 +1,65 @@ -# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + xpath_text, + xpath_with_ns, + int_or_none, + float_or_none, +) class TweakersIE(InfoExtractor): - _VALID_URL = r'https?://tweakers\.net/video/(?P[0-9]+).*' + _VALID_URL = r'https?://tweakers\.net/video/(?P\d+)' _TEST = { 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html', - 'md5': 'f7f7f3027166a7f32f024b4ae6571ced', + 'md5': '1b5afa817403bb5baa08359dca31e6df', 'info_dict': { 'id': '9926', 'ext': 'mp4', - 'title': 'New-Nintendo-3Ds-Xl-Op-Alle-Fronten-Beter', + 'title': 'New Nintendo 3DS XL - Op alle fronten beter', + 'description': 'md5:f97324cc71e86e11c853f0763820e3ba', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'duration': 386, } } def _real_extract(self, url): - splitted_url = re.split('.html|/', url) - del splitted_url[-1] # To remove extra '/' at the end video_id = self._match_id(url) - title = splitted_url[5].title() # Retrieve title for URL and capitalize - splitted_url[3] = splitted_url[3] + '/player' # Add /player to get the player page - player_url = '/'.join(splitted_url) + '.html' - player_page = self._download_webpage(player_url, video_id) + + playlist = self._download_xml( + 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id, + video_id) + + NS_MAP = { + 'xspf': 'http://xspf.org/ns/0/', + 's1': 'http://static.streamone.nl/player/ns/0', + } + + track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)) + + title = xpath_text( + track, xpath_with_ns('./xspf:title', NS_MAP), 'title') + description = xpath_text( + track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') + thumbnail = xpath_text( + track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') + duration = float_or_none( + xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), + 1000) + + formats = [{ + 'url': location.text, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] return { 'id': video_id, - 'ext': 'mp4', 'title': title, - 'url': re.findall('http.*mp4', player_page)[0], + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, } diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 4a4422c5a..01d25f760 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -166,14 +166,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): if filecodec is None: raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe') - uses_avconv = self._uses_avconv() more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: # Lossless, but in another container acodec = 'copy' extension = 'm4a' - more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc'] + more_opts = ['-bsf:a', 'aac_adtstoasc'] elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: # Lossless if possible acodec = 'copy' @@ -189,9 +188,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = [] if self._preferredquality is not None: if int(self._preferredquality) < 10: - more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality] + more_opts += ['-q:a', self._preferredquality] else: - more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k'] + more_opts += ['-b:a', self._preferredquality + 'k'] else: # We convert the audio (lossy) acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] @@ -200,13 +199,13 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): if self._preferredquality is not None: # The opus codec doesn't support the -aq option if int(self._preferredquality) < 10 and extension != 'opus': - more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality] + more_opts += ['-q:a', self._preferredquality] else: - more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k'] + more_opts += ['-b:a', self._preferredquality + 'k'] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': - more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc'] + more_opts += ['-bsf:a', 'aac_adtstoasc'] if self._preferredcodec == 'vorbis': extension = 'ogg' if self._preferredcodec == 'wav': diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 36591a43d..1091ae61b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.04' +__version__ = '2015.02.06'