From 3b18f539a7e122004f4010981b044738985cf871 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 7 Sep 2015 11:18:21 +0100 Subject: [PATCH] [ndr] fix info extraction --- youtube_dl/extractor/ndr.py | 102 +++++++++++++++++------------------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 79a13958b..77b7e5eb8 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -1,68 +1,66 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, - qualities, - parse_duration, ) +preferences = {'xl': 4, 'l': 3, 'm': 2, 's': 1, 'xs': 0,} + + class NDRBaseIE(InfoExtractor): def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - page = self._download_webpage(url, video_id, 'Downloading page') + json_data = self._download_json('http://www.ndr.de/%s-ppjson.json' % video_id, video_id, 'Downloading page') - title = self._og_search_title(page).strip() - description = self._og_search_description(page) - if description: - description = description.strip() + formats = [] + objetType = json_data.get('config').get('objectType') + if objetType == 'video': + for key, f in json_data.get('playlist').items(): + if key != 'config': + src = f['src'] + if '.f4m' in src: + formats.extend(self._extract_f4m_formats(src, video_id)) + elif '.m3u8' in src: + formats.extend(self._extract_m3u8_formats(src, video_id)) + else: + quality = f.get('quality') + formats.append({ + 'url': src, + 'format_id': quality, + 'preference': preferences.get(quality), + }) + elif objetType == 'audio': + for key, f in json_data.get('playlist').items(): + if key != 'config': + formats.append({ + 'url': f['src'], + 'format_id': 'mp3', + + }) + else: + raise ExtractorError('No media links available for %s' % video_id) - duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None)) - if not duration: - duration = parse_duration(self._html_search_regex( - r'(\d+:\d+)', - page, 'duration', default=None)) + self._sort_formats(formats) - formats = [] + config = json_data.get('playlist').get('config') - mp3_url = re.search(r'''\{src:'(?P