X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Farte.py;h=8273bd6c9ae3cdff82052c8f63efc68be97561b3;hb=674fb0fcc54c72448f80a0573f7fd116f220827e;hp=3a34d1ecc67e590c568d10131b557b1d3022fe4d;hpb=f9befee1f5e96e8b9b7032f33e70290ffd7769f5;p=youtube-dl diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 3a34d1ecc..8273bd6c9 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -5,16 +5,15 @@ import re from .common import InfoExtractor from ..utils import ( - ExtractorError, find_xpath_attr, unified_strdate, - determine_ext, get_element_by_id, - compat_str, get_element_by_attribute, + int_or_none, + qualities, ) -# There are different sources of video in arte.tv, the extraction process +# There are different sources of video in arte.tv, the extraction process # is different for each one. The videos usually expire in 7 days, so we can't # add tests. @@ -38,7 +37,7 @@ class ArteTvIE(InfoExtractor): config_xml_url, video_id, note='Downloading configuration') formats = [{ - 'forma_id': q.attrib['quality'], + 'format_id': q.attrib['quality'], # The playpath starts at 'mp4:', if we don't manually # split the url, rtmpdump will incorrectly parse them 'url': q.text.split('mp4:', 1)[0], @@ -90,95 +89,67 @@ class ArteTVPlus7IE(InfoExtractor): if not upload_date_str: upload_date_str = player_info.get('VDA', '').split(' ')[0] + title = player_info['VTI'].strip() + subtitle = player_info.get('VSU', '').strip() + if subtitle: + title += ' - %s' % subtitle + info_dict = { 'id': player_info['VID'], - 'title': player_info['VTI'], + 'title': title, 'description': player_info.get('VDE'), 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - - all_formats = player_info['VSR'].values() - # Some formats use the m3u8 protocol - all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) - def _match_lang(f): - if f.get('versionCode') is None: - return True - # Return true if that format is in the language of the url - if lang == 'fr': - l = 'F' - elif lang == 'de': - l = 'A' - else: - l = lang - regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] - return any(re.match(r, f['versionCode']) for r in regexes) - # Some formats may not be in the same language as the url - # TODO: Might want not to drop videos that does not match requested language - # but to process those formats with lower precedence - formats = filter(_match_lang, all_formats) - formats = list(formats) # in python3 filter returns an iterator - if not formats: - # Some videos are only available in the 'Originalversion' - # they aren't tagged as being in French or German - # Sometimes there are neither videos of requested lang code - # nor original version videos available - # For such cases we just take all_formats as is - formats = all_formats - if not formats: - raise ExtractorError('The formats list is empty') - - if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: - def sort_key(f): - return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) - else: - def sort_key(f): - versionCode = f.get('versionCode') - if versionCode is None: - versionCode = '' - return ( - # Sort first by quality - int(f.get('height', -1)), - int(f.get('bitrate', -1)), - # The original version with subtitles has lower relevance - re.match(r'VO-ST(F|A)', versionCode) is None, - # The version with sourds/mal subtitles has also lower relevance - re.match(r'VO?(F|A)-STM\1', versionCode) is None, - # Prefer http downloads over m3u8 - 0 if f['url'].endswith('m3u8') else 1, - ) - formats = sorted(formats, key=sort_key) - def _format(format_info): - quality = '' - height = format_info.get('height') - if height is not None: - quality = compat_str(height) - bitrate = format_info.get('bitrate') - if bitrate is not None: - quality += '-%d' % bitrate - if format_info.get('versionCode') is not None: - format_id = '%s-%s' % (quality, format_info['versionCode']) - else: - format_id = quality - media_type = format_info.get('mediaType') - if media_type is not None: - format_id += '-%s' % media_type - info = { + qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) + + formats = [] + for format_id, format_dict in player_info['VSR'].items(): + f = dict(format_dict) + versionCode = f.get('versionCode') + + langcode = { + 'fr': 'F', + 'de': 'A', + }.get(lang, lang) + lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode] + lang_pref = ( + None if versionCode is None else ( + 10 if any(re.match(r, versionCode) for r in lang_rexs) + else -10)) + source_pref = 0 + if versionCode is not None: + # The original version with subtitles has lower relevance + if re.match(r'VO-ST(F|A)', versionCode): + source_pref -= 10 + # The version with sourds/mal subtitles has also lower relevance + elif re.match(r'VO?(F|A)-STM\1', versionCode): + source_pref -= 9 + format = { 'format_id': format_id, - 'format_note': format_info.get('versionLibelle'), - 'width': format_info.get('width'), - 'height': height, + 'preference': -10 if f.get('videoFormat') == 'M3U8' else None, + 'language_preference': lang_pref, + 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'tbr': int_or_none(f.get('bitrate')), + 'quality': qfunc(f.get('quality')), + 'source_preference': source_pref, } - if format_info['mediaType'] == 'rtmp': - info['url'] = format_info['streamer'] - info['play_path'] = 'mp4:' + format_info['url'] - info['ext'] = 'flv' + + if f.get('mediaType') == 'rtmp': + format['url'] = f['streamer'] + format['play_path'] = 'mp4:' + f['url'] + format['ext'] = 'flv' else: - info['url'] = format_info['url'] - info['ext'] = determine_ext(info['url']) - return info - info_dict['formats'] = [_format(f) for f in formats] + format['url'] = f['url'] + + formats.append(format) + + self._check_formats(formats, video_id) + self._sort_formats(formats) + info_dict['formats'] = formats return info_dict