X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Farte.py;h=b9a9440c09b85365a2997bd5feddbae017601c2d;hb=894dd8682e07be0d85d1eb2401b9f99ce056e809;hp=1c72b2ff6f5b4d5a6c01a4a0dfde2b2de6517e04;hpb=b5f4775b3821ed22e90cecd895977fbefbc97def;p=youtube-dl diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 1c72b2ff6..b9a9440c0 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -10,8 +10,8 @@ from ..utils import ( unified_strdate, determine_ext, get_element_by_id, - compat_str, get_element_by_attribute, + int_or_none, ) # There are different sources of video in arte.tv, the extraction process @@ -78,22 +78,36 @@ class ArteTVPlus7IE(InfoExtractor): def _extract_from_webpage(self, webpage, video_id, lang): json_url = self._html_search_regex( - r'arte_vp_url="(.*?)"', webpage, 'json vp url') + [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'], + webpage, 'json vp url') return self._extract_from_json_url(json_url, video_id, lang) def _extract_from_json_url(self, json_url, video_id, lang): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] + upload_date_str = player_info.get('shootingDate') + if not upload_date_str: + upload_date_str = player_info.get('VDA', '').split(' ')[0] + + title = player_info['VTI'].strip() + subtitle = player_info.get('VSU', '').strip() + if subtitle: + title += ' - %s' % subtitle + info_dict = { 'id': player_info['VID'], - 'title': player_info['VTI'], + 'title': title, 'description': player_info.get('VDE'), - 'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]), + 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - all_formats = player_info['VSR'].values() + all_formats = [] + for format_id, format_dict in player_info['VSR'].items(): + fmt = dict(format_dict) + fmt['format_id'] = format_id + all_formats.append(fmt) # Some formats use the m3u8 protocol all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) def _match_lang(f): @@ -144,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor): ) formats = sorted(formats, key=sort_key) def _format(format_info): - quality = '' - height = format_info.get('height') - if height is not None: - quality = compat_str(height) - bitrate = format_info.get('bitrate') - if bitrate is not None: - quality += '-%d' % bitrate - if format_info.get('versionCode') is not None: - format_id = '%s-%s' % (quality, format_info['versionCode']) - else: - format_id = quality info = { - 'format_id': format_id, - 'format_note': format_info.get('versionLibelle'), - 'width': format_info.get('width'), - 'height': height, + 'format_id': format_info['format_id'], + 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), + 'width': int_or_none(format_info.get('width')), + 'height': int_or_none(format_info.get('height')), + 'tbr': int_or_none(format_info.get('bitrate')), } if format_info['mediaType'] == 'rtmp': info['url'] = format_info['streamer']