from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..utils import (
IE_NAME = 'arte.tv'
def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ lang = mobj.group('lang')
+ video_id = mobj.group('id')
+
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml_doc = self._download_xml(
# Build one format entry per <url> element found at ./urls/url in the
# config document. The element's 'quality' attribute doubles as the format
# identifier and its text is the media URL.
formats = [{
    # Was misspelled 'forma_id', which left the identifier under a key
    # that is not the standard 'format_id' field.
    'format_id': q.attrib['quality'],
    'url': q.text,
    'ext': 'flv',
    # Rank the 'hd' quality above every other quality value.
    'quality': 2 if q.attrib['quality'] == 'hd' else 1,
} for q in config.findall('./urls/url')]
self._sort_formats(formats)
title = config.find('.//name').text
'id': video_id,
'title': title,
'thumbnail': thumbnail,
- 'url': video_url,
- 'ext': 'flv',
+ 'formats': formats,
}
return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang):
    """Locate the JSON URL embedded in *webpage* and extract from it.

    The URL is taken from the first ``arte_vp_url="..."`` occurrence in
    the page source. It is then passed, together with *video_id* and
    *lang*, to ``_extract_from_json_url``, whose result is returned
    unchanged.
    """
    json_url = self._html_search_regex(
        r'arte_vp_url="(.*?)"', webpage, 'json vp url')
    return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else:
def sort_key(f):
    """Return the tuple used to order the format dict *f*.

    Components, in order of precedence:

    1. ``height`` as an int (-1 when missing or None)
    2. ``bitrate`` as an int (-1 when missing or None)
    3. False when ``versionCode`` starts with ``VO-STF``/``VO-STA``
       (original version with subtitles), True otherwise
    4. False when ``versionCode`` matches ``VO?(F|A)-STM\\1``
       (the "sourds/mal" subtitle variant), True otherwise
    5. 0 when the URL ends with ``m3u8``, 1 otherwise

    False compares lower than True, so matching versions rank lower on
    components 3 and 4.
    """
    def _as_int(value):
        # A key that is present but holds None must rank like a missing
        # key instead of making int() raise TypeError.
        return -1 if value is None else int(value)

    # 'versionCode' may be present with a None value; re.match needs a
    # string, so normalise it to the empty string.
    version_code = f.get('versionCode')
    if version_code is None:
        version_code = ''

    return (
        # Sort first by quality
        _as_int(f.get('height', -1)),
        _as_int(f.get('bitrate', -1)),
        # The original version with subtitles has lower relevance
        re.match(r'VO-ST(F|A)', version_code) is None,
        # The version with subtitles for the deaf and hard of hearing
        # ("sourds/mal") also has lower relevance
        re.match(r'VO?(F|A)-STM\1', version_code) is None,
        # Prefer http downloads over m3u8
        0 if f['url'].endswith('m3u8') else 1,
    )