# encoding: utf-8
+from __future__ import unicode_literals
+
import re
import json
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
- IE_NAME = u'arte.tv'
+ IE_NAME = 'arte.tv'
@classmethod
def suitable(cls, url):
# r'src="(.*?/videothek_js.*?\.js)',
# 0,
# [
- # (1, 'url', u'Invalid URL: %s' % url)
+ # (1, 'url', 'Invalid URL: %s' % url)
# ]
# )
# http_host = url.split('/')[2]
# '(rtmp://.*?)\'',
# re.DOTALL,
# [
- # (1, 'path', u'could not extract video path: %s' % url),
- # (2, 'player', u'could not extract video player: %s' % url),
- # (3, 'url', u'could not extract video url: %s' % url)
+ # (1, 'path', 'could not extract video path: %s' % url),
+ # (2, 'player', 'could not extract video player: %s' % url),
+ # (3, 'url', 'could not extract video url: %s' % url)
# ]
# )
- # video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+ # video_url = '%s/%s' % (info.get('url'), info.get('path'))
def _real_extract(self, url):
mobj = re.match(self._VIDEOS_URL, url)
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, url) is not None:
- raise ExtractorError(u'Arte live streams are not yet supported, sorry')
+ raise ExtractorError('Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
# return
+ raise ExtractorError('No video found')
+
def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
- ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
+ ref_xml_doc = self._download_xml(
+ ref_xml_url, video_id, note='Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref']
- config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
+ config_xml = self._download_webpage(
+ config_xml_url, video_id, note='Downloading configuration')
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
def _key(m):
def _extract_liveweb(self, url, name, lang):
"""Extract from http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
- video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
+ video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
- video_id, u'Downloading information')
+ video_id, 'Downloading information')
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
class ArteTVPlus7IE(InfoExtractor):
- IE_NAME = u'arte.tv:+7'
- _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
+ IE_NAME = 'arte.tv:+7'
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod
def _extract_url_info(cls, url):
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal (deaf/hard-of-hearing) subtitles also has lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+ # Prefer http downloads over m3u8
+ 0 if f['url'].endswith('m3u8') else 1,
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
if bitrate is not None:
quality += '-%d' % bitrate
if format_info.get('versionCode') is not None:
- format_id = u'%s-%s' % (quality, format_info['versionCode'])
+ format_id = '%s-%s' % (quality, format_info['versionCode'])
else:
format_id = quality
info = {
'width': format_info.get('width'),
'height': height,
}
- if format_info['mediaType'] == u'rtmp':
+ if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer']
info['play_path'] = 'mp4:' + format_info['url']
info['ext'] = 'flv'
# It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE):
- IE_NAME = u'arte.tv:creative'
+ IE_NAME = 'arte.tv:creative'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
_TEST = {
- u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
- u'file': u'050489-002.mp4',
- u'info_dict': {
- u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design',
+ 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
+ 'info_dict': {
+ 'id': '050489-002',
+ 'ext': 'mp4',
+ 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
},
}
class ArteTVFutureIE(ArteTVPlus7IE):
- IE_NAME = u'arte.tv:future'
+ IE_NAME = 'arte.tv:future'
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
_TEST = {
- u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
- u'file': u'050940-003.mp4',
- u'info_dict': {
- u'title': u'Les champignons au secours de la planète',
+ 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
+ 'info_dict': {
+ 'id': '050940-003',
+ 'ext': 'mp4',
+ 'title': 'Les champignons au secours de la planète',
},
}
row = get_element_by_id(anchor_id, webpage)
return self._extract_from_webpage(row, anchor_id, lang)
-class ArteTVDDCIE(ArteTVPlus7IE):
- IE_NAME = u'arte.tv:ddc'
- _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
- _TEST = {
- u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
- u'file': u'049881-009_PLUS7-D.flv',
- u'info_dict': {
- u'title': u'Mit offenen Karten',
- },
- }
+class ArteTVDDCIE(ArteTVPlus7IE):
+ IE_NAME = 'arte.tv:ddc'
+ _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
def _real_extract(self, url):
video_id, lang = self._extract_url_info(url)
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)
+
+
+class ArteTVConcertIE(ArteTVPlus7IE):
+ IE_NAME = 'arte.tv:concert'
+ _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
+
+ _TEST = {
+ 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
+ 'md5': '9ea035b7bd69696b67aa2ccaaa218161',
+ 'info_dict': {
+ 'id': '186',
+ 'ext': 'mp4',
+ 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
+ 'upload_date': '20140128',
+ },
+ }