X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Farte.py;h=ffc321821cd3a4a0ba9a62ad97b6f443d8ecacb3;hb=4c76aa06665621c7689938afd7bbdbc797b5c7ea;hp=f405329297da70c5c686216c35b16e2ab1c2ded5;hpb=6e6b9f600f2f447604f6108fb6486b73cc25def1;p=youtube-dl diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index f40532929..ffc321821 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,15 +6,18 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, compat_urllib_parse_urlparse, ) from ..utils import ( + ExtractorError, find_xpath_attr, - unified_strdate, get_element_by_attribute, int_or_none, NO_DEFAULT, qualities, + try_get, + unified_strdate, ) # There are different sources of video in arte.tv, the extraction process @@ -79,6 +82,16 @@ class ArteTVBaseIE(InfoExtractor): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] + vsr = try_get(player_info, lambda x: x['VSR'], dict) + if not vsr: + error = None + if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error': + error = try_get( + player_info, lambda x: x['custom_msg']['msg'], compat_str) + if not error: + error = 'Video %s is not available' % player_info.get('VID') or video_id + raise ExtractorError(error, expected=True) + upload_date_str = player_info.get('shootingDate') if not upload_date_str: upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] @@ -107,7 +120,7 @@ class ArteTVBaseIE(InfoExtractor): langcode = LANGS.get(lang, lang) formats = [] - for format_id, format_dict in player_info['VSR'].items(): + for format_id, format_dict in vsr.items(): f = dict(format_dict) versionCode = f.get('versionCode') l = re.escape(langcode) @@ -180,11 +193,17 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?Pfr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?Pfr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', 'only_matching': True, + }, { + 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', + 'only_matching': True, + }, { + 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn', + 'only_matching': True, }] @classmethod @@ -240,10 +259,10 @@ class ArteTVPlus7IE(ArteTVBaseIE): return self._extract_from_json_url(json_url, video_id, lang, title=title) # Different kind of embed URL (e.g. # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - embed_url = self._search_regex( - r']+src=(["\'])(?P.+?)\1', - webpage, 'embed url', group='url') - return self.url_result(embed_url) + entries = [ + self.url_result(url) + for _, url in re.findall(r']+src=(["\'])(?P.+?)\1', webpage)] + return self.playlist_result(entries) # It also uses the arte_vp_url url from the webpage to extract the information @@ -252,22 +271,17 @@ class ArteTVCreativeIE(ArteTVPlus7IE): _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', 'info_dict': { - 'id': '72176', + 'id': '057405-001-A', 'ext': 'mp4', - 'title': 'Folge 2 - Corporate Design', - 'upload_date': '20131004', + 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', + 'upload_date': '20150716', }, }, { 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'info_dict': { - 'id': '160676', - 'ext': 'mp4', - 'title': 'Monty Python live (mostly)', - 'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', - 'upload_date': '20140805', - } + 'playlist_count': 11, + 'add_ie': ['Youtube'], }, { 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', 'only_matching': True, @@ -349,14 +363,13 @@ class ArteTVCinemaIE(ArteTVPlus7IE): _VALID_URL = r'https?://cinema\.arte\.tv/(?Pfr|de|en|es)/(?P.+)' _TESTS = [{ - 'url': 'http://cinema.arte.tv/de/node/38291', - 'md5': '6b275511a5107c60bacbeeda368c3aa1', + 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', + 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', 'info_dict': { - 'id': '055876-000_PWA12025-D', + 'id': '062494-000-A', 'ext': 'mp4', - 'title': 'Tod auf dem Nil', - 'upload_date': '20160122', - 'description': 'md5:7f749bbb77d800ef2be11d54529b96bc', + 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', + 'upload_date': '20150807', }, }] @@ -413,6 +426,22 @@ class ArteTVEmbedIE(ArteTVPlus7IE): return self._extract_from_json_url(json_url, video_id, lang) +class TheOperaPlatformIE(ArteTVPlus7IE): + IE_NAME = 'theoperaplatform' + _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?Pfr|de|en|es)/(?P[^/?#&]+)' + + _TESTS = [{ + 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello', + 'md5': '970655901fa2e82e04c00b955e9afe7b', + 'info_dict': { + 'id': '060338-009-A', + 'ext': 'mp4', + 'title': 'Verdi - OTELLO', + 'upload_date': '20160927', + }, + }] + + class ArteTVPlaylistIE(ArteTVBaseIE): IE_NAME = 'arte.tv:playlist' _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?Pfr|de|en|es)/[^#]*#collection/(?PPL-\d+)' @@ -422,6 +451,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): 'info_dict': { 'id': 'PL-013263', 'title': 'Areva & Uramin', + 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', }, 'playlist_mincount': 6, }, {