X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffrancetv.py;h=81b468c7d1e030f7ba67fed2a7ef2562c8164c76;hb=HEAD;hp=e0734d59ac3189ada79eaf98d1461d8c07c47ecb;hpb=99892e9908c5366be334f48d5c9ce0044ec37a47;p=youtube-dl diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index e0734d59a..81b468c7d 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -11,21 +11,24 @@ from ..compat import ( ) from ..utils import ( clean_html, + determine_ext, ExtractorError, int_or_none, parse_duration, - determine_ext, + try_get, + url_or_none, ) from .dailymotion import DailymotionIE class FranceTVBaseInfoExtractor(InfoExtractor): - def _make_url_result(self, video_id, catalog=None): - full_id = 'francetv:%s' % video_id - if catalog: + def _make_url_result(self, video_or_full_id, catalog=None): + full_id = 'francetv:%s' % video_or_full_id + if '@' not in video_or_full_id and catalog: full_id += '@%s' % catalog return self.url_result( - full_id, ie=FranceTVIE.ie_key(), video_id=video_id) + full_id, ie=FranceTVIE.ie_key(), + video_id=video_or_full_id.split('@')[0]) class FranceTVIE(InfoExtractor): @@ -76,6 +79,10 @@ class FranceTVIE(InfoExtractor): }, { 'url': 'francetv:NI_657393@Regions', 'only_matching': True, + }, { + # france-3 live + 'url': 'francetv:SIM_France3', + 'only_matching': True, }] def _extract_video(self, video_id, catalogue=None): @@ -109,17 +116,18 @@ class FranceTVIE(InfoExtractor): def sign(manifest_url, manifest_id): for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = self._download_webpage( + signed_url = url_or_none(self._download_webpage( 'https://%s/esi/TA' % host, video_id, 'Downloading signed %s manifest URL' % manifest_id, fatal=False, query={ 'url': manifest_url, - }) - if (signed_url and isinstance(signed_url, compat_str) and - re.search(r'^(?:https?:)?//', signed_url)): + })) + if signed_url: return signed_url return 
manifest_url + is_live = None + formats = [] for video in info['videos']: if video['statut'] != 'ONLINE': @@ -127,11 +135,15 @@ class FranceTVIE(InfoExtractor): video_url = video['url'] if not video_url: continue + if is_live is None: + is_live = (try_get( + video, lambda x: x['plages_ouverture'][0]['direct'], + bool) is True) or '/live.francetv.fr/' in video_url format_id = video['format'] ext = determine_ext(video_url) if ext == 'f4m': if georestricted: - # See https://github.com/rg3/youtube-dl/issues/3963 + # See https://github.com/ytdl-org/youtube-dl/issues/3963 # m3u8 urls work fine continue formats.extend(self._extract_f4m_formats( @@ -172,11 +184,12 @@ class FranceTVIE(InfoExtractor): return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': clean_html(info['synopsis']), 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), 'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']), 'timestamp': int_or_none(info['diffusion']['timestamp']), + 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, } @@ -202,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': '162311093', + 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'ext': 'mp4', 'title': '13h15, le dimanche... 
- Les mystères de Jésus', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', @@ -245,6 +258,10 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): }, { 'url': 'https://www.france.tv/142749-rouge-sang.html', 'only_matching': True, + }, { + # france-3 live + 'url': 'https://www.france.tv/france-3/direct.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -254,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): catalogue = None video_id = self._search_regex( - r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: @@ -354,12 +371,38 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): self.url_result(dailymotion_url, DailymotionIE.ie_key()) for dailymotion_url in dailymotion_urls]) - video_id, catalogue = self._search_regex( - (r'id-video=([^@]+@[^"]+)', + video_id = self._search_regex( + (r'player\.load[^;]+src:\s*["\']([^"\']+)', + r'id-video=([^@]+@[^"]+)', r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'), - webpage, 'video id').split('@') + webpage, 'video id') - return self._make_url_result(video_id, catalogue) + return self._make_url_result(video_id) + + +class FranceTVInfoSportIE(FranceTVBaseInfoExtractor): + IE_NAME = 'sport.francetvinfo.fr' + _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018', + 'info_dict': { + 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea', + 'ext': 'mp4', + 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018', + 'timestamp': 1523639962, + 'upload_date': '20180413', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [FranceTVIE.ie_key()], + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = 
self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id') + return self._make_url_result(video_id, 'Sport-web') class GenerationWhatIE(InfoExtractor): @@ -431,3 +474,43 @@ class CultureboxIE(FranceTVBaseInfoExtractor): webpage, 'video id').split('@') return self._make_url_result(video_id, catalogue) + + +class FranceTVJeunesseIE(FranceTVBaseInfoExtractor): + _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))' + + _TESTS = [{ + 'url': 'https://www.zouzous.fr/heros/simon', + 'info_dict': { + 'id': 'simon', + }, + 'playlist_count': 9, + }, { + 'url': 'https://www.ludo.fr/heros/ninjago', + 'info_dict': { + 'id': 'ninjago', + }, + 'playlist_count': 10, + }, { + 'url': 'https://www.zouzous.fr/heros/simon?abc', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + + playlist = self._download_json( + '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id) + + if not playlist.get('count'): + raise ExtractorError( + '%s is not available' % playlist_id, expected=True) + + entries = [] + for item in playlist['items']: + identity = item.get('identity') + if identity and isinstance(identity, compat_str): + entries.append(self._make_url_result(identity)) + + return self.playlist_result(entries, playlist_id)