X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Frai.py;h=7ff1d06c46aacfa67a0134c878d635dce598028f;hb=545a23f11bbdf27d6d204225c578512af7184e0b;hp=115cc64cc6e08ec414de3c0dc671a19c33f1d7bc;hpb=8749477ed0a3cbc85d1726b6526fa5e794ce6072;p=youtube-dl diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 115cc64cc..7ff1d06c4 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_urllib_parse, + compat_urlparse, ) from ..utils import ( parse_duration, @@ -62,6 +63,29 @@ class RaiIE(InfoExtractor): 'description': 'Edizione delle ore 20:30 ', } }, + { + 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', + 'md5': '02b64456f7cc09f96ff14e7dd489017e', + 'info_dict': { + 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', + 'ext': 'flv', + 'title': 'Il Candidato - Primo episodio: "Le Primarie"', + 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', + 'uploader': 'RaiTre', + } + }, + { + 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', + 'md5': '037104d2c14132887e5e4cf114569214', + 'info_dict': { + 'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e', + 'ext': 'flv', + 'title': 'Il pacco', + 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', + 'uploader': 'RaiTre', + 'upload_date': '20141221', + }, + } ] def _extract_relinker_url(self, webpage): @@ -79,12 +103,15 @@ class RaiIE(InfoExtractor): relinker_url = self._extract_relinker_url(webpage) if not relinker_url: - iframe_path = self._search_regex( - r']+src="/?(dl/[^"]+\?iframe\b[^"]*)"', + iframe_url = self._search_regex( + [r']+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', + r'drawMediaRaiTV\(["\'](.+?)["\']'], webpage, 'iframe') - iframe_page = self._download_webpage( - '%s/%s' % (host, iframe_path), video_id) - relinker_url = self._extract_relinker_url(iframe_page) + if not iframe_url.startswith('http'): + iframe_url = compat_urlparse.urljoin(url, iframe_url) + webpage = self._download_webpage( + iframe_url, video_id) + relinker_url = self._extract_relinker_url(webpage) relinker = self._download_json( '%s&output=47' % relinker_url, video_id) @@ -112,15 +139,15 @@ class RaiIE(InfoExtractor): uploader = media.get('author') upload_date = unified_strdate(media.get('date')) else: - title = self._search_regex( - r'var\s+videoTitolo\s*=\s*"([^"]+)";', - webpage, 'title', default=None) or self._og_search_title(webpage) + title = (self._search_regex( + r'var\s+videoTitolo\s*=\s*"(.+?)";', + webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"') description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) duration = None uploader = self._html_search_meta('Editore', webpage, 'uploader') upload_date = unified_strdate(self._html_search_meta( - 'item-date', webpage, 'upload date')) + 'item-date', webpage, 'upload date', default=None)) subtitles = self.extract_subtitles(video_id, webpage)