X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbiobiochiletv.py;h=dc86c57c5df2e1efdb995cae9bd482f31b6c48fa;hb=HEAD;hp=1332281337b2b5d64d0b89eb7d257a11ebffee71;hpb=d6712378e73951bede475569c887a1ac73f660a9;p=youtube-dl diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py index 133228133..dc86c57c5 100644 --- a/youtube_dl/extractor/biobiochiletv.py +++ b/youtube_dl/extractor/biobiochiletv.py @@ -2,11 +2,14 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + ExtractorError, + remove_end, +) class BioBioChileTVIE(InfoExtractor): - _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P[^/]+)\.shtml' + _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P[^/]+)\.shtml' _TESTS = [{ 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', @@ -15,9 +18,10 @@ class BioBioChileTVIE(InfoExtractor): 'id': 'sobre-camaras-y-camarillas-parlamentarias', 'ext': 'mp4', 'title': 'Sobre Cámaras y camarillas parlamentarias', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Fernando Atria', }, + 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', }, { # different uploader layout 'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml', @@ -26,12 +30,26 @@ class BioBioChileTVIE(InfoExtractor): 'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades', 'ext': 'mp4', 'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Piangella Obrador', }, 'params': { 'skip_download': True, }, + 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', + }, { + 'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', + 'info_dict': { + 'id': 'b4xd0LK3SK', + 'ext': 'mp4', + # TODO: fix url_transparent information overriding + # 'uploader': 'Juan Pablo Echenique', + 'title': 'Comentario Oscar Cáceres', + }, + 'params': { + # empty m3u8 manifest + 'skip_download': True, + }, }, { 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', 'only_matching': True, @@ -45,42 +63,24 @@ class BioBioChileTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') - - file_url = self._search_regex( - r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P.+?)\1', - webpage, 'file url', group='url') + rudo_url = self._search_regex( + r']+src=(?P[\'"])(?P(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage, 'embed URL', None, group='url') + if not rudo_url: + raise ExtractorError('No videos found') - base_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*fileURL', webpage, - 'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/', - group='url') - - formats = self._extract_m3u8_formats( - '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - f = { - 'url': '%s%s' % (base_url, file_url), - 'format_id': 'http', - 'protocol': 'http', - 'preference': 1, - } - if formats: - f_copy = formats[-1].copy() - f_copy.update(f) - f = f_copy - formats.append(f) - self._sort_formats(formats) + title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') thumbnail = self._og_search_thumbnail(webpage) uploader = self._html_search_regex( - r']+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)', + r']+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)', webpage, 'uploader', fatal=False) return { + '_type': 'url_transparent', + 'url': rudo_url, 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'uploader': uploader, - 'formats': formats, }