X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=d224aa8e18471787bc00a515836b39fd558bac1f;hb=9e1a5b845586a0a5431fb72467142046d8571e6f;hp=51dbbc8db13fb680822eac81b526e7397d18b6d2;hpb=23be51d8ce132dbb967f460e1225fdaaa43dff39;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51dbbc8db..d224aa8e1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,11 +7,12 @@ import re from .common import InfoExtractor from .youtube import YoutubeIE -from ..utils import ( +from ..compat import ( compat_urllib_parse, compat_urlparse, compat_xml_parse_error, - +) +from ..utils import ( determine_ext, ExtractorError, float_or_none, @@ -99,6 +100,22 @@ class GenericIE(InfoExtractor): 'uploader': 'Championat', }, }, + { + # https://github.com/rg3/youtube-dl/issues/3541 + 'add_ie': ['Brightcove'], + 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', + 'info_dict': { + 'id': '3866516442001', + 'ext': 'mp4', + 'title': 'Leer mij vrouwen kennen: Aflevering 1', + 'description': 'Leer mij vrouwen kennen: Aflevering 1', + 'uploader': 'SBS Broadcasting', + }, + 'skip': 'Restricted to Netherlands', + 'params': { + 'skip_download': True, # m3u8 download + }, + }, # Direct link to a video { 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', @@ -325,7 +342,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'age_limit': 18, 'uploader': 'www.handjobhub.com', - 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub', + 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com', } }, # RSS feed @@ -389,7 +406,44 @@ class GenericIE(InfoExtractor): 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', 'duration': 1715.0, 'uploader': 'thoughtworks.wistia.com', - }, + }, + }, + # Direct download with broken HEAD + { + 'url': 'http://ai-radio.org:8000/radio.opus', + 'info_dict': { + 'id': 'radio', + 'ext': 'opus', + 'title': 'radio', + }, + 'params': { + 'skip_download': True, # infinite live stream + }, + 'expected_warnings': [ + r'501.*Not Implemented' + ], + }, + # Soundcloud embed + { + 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', + 'info_dict': { + 'id': '174391317', + 'ext': 'mp3', + 'description': 'md5:ff867d6b555488ad3c52572bb33d432c', + 'uploader': 'Sophos Security', + 'title': 'Chet Chat 171 - Oct 29, 2014', + 'upload_date': '20141029', + } + }, + # Livestream embed + { + 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast', + 'info_dict': { + 'id': '67864563', + 'ext': 'flv', + 'upload_date': '20141112', + 'title': 'Rosetta #CometLanding webcast HL 10', + } }, ] @@ -532,6 +586,7 @@ class GenericIE(InfoExtractor): return { 'id': video_id, 'title': os.path.splitext(url_basename(url))[0], + 'direct': True, 'formats': [{ 'format_id': m.group('format_id'), 'url': url, @@ -544,7 +599,7 @@ class GenericIE(InfoExtractor): self._downloader.report_warning('Falling back on generic information extractor.') if full_response: - webpage = _webpage_read_content(url, video_id) + webpage = self._webpage_read_content(full_response, url, video_id) else: webpage = self._download_webpage(url, video_id) self.report_extraction(video_id) @@ -678,7 +733,7 @@ class GenericIE(InfoExtractor): 'title': video_title, 'id': video_id, } - + match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) if match: return { @@ -693,7 +748,7 @@ class GenericIE(InfoExtractor): # Look for embedded blip.tv player mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) if mobj: - return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV') + return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV') mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage) if mobj: return self.url_result(mobj.group(1), 'BlipTV') @@ -729,7 +784,7 @@ class GenericIE(InfoExtractor): # Look for Ooyala videos mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or - re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) + re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: return OoyalaIE._build_url_result(mobj.group('ec')) @@ -823,7 +878,7 @@ class GenericIE(InfoExtractor): # Look for embeded soundcloud player mobj = re.search( - r'