X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=1b7697870bde93fd5bb4218d12eac8c166a306ba;hb=a8aa99442f0bd6e3c65bd6c464dfd0a87c99f65d;hp=3a908d01f23dbfac41ea124c65336cc12eb79e86;hpb=f971dcbba0671391c0ab3180d02143db28675c9a;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3a908d01f..1b7697870 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -12,6 +12,7 @@ from ..utils import ( compat_urlparse, compat_xml_parse_error, + determine_ext, ExtractorError, float_or_none, HEADRequest, @@ -351,7 +352,36 @@ class GenericIE(InfoExtractor): 'description': 're:' }, 'playlist_mincount': 11, - } + }, + # Multiple brightcove videos + # https://github.com/rg3/youtube-dl/issues/2283 + { + 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', + 'info_dict': { + 'id': 'always-never', + 'title': 'Always / Never - The New Yorker', + }, + 'playlist_count': 3, + 'params': { + 'extract_flat': False, + 'skip_download': True, + } + }, + # MLB embed + { + 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/', + 'md5': '96f09a37e44da40dd083e12d9a683327', + 'info_dict': { + 'id': '33322633', + 'ext': 'mp4', + 'title': 'Ump changes call to ball', + 'description': 'md5:71c11215384298a172a6dcb4c2e20685', + 'duration': 48, + 'timestamp': 1401537900, + 'upload_date': '20140531', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, ] def report_download_webpage(self, video_id): @@ -794,6 +824,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'SBS') + mobj = re.search( + r']+?src=(["\'])(?Phttps?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'MLB') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: @@ -830,13 +866,14 @@ class GenericIE(InfoExtractor): if m_video_type is not None: def check_video(vurl): vpath = compat_urlparse.urlparse(vurl).path - return '.' in vpath and not vpath.endswith('.swf') + vext = determine_ext(vpath) + return '.' in vpath and vext not in ('swf', 'png', 'jpg') found = list(filter( check_video, re.findall(r'.*?.*?]+)? src="([^"]+)"', webpage) if not found: found = re.search( r'(?i)