X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=1b7697870bde93fd5bb4218d12eac8c166a306ba;hb=cc7fec5818254f4679896823c7de9d17f50201ca;hp=44f7ea3fd89b3753052cb152ab3c6750ad1b4b18;hpb=0990305d2acc4c1b7869dae2773c1f24125804bd;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 44f7ea3fd..1b7697870 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -12,6 +12,7 @@ from ..utils import ( compat_urlparse, compat_xml_parse_error, + determine_ext, ExtractorError, float_or_none, HEADRequest, @@ -351,7 +352,36 @@ class GenericIE(InfoExtractor): 'description': 're:' }, 'playlist_mincount': 11, - } + }, + # Multiple brightcove videos + # https://github.com/rg3/youtube-dl/issues/2283 + { + 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', + 'info_dict': { + 'id': 'always-never', + 'title': 'Always / Never - The New Yorker', + }, + 'playlist_count': 3, + 'params': { + 'extract_flat': False, + 'skip_download': True, + } + }, + # MLB embed + { + 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/', + 'md5': '96f09a37e44da40dd083e12d9a683327', + 'info_dict': { + 'id': '33322633', + 'ext': 'mp4', + 'title': 'Ump changes call to ball', + 'description': 'md5:71c11215384298a172a6dcb4c2e20685', + 'duration': 48, + 'timestamp': 1401537900, + 'upload_date': '20140531', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, ] def report_download_webpage(self, video_id): @@ -794,6 +824,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'SBS') + mobj = re.search( + r']+?src=(["\'])(?Phttps?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'MLB') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: @@ -819,7 +855,6 @@ class GenericIE(InfoExtractor): \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s* ["']?url["']?\s*:\s*["']([^"']+)["'] ''', webpage) - assert found if not found: # Try to find twitter cards info found = re.findall(r'.*?.*?]+)? src="([^"]+)"', webpage) if not found: found = re.search( r'(?i)