X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=1b7697870bde93fd5bb4218d12eac8c166a306ba;hb=a8aa99442f0bd6e3c65bd6c464dfd0a87c99f65d;hp=59cc8a5ffe515ef26a21a1bcc84e9701ffb6c9fc;hpb=bcc069a937ca51b85e57fee61eff2f45f44816ac;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 59cc8a5ff..1b7697870 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -12,6 +12,7 @@ from ..utils import ( compat_urlparse, compat_xml_parse_error, + determine_ext, ExtractorError, float_or_none, HEADRequest, @@ -341,7 +342,46 @@ class GenericIE(InfoExtractor): 'uploader': 'www.handjobhub.com', 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub', } - } + }, + # RSS feed + { + 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'info_dict': { + 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'title': 'Zero Punctuation', + 'description': 're:' + }, + 'playlist_mincount': 11, + }, + # Multiple brightcove videos + # https://github.com/rg3/youtube-dl/issues/2283 + { + 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', + 'info_dict': { + 'id': 'always-never', + 'title': 'Always / Never - The New Yorker', + }, + 'playlist_count': 3, + 'params': { + 'extract_flat': False, + 'skip_download': True, + } + }, + # MLB embed + { + 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/', + 'md5': '96f09a37e44da40dd083e12d9a683327', + 'info_dict': { + 'id': '33322633', + 'ext': 'mp4', + 'title': 'Ump changes call to ball', + 'description': 'md5:71c11215384298a172a6dcb4c2e20685', + 'duration': 48, + 'timestamp': 1401537900, + 'upload_date': '20140531', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, ] def report_download_webpage(self, video_id): @@ -784,6 +824,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'SBS') + mobj = re.search( + r']+?src=(["\'])(?Phttps?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'MLB') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: @@ -820,13 +866,14 @@ class GenericIE(InfoExtractor): if m_video_type is not None: def check_video(vurl): vpath = compat_urlparse.urlparse(vurl).path - return '.' in vpath and not vpath.endswith('.swf') + vext = determine_ext(vpath) + return '.' in vpath and vext not in ('swf', 'png', 'jpg') found = list(filter( check_video, re.findall(r'.*?.*?]+)? src="([^"]+)"', webpage) if not found: found = re.search( r'(?i)