X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=376feecae7a5ef59ae8e4cf674d984deefc0d6b0;hb=0791ac1b4415601f464f9656a4485b3ae6b67f4e;hp=27584c44cf122d2f4ac41463c5311184bc2a0268;hpb=8765222d2211cd6f2a40611249181af0bbb2d531;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 27584c44c..376feecae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -198,6 +198,21 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html + { + 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf', + 'info_dict': { + 'id': 'mZlp2ctYIUEB', + 'ext': 'mp4', + 'title': 'Tikibad ontruimd wegens brand', + 'description': 'md5:05ca046ff47b931f9b04855015e163a4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 33, + }, + 'params': { + 'skip_download': True, + }, + }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -304,6 +319,19 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, + { + # ooyala video embedded with http://player.ooyala.com/iframe.js + 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/', + 'info_dict': { + 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB', + 'ext': 'mp4', + 'title': '"Steve Jobs: Man in the Machine" trailer', + 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', + }, + 'params': { + 'skip_download': True, + }, + }, # multiple ooyala embeds on SBN network websites { 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', @@ -1178,13 +1206,15 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) - # Is it an RSS feed or a SMIL file? + # Is it an RSS feed, a SMIL file or a XSPF playlist? try: doc = parse_xml(webpage) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): return self._parse_smil(doc, url, video_id) + elif doc.tag == '{http://xspf.org/ns/0/}playlist': + return self.playlist_result(self._parse_xspf(doc, video_id), video_id) except compat_xml_parse_error: pass @@ -1390,7 +1420,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) # Look for Ooyala videos - mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or + mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) @@ -1725,7 +1755,7 @@ class GenericIE(InfoExtractor): if not found: # Broaden the findall a little bit: JWPlayer JS loader found = filter_video(re.findall( - r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) + r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) if not found: # Flow player found = filter_video(re.findall(r'''(?xs) @@ -1786,7 +1816,8 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] - if determine_ext(video_url) == 'smil': + ext = determine_ext(video_url) + if ext == 'smil': entries.append({ 'id': video_id, 'formats': self._extract_smil_formats(video_url, video_id), @@ -1794,6 +1825,8 @@ class GenericIE(InfoExtractor): 'title': video_title, 'age_limit': age_limit, }) + elif ext == 'xspf': + return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) else: entries.append({ 'id': video_id,