X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fgeneric.py;h=a028c4ed4e47a780a3a2f04e03a37997af003e4e;hb=27de5625d4b75789afd288beb581bd54bc8bd623;hp=2d871f8b41a6334152ae4677e275fb650717b14e;hpb=cd791a5ea08b77dab37c15efa7e064c07144cb6a;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2d871f8b4..a028c4ed4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -17,6 +17,7 @@ from ..utils import ( ExtractorError, float_or_none, HEADRequest, + is_html, orderedSet, parse_xml, smuggle_url, @@ -488,6 +489,16 @@ class GenericIE(InfoExtractor): 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', } }, + # Cinerama player + { + 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm', + 'info_dict': { + 'id': '730m_DandD_1901_512k', + 'ext': 'mp4', + 'uploader': 'www.abc.net.au', + 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', + } + } ] def report_following_redirect(self, new_url): @@ -647,7 +658,7 @@ class GenericIE(InfoExtractor): # Maybe it's a direct link to a video? # Be careful not to download the whole thing! first_bytes = full_response.read(512) - if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')): + if not is_html(first_bytes): self._downloader.report_warning( 'URL could be a direct video link, returning it as such.') upload_date = unified_strdate( @@ -926,7 +937,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r']+?src=(["\'])(?Phttps?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) + r']+?src=(["\'])(?Phttps?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') @@ -1045,6 +1056,10 @@ class GenericIE(InfoExtractor): \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s* ["']?url["']?\s*:\s*["']([^"']+)["'] ''', webpage)) + if not found: + # Cinerama player + found = re.findall( + r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage) if not found: # Try to find twitter cards info found = filter_video(re.findall(