Detect Websense censorship (Fixes #2670)
author: Philipp Hagemeister <phihag@phihag.de>
Thu, 3 Apr 2014 04:07:35 +0000 (06:07 +0200)
committer: Philipp Hagemeister <phihag@phihag.de>
Thu, 3 Apr 2014 04:09:38 +0000 (06:09 +0200)
youtube_dl/extractor/common.py

index 78f238f8428c5df0fce2dcc26f66b1301595e62b..fe09817f9ea0df22920503a61b9a984a5c94c1be 100644 (file)
@@ -252,6 +252,17 @@ class InfoExtractor(object):
                 outf.write(webpage_bytes)
 
         content = webpage_bytes.decode(encoding, 'replace')
+
+        if (u'<title>Access to this site is blocked</title>' in content and
+                u'Websense' in content[:512]):
+            msg = u'Access to URL %s has been blocked by Websense filtering software in your network.' % urlh.geturl()
+            blocked_iframe = self._html_search_regex(
+                r'<iframe src="([^"]+)"', content,
+                u'Websense information URL', default=None)
+            if blocked_iframe:
+                msg += u' Visit %s for more details' % blocked_iframe
+            raise ExtractorError(msg, expected=True)
+
         return (content, urlh)
 
     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):