X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=da4193734971122c2ef72f3dca4acecd93e5f784;hb=2a9e1e453ac1543426637d45bd59afe2783dc923;hp=7ee95fe391ad9cac97c9cf0b0364c5a2a88b6f84;hpb=9f62eaf4ef87cd379318a1330373317cd6d4d63c;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7ee95fe39..da4193734 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -74,7 +74,7 @@ class InfoExtractor(object): "http", "https", "rtsp", "rtmp", "m3u8" or so. * preference Order number of this format. If this field is present and not None, the formats get sorted - by this field. + by this field, regardless of all other values. -1 for default (order by other properties), -2 or smaller for less than default. * quality Order number of the video quality of this @@ -97,7 +97,9 @@ class InfoExtractor(object): thumbnail: Full URL to a video thumbnail image. description: One-line video description. uploader: Full name of the video uploader. + timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). + If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. location: Physical location of the video. subtitles: The subtitle file contents as a dictionary in the format @@ -118,9 +120,6 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. - _real_extract() must return a *list* of information dictionaries as - described above. - Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ @@ -253,6 +252,17 @@ class InfoExtractor(object): outf.write(webpage_bytes) content = webpage_bytes.decode(encoding, 'replace') + + if (u'Access to this site is blocked' in content and + u'Websense' in content[:512]): + msg = u'Access to this webpage has been blocked by Websense filtering software in your network.' + blocked_iframe = self._html_search_regex( + r'