X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcommon.py;h=021945a89e16c80262728a4a9a400674735389a7;hb=HEAD;hp=55ce1a888bb4dc6c8c3172c78920636756c21a28;hpb=d9eb580a796ef6c9a248fdd8896ccf85349c35eb;p=youtube-dl diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 55ce1a888..021945a89 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -10,14 +10,15 @@ import os import random import re import socket +import ssl import sys import time import math -import xml from ..compat import ( - compat_cookiejar, + compat_cookiejar_Cookie, compat_cookies, + compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, @@ -44,6 +45,7 @@ from ..utils import ( compiled_regex_type, determine_ext, determine_protocol, + dict_get, error_to_compat_str, ExtractorError, extract_attributes, @@ -56,13 +58,18 @@ from ..utils import ( JSON_LD_RE, mimetype2ext, orderedSet, + parse_bitrate, parse_codecs, parse_duration, parse_iso8601, parse_m3u8_attributes, + parse_resolution, RegexNotFoundError, sanitized_Request, sanitize_filename, + str_or_none, + str_to_int, + strip_or_none, unescapeHTML, unified_strdate, unified_timestamp, @@ -108,7 +115,13 @@ class InfoExtractor(object): for RTMP - RTMP URL, for HLS - URL of the M3U8 media playlist, for HDS - URL of the F4M manifest, - for DASH - URL of the MPD manifest, + for DASH + - HTTP URL to plain file media (in case of + unfragmented media) + - URL of the MPD manifest or base URL + representing the media if MPD manifest + is parsed from a string (in case of + fragmented media) for MSS - URL of the ISM manifest. * manifest_url The URL of the manifest file in case of @@ -209,7 +222,7 @@ class InfoExtractor(object): * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) - * "resolution" (optional, string "{width}x{height"}, + * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) thumbnail: Full URL to a video thumbnail image. @@ -532,11 +545,11 @@ class InfoExtractor(object): raise ExtractorError('An extractor error has occurred.', cause=e) def __maybe_fake_ip_and_retry(self, countries): - if (not self._downloader.params.get('geo_bypass_country', None) and - self._GEO_BYPASS and - self._downloader.params.get('geo_bypass', True) and - not self._x_forwarded_for_ip and - countries): + if (not self._downloader.params.get('geo_bypass_country', None) + and self._GEO_BYPASS + and self._downloader.params.get('geo_bypass', True) + and not self._x_forwarded_for_ip + and countries): country_code = random.choice(countries) self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: @@ -612,9 +625,12 @@ class InfoExtractor(object): url_or_request = update_url_query(url_or_request, query) if data is not None or headers: url_or_request = sanitized_Request(url_or_request, data, headers) + exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error] + if hasattr(ssl, 'CertificateError'): + exceptions.append(ssl.CertificateError) try: return self._downloader.urlopen(url_or_request) - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + except tuple(exceptions) as err: if isinstance(err, compat_urllib_error.HTTPError): if self.__can_accept_status_code(err, expected_status): # Retain reference to error to prevent file object from @@ -672,8 +688,8 @@ class InfoExtractor(object): def __check_blocked(self, content): first_block = content[:512] - if ('Access to this site is blocked' in content and - 'Websense' in first_block): + if ('Access to this site is blocked' in content + and 'Websense' in first_block): msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' blocked_iframe = self._html_search_regex( r'