[openload] fix extraction(closes #10408)
[youtube-dl] / youtube_dl / extractor / common.py
index 96815099d0a2ef073cb724414ebe7629eca2dabe..4252d682563f9831a33225ff3dc906aa27f0eb01 100644 (file)
@@ -323,10 +323,18 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
-    _BYPASS_GEO attribute may be set to False in order to disable
+    _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
     Though it won't disable explicit geo restriction bypass based on
-    country code provided with bypass_geo_restriction_as_country.
+    country code provided with geo_bypass_country. (experimental)
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    geo restriction bypass mechanism right away in order to bypass
+    geo restriction, of course, if the mechanism is not disabled. (experimental)
+
+    NB: both these geo attributes are experimental and may change in future
+    or be completely removed.
 
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
@@ -335,7 +343,8 @@ class InfoExtractor(object):
     _ready = False
     _downloader = None
     _x_forwarded_for_ip = None
-    _BYPASS_GEO = True
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
     _WORKING = True
 
     def __init__(self, downloader=None):
@@ -370,32 +379,58 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
-        if not self._x_forwarded_for_ip:
-            country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None)
-            if country_code:
-                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+        self._initialize_geo_bypass(self._GEO_COUNTRIES)
         if not self._ready:
             self._real_initialize()
             self._ready = True
 
+    def _initialize_geo_bypass(self, countries):
+        """
+        Initialize geo restriction bypass mechanism.
+
+        This method is used to initialize geo bypass mechanism based on faking
+        X-Forwarded-For HTTP header. A random country from provided country list
+        is selected and a random IP belonging to this country is generated. This
+        IP will be passed as X-Forwarded-For HTTP header in all subsequent
+        HTTP requests.
+
+        This method will be used for initial geo bypass mechanism initialization
+        during the instance initialization with _GEO_COUNTRIES.
+
+        You may also manually call it from extractor's code if geo countries
+        information is not available beforehand (e.g. obtained during
+        extraction) or due to some another reason.
+        """
+        if not self._x_forwarded_for_ip:
+            country_code = self._downloader.params.get('geo_bypass_country', None)
+            # If there is no explicit country for geo bypass specified and
+            # the extractor is known to be geo restricted let's fake IP
+            # as X-Forwarded-For right away.
+            if (not country_code and
+                    self._GEO_BYPASS and
+                    self._downloader.params.get('geo_bypass', True) and
+                    countries):
+                country_code = random.choice(countries)
+            if country_code:
+                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_stdout(
+                        '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+                        % (self._x_forwarded_for_ip, country_code.upper()))
+
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
         try:
             for _ in range(2):
                 try:
                     self.initialize()
-                    return self._real_extract(url)
+                    ie_result = self._real_extract(url)
+                    if self._x_forwarded_for_ip:
+                        ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+                    return ie_result
                 except GeoRestrictedError as e:
-                    if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and
-                            self._BYPASS_GEO and
-                            self._downloader.params.get('bypass_geo_restriction', True) and
-                            not self._x_forwarded_for_ip and
-                            e.countries):
-                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
-                        if self._x_forwarded_for_ip:
-                            self.report_warning(
-                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
-                            continue
+                    if self.__maybe_fake_ip_and_retry(e.countries):
+                        continue
                     raise
         except ExtractorError:
             raise
@@ -404,6 +439,21 @@ class InfoExtractor(object):
         except (KeyError, StopIteration) as e:
             raise ExtractorError('An extractor error has occurred.', cause=e)
 
+    def __maybe_fake_ip_and_retry(self, countries):
+        if (not self._downloader.params.get('geo_bypass_country', None) and
+                self._GEO_BYPASS and
+                self._downloader.params.get('geo_bypass', True) and
+                not self._x_forwarded_for_ip and
+                countries):
+            country_code = random.choice(countries)
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+            if self._x_forwarded_for_ip:
+                self.report_warning(
+                    'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+                    % (self._x_forwarded_for_ip, country_code.upper()))
+                return True
+        return False
+
     def set_downloader(self, downloader):
         """Sets the downloader for this IE."""
         self._downloader = downloader