Improve geo bypass mechanism
author Sergey M․ <dstftw@gmail.com>
Sat, 18 Feb 2017 20:53:23 +0000 (03:53 +0700)
committer Sergey M <dstftw@gmail.com>
Sat, 18 Feb 2017 21:10:08 +0000 (05:10 +0800)
* Rename options so that their prefix matches --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction

13 files changed:
youtube_dl/extractor/common.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/go.py
youtube_dl/extractor/itv.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/ondemandkorea.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/srgssr.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/viki.py
youtube_dl/utils.py

index 6eb6a25b8ef85678681eef1e58468259bedb166f..272da74b65997fcc02153159cd512c2fb7c0f36a 100644 (file)
@@ -323,10 +323,15 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
-    _BYPASS_GEO attribute may be set to False in order to disable
+    _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
     Though it won't disable explicit geo restriction bypass based on
-    country code provided with geo_bypass_country.
+    country code provided with geo_bypass_country. (experimental)
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    geo restriction bypass mechanism right away in order to bypass
+    geo restriction, of course, if the mechanism is not disabled. (experimental)
 
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
@@ -335,7 +340,8 @@ class InfoExtractor(object):
     _ready = False
     _downloader = None
     _x_forwarded_for_ip = None
-    _BYPASS_GEO = True
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
     _WORKING = True
 
     def __init__(self, downloader=None):
@@ -370,13 +376,27 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
+        self.__initialize_geo_bypass()
+        if not self._ready:
+            self._real_initialize()
+            self._ready = True
+
+    def __initialize_geo_bypass(self):
         if not self._x_forwarded_for_ip:
             country_code = self._downloader.params.get('geo_bypass_country', None)
+            # If there is no explicit country for geo bypass specified and
+            # the extractor is known to be geo restricted let's fake IP
+            # as X-Forwarded-For right away.
+            if (not country_code and
+                    self._GEO_BYPASS and
+                    self._downloader.params.get('geo_bypass', True) and
+                    self._GEO_COUNTRIES):
+                country_code = random.choice(self._GEO_COUNTRIES)
             if country_code:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
-        if not self._ready:
-            self._real_initialize()
-            self._ready = True
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_stdout(
+                        '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -389,16 +409,8 @@ class InfoExtractor(object):
                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
                     return ie_result
                 except GeoRestrictedError as e:
-                    if (not self._downloader.params.get('geo_bypass_country', None) and
-                            self._BYPASS_GEO and
-                            self._downloader.params.get('geo_bypass', True) and
-                            not self._x_forwarded_for_ip and
-                            e.countries):
-                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
-                        if self._x_forwarded_for_ip:
-                            self.report_warning(
-                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
-                            continue
+                    if self.__maybe_fake_ip_and_retry(e.countries):
+                        continue
                     raise
         except ExtractorError:
             raise
@@ -407,6 +419,19 @@ class InfoExtractor(object):
         except (KeyError, StopIteration) as e:
             raise ExtractorError('An extractor error has occurred.', cause=e)
 
+    def __maybe_fake_ip_and_retry(self, countries):
+        if (not self._downloader.params.get('geo_bypass_country', None) and
+                self._GEO_BYPASS and
+                self._downloader.params.get('geo_bypass', True) and
+                not self._x_forwarded_for_ip and
+                countries):
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
+            if self._x_forwarded_for_ip:
+                self.report_warning(
+                    'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+                return True
+        return False
+
     def set_downloader(self, downloader):
         """Sets the downloader for this IE."""
         self._downloader = downloader
index 755db806aba06e2cb1f46fc6b4dc5ff9c275976e..e7abc888988e9807ee46abc4d4e44f8276b9a084 100644 (file)
@@ -20,6 +20,7 @@ from ..utils import (
 class DramaFeverBaseIE(AMPIE):
     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
     _NETRC_MACHINE = 'dramafever'
+    _GEO_COUNTRIES = ['US', 'CA']
 
     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
 
@@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
             if isinstance(e.cause, compat_HTTPError):
                 self.raise_geo_restricted(
                     msg='Currently unavailable in your country',
-                    countries=['US', 'CA'])
+                    countries=self._GEO_COUNTRIES)
             raise
 
         series_id, episode_number = video_id.split('.')
index ec902c670174eb7aedca3417b0cedc8621baea42..b205bfc7c99922bda00326bf4ac3caeab7ac2c95 100644 (file)
@@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
         }
     }
     _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+    _GEO_COUNTRIES = ['US']
     _TESTS = [{
         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
         'info_dict': {
@@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
                         for error in errors:
                             if error.get('code') == 1002:
                                 self.raise_geo_restricted(
-                                    error['message'], countries=['US'])
+                                    error['message'], countries=self._GEO_COUNTRIES)
                         error_message = ', '.join([error['message'] for error in errors])
                         raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
                     asset_url += '?' + entitlement['uplynkData']['sessionKey']
index aabde15f3ff2acf09d5746b749388d5f29eb1d74..021c6b2787d8748544a1e2eb2ccd6161ffcd8266 100644 (file)
@@ -24,6 +24,7 @@ from ..utils import (
 
 class ITVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+    _GEO_COUNTRIES = ['GB']
     _TEST = {
         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
         'info_dict': {
@@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
             fault_code = xpath_text(resp_env, './/faultcode')
             fault_string = xpath_text(resp_env, './/faultstring')
             if fault_code == 'InvalidGeoRegion':
-                self.raise_geo_restricted(msg=fault_string, countries=['GB'])
+                self.raise_geo_restricted(
+                    msg=fault_string, countries=self._GEO_COUNTRIES)
             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
         title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
         video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
index 78ece33e12ab869f746991137c7b7b456eb08c64..13af9ed1f7395e178774036ea8ae68dab62c8570 100644 (file)
@@ -14,6 +14,7 @@ from ..utils import (
 
 
 class NRKBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['NO']
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
             # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
             if 'IsGeoBlocked' in message_type:
                 self.raise_geo_restricted(
-                    msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
+                    msg=MESSAGES.get('ProgramIsGeoBlocked'),
+                    countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, MESSAGES.get(
                     message_type, message_type)),
index 0c85d549eae76134337abed9eb1901ef4388258a..df1ce3c1db1eaa22d03609ddb55748e404b1f4a9 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
 
 class OnDemandKoreaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+    _GEO_COUNTRIES = ['US', 'CA']
     _TEST = {
         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
         'info_dict': {
@@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
         if 'msg_block_01.png' in webpage:
             self.raise_geo_restricted(
                 msg='This content is not available in your region',
-                countries=['US', 'CA'])
+                countries=self._GEO_COUNTRIES)
 
         if 'This video is only available to ODK PLUS members.' in webpage:
             raise ExtractorError(
index 64f47bae303addd16573a36348d61aa08c133dd7..3e51b4dd746d88ead54bc9ca469113f1f3cf14d2 100644 (file)
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
         )
     ''' % '|'.join(list(zip(*_STATIONS))[0])
 
+    _GEO_COUNTRIES = ['US']
+
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
                 message = self._ERRORS.get(
                     redirect_info['http_code'], redirect_info['message'])
                 if redirect_info['http_code'] == 403:
-                    self.raise_geo_restricted(msg=message, countries=['US'])
+                    self.raise_geo_restricted(
+                        msg=message, countries=self._GEO_COUNTRIES)
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, message), expected=True)
 
index a35a0a5388753a5d845ac12b22dbcb314dcee72e..bb73eb1d5c2adb848b1b8af13c7b70a0d10f919d 100644 (file)
@@ -14,7 +14,8 @@ from ..utils import (
 
 class SRGSSRIE(InfoExtractor):
     _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['CH']
 
     _ERRORS = {
         'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
         if media_data.get('block') and media_data['block'] in self._ERRORS:
             message = self._ERRORS[media_data['block']]
             if media_data['block'] == 'GEOBLOCK':
-                self.raise_geo_restricted(msg=message, countries=['CH'])
+                self.raise_geo_restricted(
+                    msg=message, countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, message), expected=True)
 
index f2a2200bf87180f47ada72857a7eb617e0e0138e..9e2c9fcc69946b1933d3aa57c18a1a3b9a4c3dd4 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
 
 
 class SVTBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['SE']
     def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
@@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
                 })
         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
             self.raise_geo_restricted(
-                'This video is only available in Sweden', countries=['SE'])
+                'This video is only available in Sweden',
+                countries=self._GEO_COUNTRIES)
         self._sort_formats(formats)
 
         subtitles = {}
index f86d804c1677e6cc1eceff03cfe3d76affbf2e7a..8152acefd099da43e18bca3e5be579d24dd29062 100644 (file)
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
                         )
                         (?P<id>[\da-fA-F]+)
                     '''
+    _GEO_COUNTRIES = ['BG']
     _TESTS = [{
         'url': 'http://vbox7.com/play:0946fff23c',
         'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
         video_url = video['src']
 
         if '/na.mp4' in video_url:
-            self.raise_geo_restricted(countries=['BG'])
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
         uploader = video.get('uploader')
 
index 1709fd6bbe779a7a7b4315c1016a67d536aa14b3..0f8c156a79ff7d67958b88e488ff50548bf3c998 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 class VGTVIE(XstreamIE):
     IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
 
     _HOST_TO_APPNAME = {
         'vgtv.no': 'vgtv',
@@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
             properties = try_get(
                 data, lambda x: x['streamConfiguration']['properties'], list)
             if properties and 'geoblocked' in properties:
-                raise self.raise_geo_restricted(countries=['NO'])
+                raise self.raise_geo_restricted(
+                    countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
 
         self._sort_formats(info['formats'])
 
index 68a74e2467852b8ec35cab6c8d5a1a6f1b2b7a42..e9c8bf824c099b1dbc5fadbf93de21043b88d8c5 100644 (file)
@@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
     _APP_VERSION = '2.2.5.1428709186'
     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
 
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
     _NETRC_MACHINE = 'viki'
 
     _token = None
index cbf7639c56393f21bd259ee30e294d1db704fb6b..17b83794a2becf272005305442094223cd054638 100644 (file)
@@ -3291,7 +3291,7 @@ class GeoUtils(object):
         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
         addr_max = addr_min | (0xffffffff >> int(preflen))
         return compat_str(socket.inet_ntoa(
-            compat_struct_pack('!I', random.randint(addr_min, addr_max))))
+            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 
 
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):