Improve geo bypass mechanism
[youtube-dl] / youtube_dl / extractor / itv.py
index d029609c3fdc56a5372e938baa5e90acb5290c45..021c6b2787d8748544a1e2eb2ccd6161ffcd8266 100644 (file)
@@ -6,7 +6,10 @@ import xml.etree.ElementTree as etree
 import json
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_etree_register_namespace,
+)
 from ..utils import (
     extract_attributes,
     xpath_with_ns,
@@ -20,7 +23,8 @@ from ..utils import (
 
 
 class ITVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-z]+)'
+    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+    _GEO_COUNTRIES = ['GB']
     _TEST = {
         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
         'info_dict': {
@@ -47,7 +51,7 @@ class ITVIE(InfoExtractor):
             'com': 'http://schemas.itv.com/2009/05/Common',
         }
         for ns, full_ns in ns_map.items():
-            etree.register_namespace(ns, full_ns)
+            compat_etree_register_namespace(ns, full_ns)
 
         def _add_ns(name):
             return xpath_with_ns(name, ns_map)
@@ -95,10 +99,15 @@ class ITVIE(InfoExtractor):
             headers=headers, data=etree.tostring(req_env))
         playlist = xpath_element(resp_env, './/Playlist')
         if playlist is None:
+            fault_code = xpath_text(resp_env, './/faultcode')
             fault_string = xpath_text(resp_env, './/faultstring')
+            if fault_code == 'InvalidGeoRegion':
+                self.raise_geo_restricted(
+                    msg=fault_string, countries=self._GEO_COUNTRIES)
             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
         title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
-        media_files = xpath_element(playlist, 'VideoEntries/Video/MediaFiles', fatal=True)
+        video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
+        media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
         rtmp_url = media_files.attrib['base']
 
         formats = []
@@ -170,10 +179,21 @@ class ITVIE(InfoExtractor):
                         })
         self._sort_formats(formats)
 
+        subtitles = {}
+        for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
+            if not caption_url.text:
+                continue
+            ext = determine_ext(caption_url.text, 'ttml')
+            subtitles.setdefault('en', []).append({
+                'url': caption_url.text,
+                'ext': 'ttml' if ext == 'xml' else ext,
+            })
+
         return {
             'id': video_id,
             'title': title,
             'formats': formats,
+            'subtitles': subtitles,
             'episode_title': title,
             'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
             'series': xpath_text(playlist, 'ProgrammeTitle'),