Fix "invalid escape sequences" error on Python 3.6
[youtube-dl] / youtube_dl / extractor / pbs.py
index 09aef7fb9f455d8d3247350b26a6d66043877605..6baed773fc6bf741a69f1baf222148065ef169c4 100644 (file)
@@ -236,7 +236,7 @@ class PBSIE(InfoExtractor):
                 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
                 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
                 'duration': 6559,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
@@ -249,7 +249,7 @@ class PBSIE(InfoExtractor):
                 'description': 'md5:c741d14e979fc53228c575894094f157',
                 'title': 'NOVA - Killer Typhoon',
                 'duration': 3172,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                 'upload_date': '20140122',
                 'age_limit': 10,
             },
@@ -270,7 +270,7 @@ class PBSIE(InfoExtractor):
                 'title': 'American Experience - Death and the Civil War, Chapter 1',
                 'description': 'md5:67fa89a9402e2ee7d08f53b920674c18',
                 'duration': 682,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
             },
             'params': {
                 'skip_download': True,  # requires ffmpeg
@@ -286,7 +286,7 @@ class PBSIE(InfoExtractor):
                 'title': 'FRONTLINE - United States of Secrets (Part One)',
                 'description': 'md5:55756bd5c551519cc4b7703e373e217e',
                 'duration': 6851,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
@@ -302,7 +302,7 @@ class PBSIE(InfoExtractor):
                 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
                 'description': 'md5:c0ff7475a4b70261c7e58f493c2792a5',
                 'duration': 1480,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
             },
         },
         {
@@ -315,7 +315,7 @@ class PBSIE(InfoExtractor):
                 'title': 'FRONTLINE - The Atomic Artists',
                 'description': 'md5:f677e4520cfacb4a5ce1471e31b57800',
                 'duration': 723,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
             },
             'params': {
                 'skip_download': True,  # requires ffmpeg
@@ -330,7 +330,7 @@ class PBSIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'FRONTLINE - Netanyahu at War',
                 'duration': 6852,
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                 'formats': 'mincount:8',
             },
         },
@@ -350,6 +350,15 @@ class PBSIE(InfoExtractor):
         410: 'This video has expired and is no longer available for online streaming.',
     }
 
+    def _real_initialize(self):
+        # Best effort: store the auto-localized station cookie; a miss must not abort extraction.
+        cookie = (self._download_json(
+            'http://localization.services.pbs.org/localize/auto/cookie/',
+            None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
+        station = self._search_regex(r'#?s=\["([^"]+)"', cookie or '', 'station', default=None)
+        if station:
+            self._set_cookie('.pbs.org', 'pbsol.station', station)
+
     def _extract_webpage(self, url):
         mobj = re.match(self._VALID_URL, url)
 
@@ -476,7 +485,8 @@ class PBSIE(InfoExtractor):
 
             redirect_info = self._download_json(
                 '%s?format=json' % redirect['url'], display_id,
-                'Downloading %s video url info' % (redirect_id or num))
+                'Downloading %s video url info' % (redirect_id or num),
+                headers=self.geo_verification_headers())
 
             if redirect_info['status'] == 'error':
                 raise ExtractorError(
@@ -506,12 +516,12 @@ class PBSIE(InfoExtractor):
         if http_url:
             for m3u8_format in m3u8_formats:
                 bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None)
-                # lower qualities(150k and 192k) are not available as http formats
-                # https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
-                # we will try to extract any http format higher than than the lowest quality documented in
-                # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
-                # as there also undocumented http formats formats(4500k and 6500k)
-                # http://www.pbs.org/video/2365815229/
+                # Lower qualities (150k and 192k) are not available as HTTP formats (see [1]),
+                # so we won't try extracting them.
+                # Since summer 2016 higher quality formats (4500k and 6500k) are also available
+                # albeit they are not documented in [2].
+                # 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
+                # 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
                 if not bitrate or int(bitrate) < 400:
                     continue
                 f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url)
@@ -558,7 +568,7 @@ class PBSIE(InfoExtractor):
         # Try turning it to 'program - title' naming scheme if possible
         alt_title = info.get('program', {}).get('title')
         if alt_title:
-            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title'])
+            info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + r'[\s\-:]+', '', info['title'])
 
         description = info.get('description') or info.get(
             'program', {}).get('description') or description