Fix "invalid escape sequences" error on Python 3.6

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index bde65fa270fb399140e85ac63395060bd7007d2e..86dc7930771a959cc2b6f28700fc840a4893d242 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -56,10 +56,10 @@ from .dailymotion import (
  )
  from .onionstudios import OnionStudiosIE
  from .viewlift import ViewLiftEmbedIE
-from .screenwavemedia import ScreenwaveMediaIE
  from .mtv import MTVServicesEmbeddedIE
  from .pladform import PladformIE
  from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
  from .googledrive import GoogleDriveIE
  from .jwplatform import JWPlatformIE
  from .digiteka import DigitekaIE
@@ -73,8 +73,11 @@ from .kaltura import KalturaIE
  from .eagleplatform import EaglePlatformIE
  from .facebook import FacebookIE
  from .soundcloud import SoundcloudIE
+from .tunein import TuneInBaseIE
  from .vbox7 import Vbox7IE
  from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
  
  
  class GenericIE(InfoExtractor):
@@ -236,7 +239,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Tikibad ontruimd wegens brand',
                  'description': 'md5:05ca046ff47b931f9b04855015e163a4',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 33,
              },
              'params': {
@@ -297,7 +300,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'upload_date': '20130224',
                  'uploader_id': 'TheVerge',
-                'description': 're:^Chris Ziegler takes a look at the\.*',
+                'description': r're:^Chris Ziegler takes a look at the\.*',
                  'uploader': 'The Verge',
                  'title': 'First Firefox OS phones side-by-side',
              },
@@ -343,10 +346,10 @@ class GenericIE(InfoExtractor):
              },
              'skip': 'There is a limit of 200 free downloads / month for the test song',
          },
-        # embedded brightcove video
-        # it also tests brightcove videos that need to set the 'Referer' in the
-        # http requests
          {
+            # embedded brightcove video
+            # it also tests brightcove videos that need to set the 'Referer'
+            # in the http requests
              'add_ie': ['BrightcoveLegacy'],
              'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
              'info_dict': {
@@ -360,6 +363,24 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        {
+            # embedded via itemprop embedURL, with the video id spelled as `idVideo`
+            'add_ie': ['BrightcoveLegacy'],
+            'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+            'info_dict': {
+                'id': '5255628253001',
+                'ext': 'mp4',
+                'title': 'md5:37c519b1128915607601e75a87995fc0',
+                'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+                'uploader': 'BFM BUSINESS',
+                'uploader_id': '876450612001',
+                'timestamp': 1482255315,
+                'upload_date': '20161220',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
          {
              # https://github.com/rg3/youtube-dl/issues/2253
              'url': 'http://bcove.me/i6nfkrc3',
@@ -518,7 +539,7 @@ class GenericIE(InfoExtractor):
                  'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
                  'ext': 'mp4',
                  'title': 'Ужастики, русский трейлер (2015)',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 153,
              }
          },
@@ -738,7 +759,7 @@ class GenericIE(InfoExtractor):
                  'duration': 48,
                  'timestamp': 1401537900,
                  'upload_date': '20140531',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
              },
          },
          # Wistia embed
@@ -808,6 +829,21 @@ class GenericIE(InfoExtractor):
              },
              'playlist_mincount': 7,
          },
+        # TuneIn station embed
+        {
+            'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+            'info_dict': {
+                'id': '204146',
+                'ext': 'mp3',
+                'title': 'CNRV',
+                'location': 'Paris, France',
+                'is_live': True,
+            },
+            'params': {
+                # Live stream
+                'skip_download': True,
+            },
+        },
          # Livestream embed
          {
              'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@@ -972,6 +1008,20 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              }
          },
+        {
+            # Kaltura embedded, some fileExt broken (#11480)
+            'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+            'info_dict': {
+                'id': '1_sgtvehim',
+                'ext': 'mp4',
+                'title': 'Our "Standard Models" of particle physics and cosmology',
+                'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+                'timestamp': 1321158993,
+                'upload_date': '20111113',
+                'uploader_id': 'kps1',
+            },
+            'add_ie': ['Kaltura'],
+        },
          # Eagle.Platform embed (generic URL)
          {
              'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -981,7 +1031,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Навальный вышел на свободу',
                  'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 87,
                  'view_count': int,
                  'age_limit': 0,
@@ -995,7 +1045,7 @@ class GenericIE(InfoExtractor):
                  'id': '12820',
                  'ext': 'mp4',
                  'title': "'O Sole Mio",
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 216,
                  'view_count': int,
              },
@@ -1008,7 +1058,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                  'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                  'duration': 694,
                  'age_limit': 0,
              },
@@ -1020,7 +1070,7 @@ class GenericIE(InfoExtractor):
                  'id': '3519514',
                  'ext': 'mp4',
                  'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
-                'thumbnail': 're:^https?://.*\.png$',
+                'thumbnail': r're:^https?://.*\.png$',
                  'duration': 45.115,
              },
          },
@@ -1103,7 +1153,7 @@ class GenericIE(InfoExtractor):
                  'id': '300346',
                  'ext': 'mp4',
                  'title': '中一中男師變性 全校師生力挺',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
              },
              'params': {
                  # m3u8 download
@@ -1149,7 +1199,7 @@ class GenericIE(InfoExtractor):
                  'ext': 'mp4',
                  'title': 'Sauvons les abeilles ! - Le débat',
                  'description': 'md5:d9082128b1c5277987825d684939ca26',
-                'thumbnail': 're:^https?://.*\.jpe?g$',
+                'thumbnail': r're:^https?://.*\.jpe?g$',
                  'timestamp': 1434970506,
                  'upload_date': '20150622',
                  'uploader': 'Public Sénat',
@@ -1163,7 +1213,7 @@ class GenericIE(InfoExtractor):
                  'id': '2855',
                  'ext': 'mp4',
                  'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
-                'thumbnail': 're:^https?://.*\.jpe?g$',
+                'thumbnail': r're:^https?://.*\.jpe?g$',
                  'uploader': 'ClickHole',
                  'uploader_id': 'clickhole',
              }
@@ -1189,16 +1239,6 @@ class GenericIE(InfoExtractor):
                  'duration': 248.667,
              },
          },
-        # ScreenwaveMedia embed
-        {
-            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
-            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
-            'info_dict': {
-                'id': 'cinemasnob-55d26273809dd',
-                'ext': 'mp4',
-                'title': 'cinemasnob',
-            },
-        },
          # BrightcoveInPageEmbed embed
          {
              'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@@ -1399,6 +1439,15 @@ class GenericIE(InfoExtractor):
              },
              'playlist_mincount': 3,
          },
+        {
+            # Videa embeds
+            'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+            'info_dict': {
+                'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+                'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+            },
+            'playlist_mincount': 2,
+        },
          # {
          #     # TODO: find another test
          #     # http://schema.org/VideoObject
@@ -2055,6 +2104,11 @@ class GenericIE(InfoExtractor):
          if soundcloud_urls:
              return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
  
+        # Look for tunein player
+        tunein_urls = TuneInBaseIE._extract_urls(webpage)
+        if tunein_urls:
+            return _playlist_from_matches(tunein_urls)
+
          # Look for embedded mtvservices player
          mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
          if mtvservices_url:
@@ -2140,6 +2194,11 @@ class GenericIE(InfoExtractor):
          if videomore_url:
              return self.url_result(videomore_url)
  
+        # Look for Webcaster embeds
+        webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+        if webcaster_url:
+            return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
          # Look for Playwire embeds
          mobj = re.search(
              r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
@@ -2206,11 +2265,6 @@ class GenericIE(InfoExtractor):
          if jwplatform_url:
              return self.url_result(jwplatform_url, 'JWPlatform')
  
-        # Look for ScreenwaveMedia embeds
-        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
-        if mobj is not None:
-            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
-
          # Look for Digiteka embeds
          digiteka_url = DigitekaIE._extract_url(webpage)
          if digiteka_url:
@@ -2221,6 +2275,11 @@ class GenericIE(InfoExtractor):
          if arkena_url:
              return self.url_result(arkena_url, ArkenaIE.ie_key())
  
+        # Look for Piksel embeds
+        piksel_url = PikselIE._extract_url(webpage)
+        if piksel_url:
+            return self.url_result(piksel_url, PikselIE.ie_key())
+
          # Look for Limelight embeds
          mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
          if mobj:
@@ -2232,6 +2291,16 @@ class GenericIE(InfoExtractor):
              return self.url_result('limelight:%s:%s' % (
                  lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
  
+        mobj = re.search(
+            r'''(?sx)
+                <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
+                    <param[^>]+
+                        name=(["\'])flashVars\2[^>]+
+                        value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
+            ''', webpage)
+        if mobj:
+            return self.url_result('limelight:media:%s' % mobj.group('id'))
+
          # Look for AdobeTVVideo embeds
          mobj = re.search(
              r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@ -2320,6 +2389,11 @@ class GenericIE(InfoExtractor):
          if dbtv_urls:
              return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
  
+        # Look for Videa embeds
+        videa_urls = VideaIE._extract_urls(webpage)
+        if videa_urls:
+            return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+
          # Looking for http://schema.org/VideoObject
          json_ld = self._search_json_ld(
              webpage, video_id, default={}, expected_type='VideoObject')