Fix some regexes

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index a3d09a036a03761ababe08cb0bbe0a166d9faf0a..68b6338396dc84277792273db277154fe3cc4995 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1133,7 +1133,7 @@ class GenericIE(InfoExtractor):
              }
          },
          {
-            # Video.js embed
+            # Video.js embed, multiple formats
              'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
              'info_dict': {
                  'id': 'yygqldloqIk',
@@ -1148,6 +1148,19 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        {
+            # Video.js embed, single format
+            'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+            'info_dict': {
+                'id': 'watch',
+                'ext': 'mp4',
+                'title': 'Step 1 -  Good Foundation',
+                'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
          # rtl.nl embed
          {
              'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -2193,7 +2206,7 @@ class GenericIE(InfoExtractor):
          # And then there are the jokers who advertise that they use RTA,
          # but actually don't.
          AGE_LIMIT_MARKERS = [
-            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
+            r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
          ]
          if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
              age_limit = 18
@@ -2900,12 +2913,14 @@ class GenericIE(InfoExtractor):
  
          # Video.js embed
          mobj = re.search(
-            r'(?s)\bvideojs\s*\(.+?\bplayer\.src\s*\(\s*(\[.+?\])\s*\)\s*;',
+            r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
              webpage)
          if mobj is not None:
              sources = self._parse_json(
                  mobj.group(1), video_id, transform_source=js_to_json,
                  fatal=False) or []
+            if not isinstance(sources, list):
+                sources = [sources]
              formats = []
              for source in sources:
                  src = source.get('src')