Credit @AVerwer for showroomlive (#11458)

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 3d4951ad94948ac753b21812626f94be17d9e942..cf46711b9f5556c01c8a2eba5300dbe5df14ae94 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -128,7 +128,13 @@ DATE_FORMATS = (
      '%d %B %Y',
      '%d %b %Y',
      '%B %d %Y',
+    '%B %dst %Y',
+    '%B %dnd %Y',
+    '%B %dth %Y',
      '%b %d %Y',
+    '%b %dst %Y',
+    '%b %dnd %Y',
+    '%b %dth %Y',
      '%b %dst %Y %I:%M',
      '%b %dnd %Y %I:%M',
      '%b %dth %Y %I:%M',
@@ -137,6 +143,7 @@ DATE_FORMATS = (
      '%Y/%m/%d',
      '%Y/%m/%d %H:%M',
      '%Y/%m/%d %H:%M:%S',
+    '%Y-%m-%d %H:%M',
      '%Y-%m-%d %H:%M:%S',
      '%Y-%m-%d %H:%M:%S.%f',
      '%d.%m.%Y %H:%M',
@@ -501,7 +508,7 @@ def sanitize_path(s):
      if drive_or_unc:
          norm_path.pop(0)
      sanitized_path = [
-        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)
+        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
          for path_part in norm_path]
      if drive_or_unc:
          sanitized_path.insert(0, drive_or_unc + os.path.sep)
@@ -1183,7 +1190,7 @@ def date_from_str(date_str):
          return today
      if date_str == 'yesterday':
          return today - datetime.timedelta(days=1)
-    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
          time = int(match.group('time'))
@@ -1700,6 +1707,16 @@ def base_url(url):
      return re.match(r'https?://[^?#&]+/', url).group()
  
  
+def urljoin(base, path):
+    if not isinstance(path, compat_str) or not path:
+        return None
+    if re.match(r'^(?:https?:)?//', path):
+        return path
+    if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
+        return None
+    return compat_urlparse.urljoin(base, path)
+
+
  class HEADRequest(compat_urllib_request.Request):
      def get_method(self):
          return 'HEAD'
@@ -1756,7 +1773,7 @@ def parse_duration(s):
      s = s.strip()
  
      days, hours, mins, secs, ms = [None] * 5
-    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?$', s)
+    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
      if m:
          days, hours, mins, secs, ms = m.groups()
      else:
@@ -1773,11 +1790,11 @@ def parse_duration(s):
                  )?
                  (?:
                      (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
-                )?$''', s)
+                )?Z?$''', s)
          if m:
              days, hours, mins, secs, ms = m.groups()
          else:
-            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s)
+            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
              if m:
                  hours, mins = m.groups()
              else: