[downloader/http] Retry download when urlopen times out (#26603) (refs #10935)

[youtube-dl] / youtube_dl / extractor / porncom.py
diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py

index d85e0294df62d7540304f2a8e87c4f989fcc2e07..5726cab3ae6763d7466d013898f71925966bed6d 100644 (file)
--- a/youtube_dl/extractor/porncom.py
+++ b/youtube_dl/extractor/porncom.py
@@ -22,7 +22,7 @@ class PornComIE(InfoExtractor):
              'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
              'ext': 'mp4',
              'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
              'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
              'ext': 'mp4',
              'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
              'duration': 551,
              'view_count': int,
              'age_limit': 18,
              'duration': 551,
              'view_count': int,
              'age_limit': 18,
@@ -43,7 +43,8 @@ class PornComIE(InfoExtractor):
  
          config = self._parse_json(
              self._search_regex(
  
          config = self._parse_json(
              self._search_regex(
-                r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
+                (r'=\s*({.+?})\s*;\s*v1ar\b',
+                 r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
                  webpage, 'config', default='{}'),
              display_id, transform_source=js_to_json, fatal=False)
  
                  webpage, 'config', default='{}'),
              display_id, transform_source=js_to_json, fatal=False)
  
@@ -69,7 +70,7 @@ class PornComIE(InfoExtractor):
                  'height': int(height),
                  'filesize_approx': parse_filesize(filesize),
              } for format_url, height, filesize in re.findall(
                  'height': int(height),
                  'filesize_approx': parse_filesize(filesize),
              } for format_url, height, filesize in re.findall(
-                r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
+                r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
                  webpage)]
              thumbnail = None
              duration = None
                  webpage)]
              thumbnail = None
              duration = None
@@ -77,12 +78,14 @@ class PornComIE(InfoExtractor):
          self._sort_formats(formats)
  
          view_count = str_to_int(self._search_regex(
          self._sort_formats(formats)
  
          view_count = str_to_int(self._search_regex(
-            r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
+            (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
+             r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
              'view count', fatal=False))
  
          def extract_list(kind):
              s = self._search_regex(
              'view count', fatal=False))
  
          def extract_list(kind):
              s = self._search_regex(
-                r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
+                (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(),
+                 r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()),
                  webpage, kind, fatal=False)
              return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
  
                  webpage, kind, fatal=False)
              return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')