[bitchute] Fix extraction (closes #18567)
author: Sergey M․ <dstftw@gmail.com>
Tue, 1 Jan 2019 11:12:44 +0000 (18:12 +0700)
committer: Sergey M․ <dstftw@gmail.com>
Tue, 1 Jan 2019 11:12:44 +0000 (18:12 +0700)
youtube_dl/extractor/bitchute.py

index 43b4732aa0f1c1dcc721f93134d00b439562f418..aa034355a94b3142c1f3de663114780739767326 100644 (file)
@@ -5,7 +5,10 @@ import itertools
 import re
 
 from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import (
+    orderedSet,
+    urlencode_postdata,
+)
 
 
 class BitChuteIE(InfoExtractor):
@@ -43,10 +46,15 @@ class BitChuteIE(InfoExtractor):
             'description', webpage, 'title',
             default=None) or self._og_search_description(webpage)
 
+        format_urls = []
+        for mobj in re.finditer(
+                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+            format_urls.append(mobj.group('url'))
+        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
         formats = [
-            {'url': mobj.group('url')}
-            for mobj in re.finditer(
-                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
+            {'url': format_url}
+            for format_url in orderedSet(format_urls)]
         self._sort_formats(formats)
 
         description = self._html_search_regex(