Merge branch 'sohu_fix' of https://github.com/yan12125/youtube-dl into yan12125-sohu_fix

author Sergey M․ <dstftw@gmail.com>

Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)

committer Sergey M․ <dstftw@gmail.com>

Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
author Sergey M․ <dstftw@gmail.com>
Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
committer Sergey M․ <dstftw@gmail.com>
Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
diff --combined test/test_utils.py

index 8f790bf0a7853e4fd3ed391a5169e50da2600a5b,e02069c4dfe3c9b9b4aec5d1838ac726776dbc66..4f0ffd4824506f2f171e8b0e374d034d6ae8ccea
--- 1/test/test_utils.py
--- 2/test/test_utils.py
+++ b/test/test_utils.py
@@@ -38,7 -38,6 +38,7 @@@ from youtube_dl.utils import 
       parse_iso8601,
       read_batch_urls,
       sanitize_filename,
+ +    sanitize_path,
       shell_quote,
       smuggle_url,
       str_to_int,
@@@ -55,6 -54,7 +55,7 @@@
       xpath_with_ns,
       render_table,
       match_str,
+     url_sanitize_consecutive_slashes,
   )
   
   
@@@ -132,42 -132,6 +133,42 @@@ class TestUtil(unittest.TestCase)
           self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
           self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
   
+ +    def test_sanitize_path(self):
+ +        if sys.platform != 'win32':
+ +            return
+ +
+ +        self.assertEqual(sanitize_path('abc'), 'abc')
+ +        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+ +        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+ +        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+ +        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+ +        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+ +        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+ +
+ +        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+ +        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+ +
+ +        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+ +        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+ +        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+ +        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+ +
+ +        self.assertEqual(
+ +            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+ +            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+ +
+ +        self.assertEqual(
+ +            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+ +            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+ +        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+ +        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+ +        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+ +
+ +        self.assertEqual(sanitize_path('../abc'), '..\\abc')
+ +        self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+ +        self.assertEqual(sanitize_path('./abc'), 'abc')
+ +        self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+ +
       def test_ordered_set(self):
           self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
           self.assertEqual(orderedSet([]), [])
@@@ -538,6 -502,21 +539,21 @@@ ffmpeg version 2.4.4 Copyright (c) 2000
               'like_count > 100 & dislike_count <? 50 & description',
               {'like_count': 190, 'dislike_count': 10}))
   
+     def test_url_sanitize_consecutive_slashes(self):
+         self.assertEqual(url_sanitize_consecutive_slashes(
+             'http://hostname/foo//bar/filename.html'),
+             'http://hostname/foo/bar/filename.html')
+         self.assertEqual(url_sanitize_consecutive_slashes(
+             'http://hostname//foo/bar/filename.html'),
+             'http://hostname/foo/bar/filename.html')
+         self.assertEqual(url_sanitize_consecutive_slashes(
+             'http://hostname//'), 'http://hostname/')
+         self.assertEqual(url_sanitize_consecutive_slashes(
+             'http://hostname/foo/bar/filename.html'),
+             'http://hostname/foo/bar/filename.html')
+         self.assertEqual(url_sanitize_consecutive_slashes(
+             'http://hostname/'), 'http://hostname/')
+ 
   
   if __name__ == '__main__':
       unittest.main()
diff --combined youtube_dl/utils.py

index c3135effc18d0df51982787933c06ceaa851e666,ef14f9a367197164ad15fc0ab6d15f47ef2c9b59..e82e3998a7c30d8ae14c9561b51946df0cbebcff
--- 1/youtube_dl/utils.py
--- 2/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@@ -252,12 -252,15 +252,12 @@@ def sanitize_open(filename, open_mode)
               raise
   
           # In case of error, try to remove win32 forbidden chars
- -        alt_filename = os.path.join(
- -            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
- -            for path_part in os.path.split(filename)
- -        )
+ +        alt_filename = sanitize_path(filename)
           if alt_filename == filename:
               raise
           else:
               # An exception here should be caught in the caller
- -            stream = open(encodeFilename(filename), open_mode)
+ +            stream = open(encodeFilename(alt_filename), open_mode)
               return (stream, alt_filename)
   
   
@@@ -308,24 -311,6 +308,24 @@@ def sanitize_filename(s, restricted=Fal
       return result
   
   
+ +def sanitize_path(s):
+ +    """Sanitizes and normalizes path on Windows"""
+ +    if sys.platform != 'win32':
+ +        return s
+ +    drive, _ = os.path.splitdrive(s)
+ +    unc, _ = os.path.splitunc(s)
+ +    unc_or_drive = unc or drive
+ +    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
+ +    if unc_or_drive:
+ +        norm_path.pop(0)
+ +    sanitized_path = [
+ +        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+ +        for path_part in norm_path]
+ +    if unc_or_drive:
+ +        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+ +    return os.path.join(*sanitized_path)
+ +
+ +
   def orderedSet(iterable):
       """ Remove all duplicates from the input iterable """
       res = []
@@@ -1804,3 -1789,18 +1804,18 @@@ class PerRequestProxyHandler(compat_url
               return None  # No Proxy
           return compat_urllib_request.ProxyHandler.proxy_open(
               self, req, proxy, type)
+ 
+ 
+ def url_sanitize_consecutive_slashes(url):
+     """Sanitize URLs with consecutive slashes
+ 
+     For example, transform both
+         http://hostname/foo//bar/filename.html
+     and
+         http://hostname//foo/bar/filename.html
+     into
+         http://hostname/foo/bar/filename.html
+     """
+     parsed_url = list(compat_urlparse.urlparse(url))
+     parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+     return compat_urlparse.urlunparse(parsed_url)
author	Sergey M․ <dstftw@gmail.com>
	Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Tue, 17 Mar 2015 15:18:36 +0000 (21:18 +0600)
		1	2
test/test_utils.py	patch |	diff1 |	diff2 |	blob | history
youtube_dl/utils.py	patch |	diff1 |	diff2 |	blob | history