Merge remote-tracking branch 'chrisjrn/master'

author Philipp Hagemeister <phihag@phihag.de>
Tue, 27 Nov 2012 13:55:18 +0000 (14:55 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 27 Nov 2012 13:55:18 +0000 (14:55 +0100)

diff --git a/README.md b/README.md

index 14acddbd00cc9df81c5b444fc982f4f3f6ed64c7..5cf082a7ccb7b9659f51eb9514389fb497cbd4c2 100644 (file)
--- a/README.md
+++ b/README.md
@@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
                               %(extractor)s for the provider (youtube, metacafe,
                               etc), %(id)s for the video id and %% for a literal
                               percent. Use - to output to stdout.
-    --restrict-filenames     Avoid some characters such as "&" and spaces in
-                             filenames
+    --restrict-filenames     Restrict filenames to only ASCII characters, and
+                             avoid "&" and spaces in filenames
      -a, --batch-file FILE    file containing URLs to download ('-' for stdin)
      -w, --no-overwrites      do not overwrite files
      -c, --continue           resume partially downloaded files
diff --git a/test/test_download.py b/test/test_download.py

index 545afb922134ab11eaf4a80b02cf9ccfd15c7742..d1d6b119b9290fb60f1ce56fa68ff1e67ce034d9 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -7,6 +7,9 @@ import json
  from youtube_dl.FileDownloader import FileDownloader
  from youtube_dl.InfoExtractors  import YoutubeIE, DailymotionIE
  from youtube_dl.InfoExtractors import  MetacafeIE, BlipTVIE
+from youtube_dl.InfoExtractors import  XVideosIE, VimeoIE
+from youtube_dl.InfoExtractors import  SoundcloudIE, StanfordOpenClassroomIE
+from youtube_dl.InfoExtractors import  CollegeHumorIE, XNXXIE
  
  
  class DownloadTest(unittest.TestCase):
@@ -30,10 +33,33 @@ class DownloadTest(unittest.TestCase):
         BLIP_URL = "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352"
         BLIP_FILE = "5779306.m4v"
  
-       XVIDEO_MD5 = ""
-       XVIDEO_URL = ""
-       XVIDEO_FILE = ""
+       XVIDEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
+       XVIDEO_URL = "http://www.xvideos.com/video939581/funny_porns_by_s_-1"
+       XVIDEO_FILE = "939581.flv"
  
+       VIMEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf"
+       VIMEO_URL = "http://vimeo.com/14160053"
+       VIMEO_FILE = ""
+
+       VIMEO2_MD5 = ""
+       VIMEO2_URL = "http://player.vimeo.com/video/47019590"
+       VIMEO2_FILE = ""
+
+       SOUNDCLOUD_MD5 = "ce3775768ebb6432fa8495d446a078ed"
+       SOUNDCLOUD_URL = "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy"
+       SOUNDCLOUD_FILE = "n6FLbx6ZzMiu.mp3"
+
+       STANDFORD_MD5 = "22c8206291368c4e2c9c1a307f0ea0f4"
+       STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100"
+       STANDFORD_FILE = "PracticalUnix_intro-environment.mp4"
+
+       COLLEGEHUMOR_MD5 = ""
+       COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody"
+       COLLEGEHUMOR_FILE = ""
+
+       XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d"
+       XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_"
+       XNXX_FILE = "1135332.flv"
  
         def test_youtube(self):
                 #let's download a file from youtube
@@ -72,6 +98,73 @@ class DownloadTest(unittest.TestCase):
                 md5_down_file = md5_for_file(DownloadTest.BLIP_FILE)
                 self.assertEqual(md5_down_file, DownloadTest.BLIP_MD5)
  
+       def test_xvideo(self):
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(XVideosIE())
+               fd.download([DownloadTest.XVIDEO_URL])
+               self.assertTrue(os.path.exists(DownloadTest.XVIDEO_FILE))
+               md5_down_file = md5_for_file(DownloadTest.XVIDEO_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.XVIDEO_MD5)
+
+       def test_vimeo(self):
+               #skipped for the moment produce an error
+               return
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(VimeoIE())
+               fd.download([DownloadTest.VIMEO_URL])
+               self.assertTrue(os.path.exists(DownloadTest.VIMEO_FILE))
+               md5_down_file = md5_for_file(DownloadTest.VIMEO_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.VIMEO_MD5)
+
+       def test_vimeo2(self):
+               #skipped for the moment produce an error
+               return
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(VimeoIE())
+               fd.download([DownloadTest.VIMEO2_URL])
+               self.assertTrue(os.path.exists(DownloadTest.VIMEO2_FILE))
+               md5_down_file = md5_for_file(DownloadTest.VIMEO2_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.VIMEO2_MD5)
+
+       def test_soundcloud(self):
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(SoundcloudIE())
+               fd.download([DownloadTest.SOUNDCLOUD_URL])
+               self.assertTrue(os.path.exists(DownloadTest.SOUNDCLOUD_FILE))
+               md5_down_file = md5_for_file(DownloadTest.SOUNDCLOUD_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.SOUNDCLOUD_MD5)
+
+       def test_standford(self):
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(StanfordOpenClassroomIE())
+               fd.download([DownloadTest.STANDFORD_URL])
+               self.assertTrue(os.path.exists(DownloadTest.STANDFORD_FILE))
+               md5_down_file = md5_for_file(DownloadTest.STANDFORD_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.STANDFORD_MD5)
+
+       def test_collegehumor(self):
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(CollegeHumorIE())
+               fd.download([DownloadTest.COLLEGEHUMOR_URL])
+               self.assertTrue(os.path.exists(DownloadTest.COLLEGEHUMOR_FILE))
+               md5_down_file = md5_for_file(DownloadTest.COLLEGEHUMOR_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.COLLEGEHUMOR_MD5)
+
+       def test_xnxx(self):
+               with open(DownloadTest.PARAMETERS_FILE) as f:
+                       fd = FileDownloader(json.load(f))
+               fd.add_info_extractor(XNXXIE())
+               fd.download([DownloadTest.XNXX_URL])
+               self.assertTrue(os.path.exists(DownloadTest.XNXX_FILE))
+               md5_down_file = md5_for_file(DownloadTest.XNXX_FILE)
+               self.assertEqual(md5_down_file, DownloadTest.XNXX_MD5)
+
         def tearDown(self):
                 if os.path.exists(DownloadTest.YOUTUBE_FILE):
                         os.remove(DownloadTest.YOUTUBE_FILE)
@@ -81,13 +174,25 @@ class DownloadTest(unittest.TestCase):
                         os.remove(DownloadTest.METACAFE_FILE)
                 if os.path.exists(DownloadTest.BLIP_FILE):
                         os.remove(DownloadTest.BLIP_FILE)
+               if os.path.exists(DownloadTest.XVIDEO_FILE):
+                       os.remove(DownloadTest.XVIDEO_FILE)
+               if os.path.exists(DownloadTest.VIMEO_FILE):
+                       os.remove(DownloadTest.VIMEO_FILE)
+               if os.path.exists(DownloadTest.SOUNDCLOUD_FILE):
+                       os.remove(DownloadTest.SOUNDCLOUD_FILE)
+               if os.path.exists(DownloadTest.STANDFORD_FILE):
+                       os.remove(DownloadTest.STANDFORD_FILE)
+               if os.path.exists(DownloadTest.COLLEGEHUMOR_FILE):
+                       os.remove(DownloadTest.COLLEGEHUMOR_FILE)
+               if os.path.exists(DownloadTest.XNXX_FILE):
+                       os.remove(DownloadTest.XNXX_FILE)
  
  def md5_for_file(filename, block_size=2**20):
-    with open(filename) as f:
-        md5 = hashlib.md5()
-        while True:
-            data = f.read(block_size)
-            if not data:
-                break
-            md5.update(data)
-            return md5.hexdigest()
+       with open(filename) as f:
+               md5 = hashlib.md5()
+               while True:
+                       data = f.read(block_size)
+                       if not data:
+                               break
+                       md5.update(data)
+                       return md5.hexdigest()
diff --git a/test/test_utils.py b/test/test_utils.py

index 0a435ddc547ef28a5aab9444ce466c4ccd75beaa..4208ee653243562b53fc011329907d471e76b712 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase):
  
                 self.assertEqual(sanitize_filename(u'123'), u'123')
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
                 self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de'))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de'))
                 self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
                 self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
                 self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
@@ -45,17 +45,19 @@ class TestUtil(unittest.TestCase):
  
                 self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True))
                 self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
                 self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
                 self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
                 self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
  
+               self.assertEqual(sanitize_filename(u'aäb', restricted=True), u'a_b')
+               self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
+
                 forbidden = u'"\0\\/&: \'\t\n'
                 for fc in forbidden:
-                       print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
                         for fbc in forbidden:
                                 self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
  
diff --git a/youtube-dl.1 b/youtube-dl.1

index 64120a8d24a3f397108d9433400d8b6e0f2e3a4d..ae303b6727a38007e227ea76660805e1c49f6c4e 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -59,8 +59,8 @@ redistribute it or use it however you like.
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
---restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
+--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
  -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
  -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
  -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index 4c79be4325b3157942956cffbf1e930fa22550d3..b6aebe4ac24f5dfcbdaf044730365b833eeab091 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -94,6 +94,9 @@ class FileDownloader(object):
                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
                 self.params = params
  
+               if '%(stitle)s' in self.params['outtmpl']:
+                       self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+
         @staticmethod
         def format_bytes(bytes):
                 if bytes is None:
@@ -322,9 +325,8 @@ class FileDownloader(object):
                 """Generate the output filename."""
                 try:
                         template_dict = dict(info_dict)
-                       template_dict['epoch'] = unicode(long(time.time()))
+                       template_dict['epoch'] = unicode(int(time.time()))
                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
-                       template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility
                         filename = self.params['outtmpl'] % template_dict
                         return filename
                 except (ValueError, KeyError), err:
@@ -350,7 +352,8 @@ class FileDownloader(object):
         def process_info(self, info_dict):
                 """Process a single dictionary returned by an InfoExtractor."""
  
-               info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
+               # Keep for backwards compatibility
+               info_dict['stitle'] = info_dict['title']
  
                 reason = self._match_entry(info_dict)
                 if reason is not None:
@@ -363,6 +366,7 @@ class FileDownloader(object):
                                 raise MaxDownloadsReached()
  
                 filename = self.prepare_filename(info_dict)
+               filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
  
                 # Forced printings
                 if self.params.get('forcetitle', False):
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index e9d8ad5b3968966630a72e25ba5e1b36f2a50e53..13b04ab5bcce4ee1e57e46afab0b198f1a477991 100644 (file)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -2291,7 +2291,6 @@ class ComedyCentralIE(InfoExtractor):
                         print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
  
  
-
         def _real_extract(self, url):
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@ -2395,8 +2394,8 @@ class ComedyCentralIE(InfoExtractor):
                                 continue
                         
                         if self._downloader.params.get('listformats', None):
-                           self._print_formats([i[0] for i in turls])
-                           return
+                               self._print_formats([i[0] for i in turls])
+                               return
  
                         # For now, just pick the highest bitrate
                         format,video_url = turls[-1]
@@ -2406,20 +2405,17 @@ class ComedyCentralIE(InfoExtractor):
  
                         # Select format if we can find one
                         for f,v in turls:
-                           if f == req_format:
-                             format, video_url = f, v
-                             break
-
-                       # Patch to download from alternative CDN, which does not 
-                        # break on current RTMPDump builds
-            
+                               if f == req_format:
+                                       format, video_url = f, v
+                                       break
  
+                       # Patch to download from alternative CDN, which does not
+                       # break on current RTMPDump builds
                         broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
                         better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
-            
+
                         if video_url.startswith(broken_cdn):
-                            video_url = video_url.replace(broken_cdn, better_cdn)
-                    
+                               video_url = video_url.replace(broken_cdn, better_cdn)
  
                         effTitle = showId + u'-' + epTitle
                         info = {
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index cbf1dd1a722570d5709b6a0dd741d33337201e07..7cc17af9309165032074284dca87a88c8ad5a11c 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -274,7 +274,7 @@ def parseOpts():
                         dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
         filesystem.add_option('--restrict-filenames',
                         action='store_true', dest='restrictfilenames',
-                       help='Avoid some characters such as "&" and spaces in filenames', default=False)
+                       help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
         filesystem.add_option('-a', '--batch-file',
                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
         filesystem.add_option('-w', '--no-overwrites',
@@ -532,7 +532,7 @@ def _real_main():
                         parser.error(u'you must provide at least one URL')
                 else:
                         sys.exit()
-       
+
         try:
                 retcode = fd.download(all_urls)
         except MaxDownloadsReached:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 1f60d34ae23ad2f36b54072e0b133d3cd2a22149..3339f56ec114fab13b39afeef2fcef60506c8ec5 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False):
                 elif char == ':':
                         return '_-' if restricted else ' -'
                 elif char in '\\/|*<>':
-                       return '-'
+                       return '_'
                 if restricted and (char in '&\'' or char.isspace()):
                         return '_'
+               if restricted and ord(char) > 127:
+                       return '_'
                 return char
  
         result = u''.join(map(replace_insane, s))
-       while '--' in result:
-               result = result.replace('--', '-')
-       return result.strip('-')
+       while '__' in result:
+               result = result.replace('__', '_')
+       result = result.strip('_')
+       if not result:
+               result = '_'
+       return result
  
  def orderedSet(iterable):
         """ Remove all duplicates from the input iterable """
author	Philipp Hagemeister <phihag@phihag.de>
	Tue, 27 Nov 2012 13:55:18 +0000 (14:55 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Tue, 27 Nov 2012 13:55:18 +0000 (14:55 +0100)
README.md		patch \| blob \| history
test/test_download.py		patch \| blob \| history
test/test_utils.py		patch \| blob \| history
youtube-dl.1		patch \| blob \| history
youtube_dl/FileDownloader.py		patch \| blob \| history
youtube_dl/InfoExtractors.py		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history