Merge remote-tracking branch 'origin/master'

author Philipp Hagemeister <phihag@phihag.de>

Tue, 25 Feb 2014 00:45:00 +0000 (01:45 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Tue, 25 Feb 2014 00:45:00 +0000 (01:45 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Tue, 25 Feb 2014 00:45:00 +0000 (01:45 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 25 Feb 2014 00:45:00 +0000 (01:45 +0100)
diff --git a/test/test_utils.py b/test/test_utils.py

index 84553b94386236352e9ad856782c4b3428f3587b..4e3c37fb4c7e3a762ad2f049db6fdf1297145824 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  
  # Various small unit tests
+import io
  import xml.etree.ElementTree
  
  #from youtube_dl.utils import htmlentity_transform
@@ -21,6 +22,7 @@ from youtube_dl.utils import (
      orderedSet,
      PagedList,
      parse_duration,
+    read_batch_urls,
      sanitize_filename,
      shell_quote,
      smuggle_url,
@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
      def test_struct_unpack(self):
          self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
  
+    def test_read_batch_urls(self):
+        f = io.StringIO(u'''\xef\xbb\xbf foo
+            bar\r
+            baz
+            # More after this line\r
+            ; or after this
+            bam''')
+        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 84f29a1a5cb638b4dd92174893d28fce17d1f85e..2aaafd37a30e5b51af74b8bdfaddd53c4a20b895 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -71,6 +71,7 @@ from .utils import (
      get_cachedir,
      MaxDownloadsReached,
      preferredencoding,
+    read_batch_urls,
      SameFileError,
      setproctitle,
      std_headers,
@@ -552,21 +553,19 @@ def _real_main(argv=None):
          sys.exit(0)
  
      # Batch file verification
-    batchurls = []
+    batch_urls = []
      if opts.batchfile is not None:
          try:
              if opts.batchfile == '-':
                  batchfd = sys.stdin
              else:
-                batchfd = open(opts.batchfile, 'r')
-            batchurls = batchfd.readlines()
-            batchurls = [x.strip() for x in batchurls]
-            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+            batch_urls = read_batch_urls(batchfd)
              if opts.verbose:
-                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
          except IOError:
              sys.exit(u'ERROR: batch file could not be read')
-    all_urls = batchurls + args
+    all_urls = batch_urls + args
      all_urls = [url.strip() for url in all_urls]
      _enc = preferredencoding()
      all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py

index 58200971bece7664e18b94eccb52b368ef5a999b..19ad45c9898f2764bde056c9285078cfd6d25b9f 100644 (file)
--- a/youtube_dl/extractor/podomatic.py
+++ b/youtube_dl/extractor/podomatic.py
@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
  import json
  import re
  
  from .common import InfoExtractor
+from ..utils import int_or_none
  
  
  class PodomaticIE(InfoExtractor):
@@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor):
      _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
  
      _TEST = {
-        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
-        u"file": u"2009-01-02T16_03_35-08_00.mp3",
-        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
-        u"info_dict": {
-            u"uploader": u"Science Teaching Tips",
-            u"uploader_id": u"scienceteachingtips",
-            u"title": u"64.  When the Moon Hits Your Eye",
-            u"duration": 446,
+        "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+        "file": "2009-01-02T16_03_35-08_00.mp3",
+        "md5": "84bb855fcf3429e6bf72460e1eed782d",
+        "info_dict": {
+            "uploader": "Science Teaching Tips",
+            "uploader_id": "scienceteachingtips",
+            "title": "64.  When the Moon Hits Your Eye",
+            "duration": 446,
          }
      }
  
@@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor):
          uploader = data['podcast']
          title = data['title']
          thumbnail = data['imageLocation']
-        duration = int(data['length'] / 1000.0)
+        duration = int_or_none(data.get('length'), 1000)
  
          return {
              'id': video_id,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 25e40a837bc9f5e6e124f1d89b013ce6a1299b97..0c482631a2979f9081730495675b444b9e99048d 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,7 @@
  #!/usr/bin/env python
  # -*- coding: utf-8 -*-
  
+import contextlib
  import ctypes
  import datetime
  import email.utils
@@ -1245,3 +1246,19 @@ except TypeError:
  else:
      struct_pack = struct.pack
      struct_unpack = struct.unpack
+
+
+def read_batch_urls(batch_fd):
+    def fixup(url):
+        if not isinstance(url, compat_str):
+            url = url.decode('utf-8', 'replace')
+        BOM_UTF8 = u'\xef\xbb\xbf'
+        if url.startswith(BOM_UTF8):
+            url = url[len(BOM_UTF8):]
+        url = url.strip()
+        if url.startswith(('#', ';', ']')):
+            return False
+        return url
+
+    with contextlib.closing(batch_fd) as fd:
+        return [url for url in map(fixup, fd) if url]
author	Philipp Hagemeister <phihag@phihag.de>
	Tue, 25 Feb 2014 00:45:00 +0000 (01:45 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Tue, 25 Feb 2014 00:45:00 +0000 (01:45 +0100)
test/test_utils.py		patch | blob | history
youtube_dl/__init__.py		patch | blob | history
youtube_dl/extractor/podomatic.py		patch | blob | history
youtube_dl/utils.py		patch | blob | history