X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=01c8c017d53a90477871f0ad4b6ef15250cc5e5b;hb=bf3a2fe923a7e9e1d25fb74170aa4cd5223c1632;hp=9ab7288cc84668f8533716ee4866d97a0ff3b6f7;hpb=00122de6a9d215651154274ad01ac799d580ed96;p=youtube-dl

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 9ab7288cc..01c8c017d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6,6 +6,7 @@ import datetime
 import email.utils
 import errno
 import gzip
+import itertools
 import io
 import json
 import locale
@@ -224,7 +225,7 @@ if sys.version_info >= (2,7):
     def find_xpath_attr(node, xpath, key, val):
         """ Find the xpath xpath[@key=val] """
         assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z0-9@\s:.]*$', val)
+        assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
         expr = xpath + u"[@%s='%s']" % (key, val)
         return node.find(expr)
 else:
@@ -750,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     https_request = http_request
     https_response = http_response
 
+
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""
     upload_date = None
     #Replace commas
     date_str = date_str.replace(',',' ')
     # %z (UTC offset) is only supported in python>=3.2
-    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
+    date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
     format_expressions = [
         '%d %B %Y',
         '%B %d %Y',
@@ -770,11 +772,12 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M',
     ]
     for expression in format_expressions:
         try:
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
-        except:
+        except ValueError:
             pass
     if upload_date is None:
         timetuple = email.utils.parsedate_tz(date_str)
@@ -1131,8 +1134,8 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"
 
 
-def int_or_none(v):
-    return v if v is None else int(v)
+def int_or_none(v, scale=1):
+    return v if v is None else (int(v) // scale)
 
 
 def parse_duration(s):
@@ -1164,3 +1167,50 @@ def check_executable(exe, args=[]):
     except OSError:
         return False
     return exe
+
+
+class PagedList(object):
+    def __init__(self, pagefunc, pagesize):
+        self._pagefunc = pagefunc
+        self._pagesize = pagesize
+
+    def __len__(self):
+        # This is only useful for tests
+        return len(self.getslice())
+
+    def getslice(self, start=0, end=None):
+        res = []
+        for pagenum in itertools.count(start // self._pagesize):
+            firstid = pagenum * self._pagesize
+            nextfirstid = pagenum * self._pagesize + self._pagesize
+            if start >= nextfirstid:
+                continue
+
+            page_results = list(self._pagefunc(pagenum))
+
+            startv = (
+                start % self._pagesize
+                if firstid <= start < nextfirstid
+                else 0)
+
+            endv = (
+                ((end - 1) % self._pagesize) + 1
+                if (end is not None and firstid <= end <= nextfirstid)
+                else None)
+
+            if startv != 0 or endv is not None:
+                page_results = page_results[startv:endv]
+            res.extend(page_results)
+
+            # A little optimization - if current page is not "full", ie. does
+            # not contain page_size videos then we can assume that this page
+            # is the last one - there are no more ids on further pages -
+            # i.e. no need to query again.
+            if len(page_results) + startv < self._pagesize:
+                break
+
+            # If we got the whole page, but the next page is not interesting,
+            # break out early as well
+            if end == nextfirstid:
+                break
+        return res