Allow selecting videos to download by their upload date (related to #137)

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 08be9e6373f6d4129327a6125b7d3b8c863273c0..e5d756b8b10174a3aa3a2d6dbf94a376e0284dac 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -12,6 +12,7 @@ import traceback
  import zlib
  import email.utils
  import json
+import datetime
  
  try:
      import urllib.request as compat_urllib_request
@@ -280,6 +281,12 @@ class AttrParser(compat_html_parser.HTMLParser):
              lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
          lines[-1] = lines[-1][:self.result[2][1]]
          return '\n'.join(lines).strip()
+# Hack for https://github.com/rg3/youtube-dl/issues/662: on Python < 2.7.3, step over a literal "</scr'+'ipt>" instead of parsing it as an end tag
+if sys.version_info < (2, 7, 3):
+    AttrParser.parse_endtag = (lambda self, i:
+        i + len("</scr'+'ipt>")
+        if self.rawdata[i:].startswith("</scr'+'ipt>")
+        else compat_html_parser.HTMLParser.parse_endtag(self, i))
  
  def get_element_by_id(id, html):
      """Return the content of the tag with the specified ID in the passed HTML document"""
@@ -305,7 +312,7 @@ def clean_html(html):
      html = re.sub('<.*?>', '', html)
      # Replace html entities
      html = unescapeHTML(html)
-    return html
+    return html.strip()
  
  
  def sanitize_open(filename, open_mode):
@@ -323,7 +330,7 @@ def sanitize_open(filename, open_mode):
              if sys.platform == 'win32':
                  import msvcrt
                  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
-            return (sys.stdout, filename)
+            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
          stream = open(encodeFilename(filename), open_mode)
          return (stream, filename)
      except (IOError, OSError) as err:
@@ -409,8 +416,19 @@ def encodeFilename(s):
          # match Windows 9x series as well. Besides, NT 4 is obsolete.)
          return s
      else:
-        return s.encode(sys.getfilesystemencoding(), 'ignore')
+        encoding = sys.getfilesystemencoding()
+        if encoding is None:
+            encoding = 'utf-8'
+        return s.encode(encoding, 'ignore')
+
+def decodeOption(optval):
+    """Return optval as text: bytes are decoded with preferredencoding(), None is passed through"""
+    if optval is not None:
+        if isinstance(optval, bytes):
+            optval = optval.decode(preferredencoding())
  
+        assert isinstance(optval, compat_str)
+    return optval
  
  class ExtractorError(Exception):
      """Error during info extraction."""
@@ -418,6 +436,7 @@ class ExtractorError(Exception):
          """ tb, if given, is the original traceback (so that it can be printed out). """
          super(ExtractorError, self).__init__(msg)
          self.traceback = tb
+        self.exc_info = sys.exc_info()  # preserve original exception
  
      def format_traceback(self):
          if self.traceback is None:
@@ -432,7 +451,10 @@ class DownloadError(Exception):
      configured to continue on errors. They will contain the appropriate
      error message.
      """
-    pass
+    def __init__(self, msg, exc_info=None):
+        """ msg is the error message; exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+        super(DownloadError, self).__init__(msg)
+        self.exc_info = exc_info
  
  
  class SameFileError(Exception):
@@ -516,18 +538,18 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
          return ret
  
      def http_request(self, req):
-        for h in std_headers:
+        for h,v in std_headers.items():
              if h in req.headers:
                  del req.headers[h]
-            req.add_header(h, std_headers[h])
+            req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
          if 'Youtubedl-user-agent' in req.headers:
-            if 'User-Agent' in req.headers:
-                del req.headers['User-Agent']
-            req.headers['User-Agent'] = req.headers['Youtubedl-user-agent']
+            if 'User-agent' in req.headers:
+                del req.headers['User-agent']
+            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
              del req.headers['Youtubedl-user-agent']
          return req
  
@@ -547,3 +569,32 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
  
      https_request = http_request
      https_response = http_response
+    
+def date_from_str(date_str):
+    """Return a datetime.date object from a string in the format YYYYMMDD"""
+    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
+    
+class DateRange(object):
+    """Represents a time interval between two dates, both bounds included"""
+    def __init__(self, start=None, end=None):
+        """start and end must be strings in the format accepted by date_from_str
+        (YYYYMMDD); a bound left as None is open (date.min / date.max).
+        Raises ValueError if start is after end."""
+        self.start = date_from_str(start) if start is not None else datetime.date.min
+        self.end = date_from_str(end) if end is not None else datetime.date.max
+        # start == end is a valid one-day range (day() builds exactly that),
+        # so only a start that lies after the end is rejected
+        if self.start > self.end:
+            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+    @classmethod
+    def day(cls, day):
+        """Returns a range that only contains the given day"""
+        return cls(day, day)
+    def __contains__(self, date):
+        """Check if the date (a YYYYMMDD string or a datetime.date) is in the range"""
+        if not isinstance(date, datetime.date):
+            date = date_from_str(date)
+        return self.start <= date <= self.end
+    def __str__(self):
+        """Return the range as 'start - end' with both dates in ISO format"""
+        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())