Merge branch 'master' of github.com:rg3/youtube-dl
author Philipp Hagemeister <phihag@phihag.de>
Sat, 27 Apr 2013 18:26:42 +0000 (20:26 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Sat, 27 Apr 2013 18:26:42 +0000 (20:26 +0200)
test/test_utils.py
test/test_youtube_lists.py
youtube_dl/FileDownloader.py
youtube_dl/InfoExtractors.py
youtube_dl/__init__.py
youtube_dl/utils.py

index eeaaa7fad7a47d9cc376a73368d65c25d31841b0..343409a7a14b242abea9618ffad2a7274f7e8b8a 100644 (file)
@@ -14,6 +14,8 @@ from youtube_dl.utils import timeconvert
 from youtube_dl.utils import sanitize_filename
 from youtube_dl.utils import unescapeHTML
 from youtube_dl.utils import orderedSet
+from youtube_dl.utils import DateRange
+from youtube_dl.utils import unified_strdate
 
 if sys.version_info < (3, 0):
     _compat_str = lambda b: b.decode('unicode-escape')
@@ -95,6 +97,20 @@ class TestUtil(unittest.TestCase):
 
     def test_unescape_html(self):
         self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
+        
+    def test_daterange(self):
+        # A date earlier than the start of a closed range is not contained
+        twentieth_century = DateRange("19000101", "20000101")
+        self.assertFalse("17890714" in twentieth_century)
+        # Only a start is given: no end limit is passed
+        since_year_one = DateRange("00010101")
+        self.assertTrue("19690721" in since_year_one)
+        # Only an end is given: no start limit is passed
+        first_millennium = DateRange(end="10000101")
+        self.assertTrue("07110427" in first_millennium)
+        
+    def test_unified_dates(self):
+        # Pairs of (raw date string, expected YYYYMMDD result), checked in order
+        expectations = [
+            ('December 21, 2010', '20101221'),
+            ('8/7/2009', '20090708'),
+            ('Dec 14, 2012', '20121214'),
+            ('2012/10/11 01:56:38 +0000', '20121011'),
+        ]
+        for raw_date, expected in expectations:
+            self.assertEqual(unified_strdate(raw_date), expected)
 
 if __name__ == '__main__':
     unittest.main()
index c7f00af3216e0ee8292618a4ea24de062ac8d983..b11e6ccaa246b5f4ddadc44d677573aa7c502399 100644 (file)
@@ -71,6 +71,13 @@ class TestYoutubeLists(unittest.TestCase):
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
+        
+    def test_youtube_playlist_empty(self):
+        # The result must still be a playlist, with zero entries
+        extractor = YoutubePlaylistIE(FakeDownloader())
+        playlist_url = 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx'
+        playlist = extractor.extract(playlist_url)[0]
+        self.assertIsPlaylist(playlist)
+        self.assertEqual(len(playlist['entries']), 0)
 
     def test_youtube_course(self):
         dl = FakeDownloader()
index d0378fb148cfee2f638184fb48cd882834d34ac9..2db686d62e571cd01d397319858f8bc5f5f77634 100644 (file)
@@ -89,6 +89,7 @@ class FileDownloader(object):
     keepvideo:         Keep the video file after post-processing
     min_filesize:      Skip files smaller than this size
     max_filesize:      Skip files larger than this size
+    daterange:         A DateRange object, download only if the upload_date is in the range.
     """
 
     params = None
@@ -424,6 +425,11 @@ class FileDownloader(object):
         if rejecttitle:
             if re.search(rejecttitle, title, re.IGNORECASE):
                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+        date = info_dict.get('upload_date', None)
+        if date is not None:
+            dateRange = self.params.get('daterange', DateRange())
+            if date not in dateRange:
+                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
         return None
         
     def extract_info(self, url, download = True, ie_name = None):
index 0e2c7795dd4a8c838265b2dd09c64aa2335f05bf..620cce1893788e9ad1abc326896a878c63524d13 100755 (executable)
@@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):
         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
         if mobj is not None:
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
-            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
-            for expression in format_expressions:
-                try:
-                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
-                except:
-                    pass
+            upload_date = unified_strdate(upload_date)
 
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
@@ -1723,12 +1718,11 @@ class YoutubePlaylistIE(InfoExtractor):
             if 'feed' not in response:
                 self._downloader.report_error(u'Got a malformed response from YouTube API')
                 return
+            playlist_title = response['feed']['title']['$t']
             if 'entry' not in response['feed']:
                 # Number of videos is a multiple of self._MAX_RESULTS
                 break
 
-            playlist_title = response['feed']['title']['$t']
-
             videos += [ (entry['yt$position']['$t'], entry['content']['src'])
                         for entry in response['feed']['entry']
                         if 'content' in entry ]
@@ -2386,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor):
             shortMediaId = mediaId.split(':')[-1]
             showId = mediaId.split(':')[-2].replace('.com', '')
             officialTitle = itemEl.findall('./title')[0].text
-            officialDate = itemEl.findall('./pubDate')[0].text
+            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
 
             configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                         compat_urllib_parse.urlencode({'uri': mediaId}))
@@ -2696,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):
 
         streams = json.loads(stream_json)
         mediaURL = streams['http_mp3_128_url']
+        upload_date = unified_strdate(info['created_at'])
 
         return [{
             'id':       info['id'],
             'url':      mediaURL,
             'uploader': info['user']['username'],
-            'upload_date':  info['created_at'],
+            'upload_date': upload_date,
             'title':    info['title'],
             'ext':      u'mp3',
             'description': info['description'],
@@ -3561,6 +3556,7 @@ class FunnyOrDieIE(InfoExtractor):
 
 class SteamIE(InfoExtractor):
     _VALID_URL = r"""http://store.steampowered.com/
+                (agecheck/)?
                 (?P<urltype>video|app)/ #If the page is only for videos or for a game
                 (?P<gameID>\d+)/?
                 (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
@@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):
             self._downloader.report_warning(u'unable to extract video date')
             upload_date = None
         else:
-            upload_date = result.group('date').strip()
+            upload_date = unified_strdate(result.group('date').strip())
 
         # Get the video uploader
         result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
@@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):
         if result is None:
             self._downloader.report_error(u'unable to extract video title')
             return
-        upload_date = result.group('date')
+        upload_date = unified_strdate(result.group('date'))
 
         info = {'id': video_id,
                 'url': video_url,
index d491402c6a9702c57dbbd4122924b22f2ede2177..ce754ffd30ac0b7373dd22443373ad506b500b81 100644 (file)
@@ -157,6 +157,9 @@ def parseOpts(overrideArguments=None):
     selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
     selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
     selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
+    selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
+    selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
 
 
     authentication.add_option('-u', '--username',
@@ -447,6 +450,10 @@ def _real_main(argv=None):
     if opts.recodevideo is not None:
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
             parser.error(u'invalid video recode format specified')
+    if opts.date is not None:
+        date = DateRange.day(opts.date)
+    else:
+        date = DateRange(opts.dateafter, opts.datebefore)
 
     if sys.version_info < (3,):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
@@ -513,7 +520,8 @@ def _real_main(argv=None):
         'test': opts.test,
         'keepvideo': opts.keepvideo,
         'min_filesize': opts.min_filesize,
-        'max_filesize': opts.max_filesize
+        'max_filesize': opts.max_filesize,
+        'daterange': date
         })
 
     if opts.verbose:
index 017f06c42e9a019e18e25480c5e5d8d3aaaef335..3a2f0022fb87f9c6426f295cbc5da214e6370b72 100644 (file)
@@ -12,6 +12,7 @@ import traceback
 import zlib
 import email.utils
 import json
+import datetime
 
 try:
     import urllib.request as compat_urllib_request
@@ -568,3 +569,47 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     https_request = http_request
     https_response = http_response
+
+def unified_strdate(date_str):
+    """Return a string with the date in the format YYYYMMDD
+
+    Commas are treated as spaces and a trailing numeric UTC offset
+    (e.g. " +0000") is dropped before parsing. The formats listed in
+    format_expressions are tried in order and the first one that matches
+    is used. Returns None if date_str matches none of them.
+    """
+    #Replace commas
+    date_str = date_str.replace(',',' ')
+    # %z (UTC offset) is only supported in python>=3.2
+    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
+    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
+    for expression in format_expressions:
+        try:
+            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+        except ValueError:
+            # strptime reports a format mismatch with ValueError: try the
+            # next format. Anything else (e.g. KeyboardInterrupt) propagates.
+            continue
+    return None
+
+def date_from_str(date_str):
+    """Parse a string in the format YYYYMMDD and return it as a datetime.date"""
+    parsed = datetime.datetime.strptime(date_str, "%Y%m%d")
+    return parsed.date()
+    
+class DateRange(object):
+    """Represents a time interval between two dates, both ends included"""
+    def __init__(self, start=None, end=None):
+        """Build the range from start to end.
+
+        start and end must be strings in the format accepted by
+        date_from_str (YYYYMMDD). A missing start defaults to
+        datetime.datetime.min.date(), a missing end to
+        datetime.datetime.max.date().
+        Raises ValueError if start is later than end.
+        """
+        if start is not None:
+            self.start = date_from_str(start)
+        else:
+            self.start = datetime.datetime.min.date()
+        if end is not None:
+            self.end = date_from_str(end)
+        else:
+            self.end = datetime.datetime.max.date()
+        # start == end is a valid single-day range (it is exactly what day()
+        # builds), so only a start that lies after the end is rejected
+        if self.start > self.end:
+            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+    @classmethod
+    def day(cls, day):
+        """Returns a range that only contains the given day"""
+        return cls(day,day)
+    def __contains__(self, date):
+        """Check if the date (a string in the format YYYYMMDD) is in the range"""
+        date = date_from_str(date)
+        return self.start <= date <= self.end
+    def __str__(self):
+        """Return the range as '<start> - <end>' with both dates in ISO format"""
+        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())