Fix some IEs that didn't return the upload_date in the YYYYMMDD format
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sat, 27 Apr 2013 13:14:20 +0000 (15:14 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Sat, 27 Apr 2013 13:14:20 +0000 (15:14 +0200)
Create a function unified_strdate in utils.py to fix these problems

test/test_utils.py
youtube_dl/InfoExtractors.py
youtube_dl/utils.py

index f9d58268badcdd1b8e66fc88354fa9cf11464005..343409a7a14b242abea9618ffad2a7274f7e8b8a 100644 (file)
@@ -15,6 +15,7 @@ from youtube_dl.utils import sanitize_filename
 from youtube_dl.utils import unescapeHTML
 from youtube_dl.utils import orderedSet
 from youtube_dl.utils import DateRange
+from youtube_dl.utils import unified_strdate
 
 if sys.version_info < (3, 0):
     _compat_str = lambda b: b.decode('unicode-escape')
@@ -104,6 +105,12 @@ class TestUtil(unittest.TestCase):
         self.assertTrue("19690721" in _ac)
         _firstmilenium = DateRange(end="10000101")
         self.assertTrue("07110427" in _firstmilenium)
+        
+    def test_unified_dates(self):
+        # Each pair is (raw date string, expected YYYYMMDD result)
+        for raw, expected in [('December 21, 2010', '20101221'), ('8/7/2009', '20090708'),
+                              ('Dec 14, 2012', '20121214'), ('2012/10/11 01:56:38 +0000', '20121011')]:
+            self.assertEqual(unified_strdate(raw), expected)
 
 if __name__ == '__main__':
     unittest.main()
index 936af9cb4bf32cedace3e90b4809a1ce507f2898..88ea567f86293561f7df15c6e74715ab22fbafd4 100755 (executable)
@@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):
         mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
         if mobj is not None:
             upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
-            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
-            for expression in format_expressions:
-                try:
-                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
-                except:
-                    pass
+            upload_date = unified_strdate(upload_date)
 
         # description
         video_description = get_element_by_id("eow-description", video_webpage)
@@ -2385,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor):
             shortMediaId = mediaId.split(':')[-1]
             showId = mediaId.split(':')[-2].replace('.com', '')
             officialTitle = itemEl.findall('./title')[0].text
-            officialDate = itemEl.findall('./pubDate')[0].text
+            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
 
             configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                         compat_urllib_parse.urlencode({'uri': mediaId}))
@@ -2695,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):
 
         streams = json.loads(stream_json)
         mediaURL = streams['http_mp3_128_url']
+        upload_date = unified_strdate(info['created_at'])
 
         return [{
             'id':       info['id'],
             'url':      mediaURL,
             'uploader': info['user']['username'],
-            'upload_date':  info['created_at'],
+            'upload_date': upload_date,
             'title':    info['title'],
             'ext':      u'mp3',
             'description': info['description'],
@@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):
             self._downloader.report_warning(u'unable to extract video date')
             upload_date = None
         else:
-            upload_date = result.group('date').strip()
+            upload_date = unified_strdate(result.group('date').strip())
 
         # Get the video uploader
         result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
@@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):
         if result is None:
             self._downloader.report_error(u'unable to extract video title')
             return
-        upload_date = result.group('date')
+        upload_date = unified_strdate(result.group('date'))
 
         info = {'id': video_id,
                 'url': video_url,
index e5d756b8b10174a3aa3a2d6dbf94a376e0284dac..3a2f0022fb87f9c6426f295cbc5da214e6370b72 100644 (file)
@@ -569,7 +569,22 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
     https_request = http_request
     https_response = http_response
-    
+
+def unified_strdate(date_str):
+    """Return the date in date_str as a YYYYMMDD string, or None if it matches no known format"""
+    # Commas only separate fields; strptime treats any run of whitespace as one separator
+    date_str = date_str.replace(',', ' ')
+    # %z (UTC offset) is only supported in python>=3.2, so drop a trailing offset instead
+    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
+    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
+    for expression in format_expressions:
+        try:
+            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+        except ValueError:
+            # Not this format (or a year strftime cannot render); try the next one
+            continue
+    return None
+
 def date_from_str(date_str):
     """Return a datetime object from a string in the format YYYYMMDD"""
     return datetime.datetime.strptime(date_str, "%Y%m%d").date()