[youtube] Clarify keywords
[youtube-dl] / youtube_dl / extractor / huffpost.py
index b47114ab47f50f59f03eedd2c0781c9276eb441f..4ccf6b9b8a82c3ef28c1d9d04dcc6f26ce2a8f8d 100644 (file)
@@ -21,25 +21,27 @@ class HuffPostIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
-        'file': '52dd3e4b02a7602131000677.mp4',
-        'md5': 'TODO',
+        'md5': '55f5e8981c1c80a64706a44b74833de8',
         'info_dict': {
-            'title': 'TODO',
-            'description': 'TODO',
+            'id': '52dd3e4b02a7602131000677',
+            'ext': 'mp4',
+            'title': 'Legalese It! with @MikeSacksHP',
+            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.  ',
             'duration': 1549,
+            'upload_date': '20140124',
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
         data = self._download_json(api_url, video_id)['data']
 
         video_title = data['title']
         duration = parse_duration(data['running_time'])
-        upload_date = unified_strdate(data['schedule']['started_at'])
+        upload_date = unified_strdate(data['schedule']['starts_at'])
+        description = data.get('description')
 
         thumbnails = []
         for url in data['images'].values():
@@ -58,11 +60,21 @@ class HuffPostIE(InfoExtractor):
             'url': url,
             'vcodec': 'none' if key.startswith('audio/') else None,
         } for key, url in data['sources']['live'].items()]
+        if data.get('fivemin_id'):
+            fid = data['fivemin_id']
+            fcat = str(int(fid) // 100 + 1)
+            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
+            formats.append({
+                'format': 'fivemin',
+                'url': furl,
+                'preference': 1,
+            })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': video_title,
+            'description': description,
             'formats': formats,
             'duration': duration,
             'upload_date': upload_date,