X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcbslocal.py;h=90852a9ef9b7a6707ebcd1ade6cccb9ff9bbde5a;hb=HEAD;hp=289709c97b61b2fd5ab29b82e426d17bb5b4d701;hpb=fb27d0ce5e91216296e3406d461fe5b7af78c477;p=youtube-dl diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 289709c97..90852a9ef 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse -from ..utils import unified_timestamp +from ..utils import ( + parse_iso8601, + unified_timestamp, +) class CBSLocalIE(AnvatoIE): - _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P[0-9a-z-]+)' + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P[0-9a-z-]+)' _TESTS = [{ # Anvato backend @@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', + 'info_dict': { + 'id': '3580809', + 'ext': 'mp4', + 'title': 'A Very Blue Anniversary', + 'description': 'CBS2’s Cindy Hsu has more.', + 'thumbnail': 're:^https?://.*', + 'timestamp': int, + 'upload_date': r're:^\d{8}$', + 'uploader': 'CBS', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\WCBSTV', + 'Syndication\\AOL', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\Yahoo', + 'Content\\News', + 'Content\\News\\Local News', + ], + 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], + }, }] def _real_extract(self, url): @@ -63,9 +91,10 @@ class CBSLocalIE(AnvatoIE): info_dict = self._extract_anvato_videos(webpage, display_id) - time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) - timestamp = unified_timestamp(time_str) + timestamp = unified_timestamp(self._html_search_regex( + r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, + 'released date', default=None)) or parse_iso8601( + self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id,