- datetime_pattern = r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})'
- datetime_str = self._html_search_regex(datetime_pattern, page, 'date and time')
- time = (datetime_str + ':00+08:00').replace('/', '-')
- timestamp = parse_iso8601(time, delimiter=' ')
+ datetime_str = self._html_search_regex(
+ r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
+ # Transform into ISO 8601 format with timezone info
+ datetime_str = datetime_str.replace('/', '-') + ':00+0800'
+ timestamp = parse_iso8601(datetime_str, delimiter=' ')
+
+ # Note: the news count may decrease over time.
+ # This is probably a bug in the CTS website.
+ req = compat_urllib_request.Request(
+ 'http://news.cts.com.tw/action/news_count.php?callback=cb&news_id=' + news_id)
+ req.add_header('Referer', url)
+ newscount_page = self._download_webpage(req, news_id)
+ news_count = self._search_regex(r'cb\((\d+)\)', newscount_page, 'news count')