- video_title = video_info.find('.//title').text
- video_extension = video_info.find('.//movie_type').text
- video_format = video_extension.upper()
- video_thumbnail = video_info.find('.//thumbnail_url').text
- video_description = video_info.find('.//description').text
- video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
- video_view_count = video_info.find('.//view_counter').text
- video_webpage_url = video_info.find('.//watch_url').text
-
- # uploader
- # No need to fetch extra resources...new API has field for uploader's name
- if video_info.find('.//ch_id') is not None:
- video_uploader_id = video_info.find('.//ch_id').text
- video_uploader = video_info.find('.//ch_name').text
- elif video_info.find('.//user_id') is not None:
- video_uploader_id = video_info.find('.//user_id').text
- video_uploader = video_info.find('.//user_nickname').text
+ title = get_video_info('title')
+ if not title:
+ title = self._og_search_title(webpage, default=None)
+ if not title:
+ title = self._html_search_regex(
+ r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
+ webpage, 'video title')
+
+ watch_api_data_string = self._html_search_regex(
+ r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
+ webpage, 'watch api data', default=None)
+ watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
+ video_detail = watch_api_data.get('videoDetail', {})
+
+ thumbnail = (
+ get_video_info(['thumbnail_url', 'thumbnailURL'])
+ or self._html_search_meta('image', webpage, 'thumbnail', default=None)
+ or video_detail.get('thumbnail'))
+
+ description = get_video_info('description')
+
+ timestamp = (parse_iso8601(get_video_info('first_retrieve'))
+ or unified_timestamp(get_video_info('postedDateTime')))
+ if not timestamp:
+ match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
+ if match:
+ timestamp = parse_iso8601(match.replace('+', ':00+'))
+ if not timestamp and video_detail.get('postedAt'):
+ timestamp = parse_iso8601(
+ video_detail['postedAt'].replace('/', '-'),
+ delimiter=' ', timezone=datetime.timedelta(hours=9))
+
+ view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
+ if not view_count:
+ match = self._html_search_regex(
+ r'>Views: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'view count', default=None)
+ if match:
+ view_count = int_or_none(match.replace(',', ''))
+ view_count = view_count or video_detail.get('viewCount')
+
+ comment_count = (int_or_none(get_video_info('comment_num'))
+ or video_detail.get('commentCount')
+ or try_get(api_data, lambda x: x['thread']['commentCount']))
+ if not comment_count:
+ match = self._html_search_regex(
+ r'>Comments: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'comment count', default=None)
+ if match:
+ comment_count = int_or_none(match.replace(',', ''))
+
+ duration = (parse_duration(
+ get_video_info('length')
+ or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', default=None))
+ or video_detail.get('length')
+ or get_video_info('duration'))
+
+ webpage_url = get_video_info('watch_url') or url
+
+ # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+ # in the JSON, which will cause None to be returned instead of {}.
+ owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
+ uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
+ uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')