- video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
-
- items_json = self._search_regex(r'mediaItems: ({.*?})$',
- webpage, 'items', flags=re.MULTILINE)
- items = json.loads(items_json)
- info = items['mediaItems']['query']['results']['mediaObj'][0]
- # The 'meta' field is not always in the video webpage, we request it
- # from another page
- long_id = info['id']
- return self._get_info(long_id, video_id)
-
- def _get_info(self, long_id, video_id):
- query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
- ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
- ' AND protocol="http"' % long_id)
- data = compat_urllib_parse.urlencode({
- 'q': query,
- 'env': 'prod',
- 'format': 'json',
- })
- query_result = self._download_json(
- 'http://video.query.yahoo.com/v1/public/yql?' + data,
- video_id, 'Downloading video info')
- info = query_result['query']['results']['mediaObj'][0]
- meta = info['meta']
+ display_id = mobj.group('display_id') or self._match_id(url)
+ page_id = mobj.group('id')
+ url = mobj.group('url')
+ host = mobj.group('host')
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+ if 'err=404' in urlh.geturl():
+ raise ExtractorError('Video gone', expected=True)
+
+ # Look for iframed media first
+ entries = []
+ iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
+ for idx, iframe_url in enumerate(iframe_urls):
+ entries.append(self.url_result(host + iframe_url, 'Yahoo'))
+ if entries:
+ return self.playlist_result(entries, page_id)
+
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
+
+ # Look for Brightcove Legacy Studio embeds
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+
+ # Look for Brightcove New Studio embeds
+ bc_url = BrightcoveNewIE._extract_url(webpage)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+
+ # The query result is often embedded in the webpage as JSON. Explicit requests
+ # to the video API sometimes fail with a geo-restriction error, so prefer the
+ # embedded query result when it is present.
+ config_json = self._search_regex(
+ r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
+ webpage, 'videoplayer applet', default=None)
+ if config_json:
+ config = self._parse_json(config_json, display_id, fatal=False)
+ if config:
+ sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
+ if sapi and 'query' in sapi:
+ info = self._extract_info(display_id, sapi, webpage)
+ self._sort_formats(info['formats'])
+ return info
+
+ items_json = self._search_regex(
+ r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
+ default=None)
+ if items_json is None:
+ alias = self._search_regex(
+ r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
+ if alias is not None:
+ alias_info = self._download_json(
+ 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
+ display_id, 'Downloading alias info')
+ video_id = alias_info[0]['id']
+ else:
+ CONTENT_ID_REGEXES = [
+ r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+ r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+ r'"first_videoid"\s*:\s*"([^"]+)"',
+ r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
+ r'<article[^>]data-uuid=["\']([^"\']+)',
+ r'yahoo://article/view\?.*\buuid=([^&"\']+)',
+ ]
+ video_id = self._search_regex(
+ CONTENT_ID_REGEXES, webpage, 'content ID')
+ else:
+ items = json.loads(items_json)
+ info = items['mediaItems']['query']['results']['mediaObj'][0]
+ # The 'meta' field is not always present in the video webpage, so we request
+ # it from another page
+ video_id = info['id']
+ return self._get_info(video_id, display_id, webpage)
+
+ def _extract_info(self, display_id, query, webpage):
+ info = query['query']['results']['mediaObj'][0]
+ meta = info.get('meta')
+ video_id = info.get('id')
+
+ if not meta:
+ msg = info['status'].get('msg')
+ if msg:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, msg), expected=True)
+ raise ExtractorError('Unable to extract media object meta')