Closes #10363
In the original mtvservices:embedded test case, config.xml is still used
to get the feed URL. Some other examples, including test_Generic_40
(http://www.vulture.com/2016/06/new-key-peele-sketches-released.html),
and the video mentioned in #10363, use another endpoint to get the feed
URL. The 'index.html' approach also works for the original test case, so
I dropped the old config.xml approach.
+version <unreleased>
+
+Extractors
+* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
+
+
version 2016.08.24.1
Extractors
version 2016.08.24.1
Extractors
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate
-from ..compat import compat_urllib_parse_urlencode
class BetIE(MTVServicesInfoExtractor):
class BetIE(MTVServicesInfoExtractor):
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
def _get_feed_query(self, uri):
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
def _get_feed_query(self, uri):
- return compat_urllib_parse_urlencode({
def _extract_mgid(self, webpage):
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
def _extract_mgid(self, webpage):
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
from .common import InfoExtractor
from ..compat import (
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlencode,
compat_str,
compat_xpath,
)
compat_str,
compat_xpath,
)
fix_xml_ampersands,
float_or_none,
HEADRequest,
fix_xml_ampersands,
float_or_none,
HEADRequest,
sanitized_Request,
strip_or_none,
timeconvert,
unescapeHTML,
sanitized_Request,
strip_or_none,
timeconvert,
unescapeHTML,
def _id_from_uri(uri):
return uri.split(':')[-1]
def _id_from_uri(uri):
return uri.split(':')[-1]
+ @staticmethod
+ def _remove_template_parameter(url):
+ # Remove the templates, like &device={device}
+ return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
+
# This was originally implemented for ComedyCentral, but it also works here
@classmethod
def _transform_rtmp_url(cls, rtmp_video_url):
# This was originally implemented for ComedyCentral, but it also works here
@classmethod
def _transform_rtmp_url(cls, rtmp_video_url):
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
video_id = self._id_from_uri(uri)
self.report_extraction(video_id)
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
- mediagen_url = content_el.attrib['url']
- # Remove the templates, like &device={device}
- mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
+ mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&' if '?' in mediagen_url else '?'
mediagen_url += 'acceptMethods=fms'
if 'acceptMethods' not in mediagen_url:
mediagen_url += '&' if '?' in mediagen_url else '?'
mediagen_url += 'acceptMethods=fms'
data = {'uri': uri}
if self._LANG:
data['lang'] = self._LANG
data = {'uri': uri}
if self._LANG:
data['lang'] = self._LANG
- return compat_urllib_parse_urlencode(data)
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
- info_url = feed_url + '?' + self._get_feed_query(uri)
+ info_url = update_url_query(feed_url, self._get_feed_query(uri))
return self._get_videos_info_from_url(info_url, video_id)
def _get_videos_info_from_url(self, url, video_id):
return self._get_videos_info_from_url(info_url, video_id)
def _get_videos_info_from_url(self, url, video_id):
def _get_feed_url(self, uri):
video_id = self._id_from_uri(uri)
def _get_feed_url(self, uri):
video_id = self._id_from_uri(uri)
- site_id = uri.replace(video_id, '')
- config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/'
- 'context52/config.xml'.format(site_id))
- config_doc = self._download_xml(config_url, video_id)
- feed_node = config_doc.find('.//feed')
- feed_url = feed_node.text.strip().split('?')[0]
- return feed_url
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
-from ..compat import compat_urllib_parse_urlencode
from ..utils import update_url_query
from ..utils import update_url_query
}]
def _get_feed_query(self, uri):
}]
def _get_feed_query(self, uri):
- return compat_urllib_parse_urlencode({
'feed': 'nick_arc_player_prime',
'mgid': uri,
'feed': 'nick_arc_player_prime',
'mgid': uri,
def _extract_mgid(self, webpage):
return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
def _extract_mgid(self, webpage):
return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')