from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
class TudouIE(InfoExtractor):
_PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
def _url_for_id(self, video_id, quality=None):
    """Return the media URL that the Tudou info endpoint reports for a video.

    video_id -- identifier appended to the ``id`` query parameter; it is
                converted with ``compat_str`` so non-string ids are accepted.
    quality  -- optional string; when truthy it is appended to the request
                as ``'&hd' + quality``.

    The info document is fetched with ``self._download_xml`` and the
    ``text`` attribute of the returned object is handed back unchanged.
    """
    info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
    if quality:
        info_url += '&hd' + quality
    # NOTE(review): presumably the response root is the <f> element whose
    # text is the final video URL (the earlier revision scraped '>(.+?)</f>'
    # with a regex) -- confirm against a live response.
    xml_data = self._download_xml(
        info_url, video_id, "Opening the info XML page")
    return xml_data.text
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
youku_vcode = self._search_regex(
- r'vcode:\s*[\'"](.+?)[\'"]', webpage, 'youku vcode', default=None)
+ r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None)
if youku_vcode:
return self.url_result('youku:' + youku_vcode, ie='Youku')
title = self._search_regex(
- r",kw:\s*['\"](.+?)[\"']", webpage, 'title')
+ r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title')
thumbnail_url = self._search_regex(
- r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)
+ r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False)
player_url = self._search_regex(
- r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
+ r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]',
webpage, 'player URL', default=self._PLAYER_URL)
segments = self._parse_json(self._search_regex(
- r'segs: \'(.*)\'', webpage, 'segments'), video_id)
+ r'segs: \'([^\']+)\'', webpage, 'segments'), video_id)
# It looks like the keys are the arguments that have to be passed as
# the hd field in the request URL; we pick the highest one.
# Also, filter out non-numeric qualities (see issue #3643).