projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Credit @TRox1972 for tosh.cc (#9566) and localnews8 (#9539)
[youtube-dl]
/
youtube_dl
/
extractor
/
sohu.py
diff --git
a/youtube_dl/extractor/sohu.py
b/youtube_dl/extractor/sohu.py
index 29bd9ce6f76247b5ac997050075c7e720d8a3b2b..49e5d09ae450d11bb567a2fe95ecba55998c8b42 100644
(file)
--- a/
youtube_dl/extractor/sohu.py
+++ b/
youtube_dl/extractor/sohu.py
@@
-6,9
+6,12
@@
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
from .common import InfoExtractor
from ..compat import (
compat_str,
- compat_urllib_request
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ sanitized_Request,
)
)
-from ..utils import ExtractorError
class SohuIE(InfoExtractor):
class SohuIE(InfoExtractor):
@@
-26,7
+29,7
@@
class SohuIE(InfoExtractor):
'skip': 'On available in China',
}, {
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
'skip': 'On available in China',
}, {
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
- 'md5': '
ac9a5d322b4bf9ae184d53e4711e4f1a
',
+ 'md5': '
699060e75cf58858dd47fb9c03c42cfb
',
'info_dict': {
'id': '409385080',
'ext': 'mp4',
'info_dict': {
'id': '409385080',
'ext': 'mp4',
@@
-34,7
+37,7
@@
class SohuIE(InfoExtractor):
}
}, {
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
}
}, {
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
- 'md5': '
49308ff6dafde5ece51137d04aec311e
',
+ 'md5': '
9bf34be48f2f4dadcb226c74127e203c
',
'info_dict': {
'id': '78693464',
'ext': 'mp4',
'info_dict': {
'id': '78693464',
'ext': 'mp4',
@@
-48,7
+51,7
@@
class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
},
'playlist': [{
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
},
'playlist': [{
- 'md5': '
492923eac023ba2f13ff69617c32754a
',
+ 'md5': '
bdbfb8f39924725e6589c146bc1883ad
',
'info_dict': {
'id': '78910339_part1',
'ext': 'mp4',
'info_dict': {
'id': '78910339_part1',
'ext': 'mp4',
@@
-56,7
+59,7
@@
class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
}
}, {
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
}
}, {
- 'md5': '
de604848c0e8e9c4a4dde7e1347c0637
',
+ 'md5': '
3e1f46aaeb95354fd10e7fca9fc1804e
',
'info_dict': {
'id': '78910339_part2',
'ext': 'mp4',
'info_dict': {
'id': '78910339_part2',
'ext': 'mp4',
@@
-64,7
+67,7
@@
class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
}
}, {
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
}
}, {
- 'md5': '
93584716ee0657c0b205b8aa3d27aa13
',
+ 'md5': '
8407e634175fdac706766481b9443450
',
'info_dict': {
'id': '78910339_part3',
'ext': 'mp4',
'info_dict': {
'id': '78910339_part3',
'ext': 'mp4',
@@
-93,7
+96,7
@@
class SohuIE(InfoExtractor):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
- req =
compat_urllib_request.
Request(base_data_url + vid_id)
+ req =
sanitized_
Request(base_data_url + vid_id)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
@@
-139,21
+142,43
@@
class SohuIE(InfoExtractor):
for i in range(part_count):
formats = []
for format_id, format_data in formats_json.items():
for i in range(part_count):
formats = []
for format_id, format_data in formats_json.items():
+ allot = format_data['allot']
+
data = format_data['data']
data = format_data['data']
+ clips_url = data['clipsURL']
+ su = data['su']
- # URLs starts with http://newflv.sohu.ccgslb.net/ is not usable
- # so retry until got a working URL
video_url = 'newflv.sohu.ccgslb.net'
video_url = 'newflv.sohu.ccgslb.net'
+ cdnId = None
retries = 0
retries = 0
- while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
- download_note = 'Download information from CDN gateway for format ' + format_id
+
+ while 'newflv.sohu.ccgslb.net' in video_url:
+ params = {
+ 'prot': 9,
+ 'file': clips_url[i],
+ 'new': su[i],
+ 'prod': 'flash',
+ 'rb': 1,
+ }
+
+ if cdnId is not None:
+ params['idc'] = cdnId
+
+ download_note = 'Downloading %s video URL part %d of %d' % (
+ format_id, i + 1, part_count)
+
if retries > 0:
download_note += ' (retry #%d)' % retries
if retries > 0:
download_note += ' (retry #%d)' % retries
+ part_info = self._parse_json(self._download_webpage(
+ 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)),
+ video_id, download_note), video_id)
+
+ video_url = part_info['url']
+ cdnId = part_info.get('nid')
+
retries += 1
retries += 1
- cdn_info = self._download_json(
- 'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
- video_id, download_note)
- video_url = cdn_info['url']
+ if retries > 5:
+ raise ExtractorError('Failed to get video URL')
formats.append({
'url': video_url,
formats.append({
'url': video_url,