projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[pornhub] Fix view count extraction (#26621) (refs #26614)
[youtube-dl]
/
youtube_dl
/
extractor
/
kaltura.py
diff --git
a/youtube_dl/extractor/kaltura.py
b/youtube_dl/extractor/kaltura.py
index 41c1f3d96be288957cbaba2c21e3477a2969b24d..49d13460df7f0edd4d2a08f97deaf831ba9d6a46 100644
(file)
--- a/
youtube_dl/extractor/kaltura.py
+++ b/
youtube_dl/extractor/kaltura.py
@@
-103,14
+103,24
@@
class KalturaIE(InfoExtractor):
{
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
{
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
+ },
+ {
+ # unavailable source format
+ 'url': 'kaltura:513551:1_66x4rg7o',
+ 'only_matching': True,
}
]
@staticmethod
def _extract_url(webpage):
}
]
@staticmethod
def _extract_url(webpage):
+ urls = KalturaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
-
mobj
= (
- re.
search
(
+
finditer
= (
+ re.
finditer
(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
@@
-118,37
+128,46
@@
class KalturaIE(InfoExtractor):
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
- """, webpage)
or
-
re.search
(
+ """, webpage)
+
or re.finditer
(
r'''(?xs)
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
r'''(?xs)
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
- entry_?[Ii]d|
- (?P<q2>["'])entry_?[Ii]d(?P=q2)
- )\s*:\s*
+ (?:
+ entry_?[Ii]d|
+ (?P<q2>["'])entry_?[Ii]d(?P=q2)
+ )\s*:\s*|
+ \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
+ )
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
- ''', webpage)
or
-
re.search
(
+ ''', webpage)
+
or re.finditer
(
r'''(?xs)
r'''(?xs)
- <iframe[^>]+src=(?P<q1>["'])
- (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+ (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ (?:(?!(?P=q1)).)*
+ [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?:(?!(?P=q1)).)*
(?:(?!(?P=q1)).)*
- [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?P=q1)
''', webpage)
)
(?P=q1)
''', webpage)
)
- if mobj:
+ urls = []
+ for mobj in finditer:
embed_info = mobj.groupdict()
embed_info = mobj.groupdict()
+ for k, v in embed_info.items():
+ if v:
+ embed_info[k] = v.strip()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
- service_
url
= re.search(
- r'<script[^>]+src=
["\']((?:https?:)?//.+?
)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+ service_
mobj
= re.search(
+ r'<script[^>]+src=
(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+
)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
webpage)
- if service_url:
- url = smuggle_url(url, {'service_url': service_url.group(1)})
- return url
+ if service_mobj:
+ url = smuggle_url(url, {'service_url': service_mobj.group('id')})
+ urls.append(url)
+ return urls
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]
@@
-188,6
+207,8
@@
class KalturaIE(InfoExtractor):
'entryId': video_id,
'service': 'baseentry',
'ks': '{1:result:ks}',
'entryId': video_id,
'service': 'baseentry',
'ks': '{1:result:ks}',
+ 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ 'responseProfile:type': 1,
},
{
'action': 'getbyentryid',
},
{
'action': 'getbyentryid',
@@
-287,6
+308,9
@@
class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
# skip for now.
if f.get('fileExt') == 'chun':
continue
+ # DRM-protected video, cannot be decrypted
+ if f.get('fileExt') == 'wvm':
+ continue
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt
if f.get('containerFormat') == 'qt':
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt
if f.get('containerFormat') == 'qt':
@@
-295,12
+319,17
@@
class KalturaIE(InfoExtractor):
f['fileExt'] = 'mp4'
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
f['fileExt'] = 'mp4'
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
+ format_id = '%(fileExt)s-%(bitrate)s' % f
+ # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
+ if f.get('isOriginal') is True and not self._is_valid_url(
+ video_url, entry_id, format_id):
+ continue
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
# -f mp4-56)
vcodec = 'none' if 'videoCodecId' not in f and f.get(
'frameRate') == 0 else f.get('videoCodecId')
formats.append({
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
# -f mp4-56)
vcodec = 'none' if 'videoCodecId' not in f and f.get(
'frameRate') == 0 else f.get('videoCodecId')
formats.append({
- 'format_id':
'%(fileExt)s-%(bitrate)s' % f
,
+ 'format_id':
format_id
,
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
@@
-324,7
+353,7
@@
class KalturaIE(InfoExtractor):
if captions:
for caption in captions.get('objects', []):
# Continue if caption is not ready
if captions:
for caption in captions.get('objects', []):
# Continue if caption is not ready
- if
f
.get('status') != 2:
+ if
caption
.get('status') != 2:
continue
if not caption.get('id'):
continue
continue
if not caption.get('id'):
continue