projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
84a9fef
)
[youtube] Improve format filesize extraction (#16453)
author
Sergey M․
<dstftw@gmail.com>
Mon, 14 May 2018 16:27:56 +0000
(23:27 +0700)
committer
Sergey M․
<dstftw@gmail.com>
Mon, 14 May 2018 16:27:56 +0000
(23:27 +0700)
youtube_dl/extractor/youtube.py
patch
|
blob
|
history
diff --git
a/youtube_dl/extractor/youtube.py
b/youtube_dl/extractor/youtube.py
index 897398d2086aee820966134f29b04c2fcca17596..7f4298c088c7514534e66f6a8d1921fe57196f03 100644
(file)
--- a/
youtube_dl/extractor/youtube.py
+++ b/
youtube_dl/extractor/youtube.py
@@
-1815,6
+1815,10
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
chapters = self._extract_chapters(description_original, video_duration)
chapters = self._extract_chapters(description_original, video_duration)
+ def _extract_filesize(media_url):
+ return int_or_none(self._search_regex(
+ r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
+
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@@
-1919,8
+1923,11
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+ filesize = int_or_none(url_data.get(
+ 'clen', [None])[0]) or _extract_filesize(url)
+
more_fields = {
more_fields = {
- 'filesize':
int_or_none(url_data.get('clen', [None])[0])
,
+ 'filesize':
filesize
,
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'width': width,
'height': height,
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'width': width,
'height': height,
@@
-1994,6
+2001,8
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
for df in self._extract_mpd_formats(
mpd_url, video_id, fatal=dash_mpd_fatal,
formats_dict=self._formats):
for df in self._extract_mpd_formats(
mpd_url, video_id, fatal=dash_mpd_fatal,
formats_dict=self._formats):
+ if not df.get('filesize'):
+ df['filesize'] = _extract_filesize(df['url'])
# Do not overwrite DASH format found in some previous DASH manifest
if df['format_id'] not in dash_formats:
dash_formats[df['format_id']] = df
# Do not overwrite DASH format found in some previous DASH manifest
if df['format_id'] not in dash_formats:
dash_formats[df['format_id']] = df