projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
d0ba558
)
[franceculture] fix extraction(closes #12547)
author
Remita Amine
<remitamine@gmail.com>
Sat, 25 Mar 2017 06:03:13 +0000
(07:03 +0100)
committer
Remita Amine
<remitamine@gmail.com>
Sat, 25 Mar 2017 06:04:48 +0000
(07:04 +0100)
youtube_dl/extractor/franceculture.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py
index b98da692cb23ccc1a6de7a8657f0d8331640280f..df3d757f31901bef9a32d6c25159caf09b9bf32c 100644 (file)
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -4,6 +4,8 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
from .common import InfoExtractor
from ..utils import (
determine_ext,
+ extract_attributes,
+ int_or_none,
unified_strdate,
)
unified_strdate,
)
@@ -19,6 +21,7 @@ class FranceCultureIE(InfoExtractor):
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
+ 'timestamp': 1393642916,
'vcodec': 'none',
}
}
'vcodec': 'none',
}
}
@@ -28,30 +31,34 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(url, display_id)
- video_url = self._search_regex(
- r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
- webpage, 'video path')
+ video_data = extract_attributes(self._search_regex(
+ r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
+ webpage, 'video data'))
- title = self._og_search_title(webpage)
+ video_url = video_data['data-asset-source']
+ title = video_data.get('data-asset-title') or self._og_search_title(webpage)
- upload_date = unified_strdate(self._search_regex(
- '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
- webpage, 'upload date', fatal=False))
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
+ webpage, 'description', default=None)
thumbnail = self._search_regex(
thumbnail = self._search_regex(
- r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
+ r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
- r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
+ r'(?s)<span class="author">(.*?)</span>',
webpage, 'uploader', default=None)
webpage, 'uploader', default=None)
- vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
+ ext = determine_ext(video_url.lower())
return {
'id': display_id,
'display_id': display_id,
'url': video_url,
'title': title,
return {
'id': display_id,
'display_id': display_id,
'url': video_url,
'title': title,
+ 'description': description,
'thumbnail': thumbnail,
'thumbnail': thumbnail,
- 'vcodec': vcodec,
+ 'ext': ext,
+ 'vcodec': 'none' if ext == 'mp3' else None,
'uploader': uploader,
'uploader': uploader,
- 'upload_date': upload_date,
+ 'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+ 'duration': int_or_none(video_data.get('data-duration')),
}
}