projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
8e4aa7b
)
[bbc] Improve title and description extraction (Closes #8826, closes #8822)
author
Sergey M․
<dstftw@gmail.com>
Sun, 13 Mar 2016 09:54:56 +0000
(15:54 +0600)
committer
Sergey M․
<dstftw@gmail.com>
Sun, 13 Mar 2016 09:54:56 +0000
(15:54 +0600)
youtube_dl/extractor/bbc.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index f4d8b4a2f2c1b12bb5ed2481d24839e76b7841cb..497ebfd72a096e94d345f65f7de72c8fbf54210e 100644
(file)
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -563,6 +563,14 @@ class BBCIE(BBCCoUkIE):
'title': 'BBC Blogs - Adam Curtis - BUGGER',
},
'playlist_count': 18,
'title': 'BBC Blogs - Adam Curtis - BUGGER',
},
'playlist_count': 18,
+ }, {
+ # school report playlist with single video
+ 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+ 'info_dict': {
+ 'id': '35744779',
+ 'title': 'School which breaks down barriers in Jerusalem',
+ },
+ 'playlist_count': 1,
}, {
# single video embedded with data-playable containing vpid
'url': 'http://www.bbc.com/news/world-europe-32041533',
}, {
# single video embedded with data-playable containing vpid
'url': 'http://www.bbc.com/news/world-europe-32041533',
@@ -734,8 +742,17 @@ class BBCIE(BBCCoUkIE):
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
timestamp = json_ld_info.get('timestamp')
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
timestamp = json_ld_info.get('timestamp')
+
playlist_title = json_ld_info.get('title')
playlist_title = json_ld_info.get('title')
- playlist_description = json_ld_info.get('description')
+ if not playlist_title:
+ playlist_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+ if playlist_title:
+ playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+
+ playlist_description = json_ld_info.get(
+ 'description') or self._og_search_description(webpage, default=None)
if not timestamp:
timestamp = parse_iso8601(self._search_regex(
if not timestamp:
timestamp = parse_iso8601(self._search_regex(
@@ -795,14 +812,6 @@ class BBCIE(BBCCoUkIE):
entries.append(self._extract_from_playlist_sxml(
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
entries.append(self._extract_from_playlist_sxml(
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
- playlist_title = self._og_search_title(webpage, default=None)
- playlist_title = playlist_title or self._html_search_regex(
- r'<title>(.*?)</title>', webpage, 'playlist title')
-
- playlist_title = self._search_regex(r'(.+)\s*-\s*BBC', playlist_title, 'title', default=playlist_title)
-
- playlist_description = self._og_search_description(webpage, default=None)
-
if entries:
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
if entries:
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)