projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[periscope] Extract width and height (closes #20015)
[youtube-dl]
/
youtube_dl
/
extractor
/
cbc.py
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index 54b4b9be958ae49f0ea4f7d37cadcdf4e2c8b1c7..43f95c739deed7e497b2d85b23393c24f0f5c864 100644
(file)
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -17,9 +17,11 @@ from ..utils import (
xpath_element,
xpath_with_ns,
find_xpath_attr,
xpath_element,
xpath_with_ns,
find_xpath_attr,
+ orderedSet,
parse_duration,
parse_iso8601,
parse_age_limit,
parse_duration,
parse_iso8601,
parse_age_limit,
+ strip_or_none,
int_or_none,
ExtractorError,
)
int_or_none,
ExtractorError,
)
@@ -129,15 +131,23 @@ class CBCIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ media_ids = []
+ for media_id_re in (
+ r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+ r'<div[^>]+\bid=["\']player-(\d+)',
+ r'guid["\']\s*:\s*["\'](\d+)'):
+ media_ids.extend(re.findall(media_id_re, webpage))
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
- for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+ for media_id in orderedSet(media_ids)])
return self.playlist_result(
return self.playlist_result(
- entries, display_id,
- self._og_search_title(webpage, fatal=False),
+ entries, display_id, strip_or_none(title),
self._og_search_description(webpage))
self._og_search_description(webpage))