projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
ed604ce
)
[youtube] Fix categories and improve tags extraction
author
Sergey M․
<dstftw@gmail.com>
Mon, 15 Jun 2020 20:13:39 +0000
(
03:13
+0700)
committer
Sergey M․
<dstftw@gmail.com>
Mon, 15 Jun 2020 20:13:39 +0000
(
03:13
+0700)
youtube_dl/extractor/youtube.py
patch
|
blob
|
history
diff --git
a/youtube_dl/extractor/youtube.py
b/youtube_dl/extractor/youtube.py
index ce2212a7caa13111e1d2835fa65be371a4937847..53dccdf0bf793568d78f4291029092d6b216b7b9 100644
(file)
--- a/
youtube_dl/extractor/youtube.py
+++ b/
youtube_dl/extractor/youtube.py
@@
-2356,17
+2356,21
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
+ category = None
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
default=None)
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
+ if not category:
+ category = try_get(
+ microformat, lambda x: x['category'], compat_str)
+ video_categories = None if category is None else [category]
video_tags = [
unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
video_tags = [
unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+ if not video_tags:
+ video_tags = try_get(video_details, lambda x: x['keywords'], list)
def _extract_count(count_name):
return str_to_int(self._search_regex(
def _extract_count(count_name):
return str_to_int(self._search_regex(