Document and test categories (#2923)

author Philipp Hagemeister <phihag@phihag.de>

Thu, 15 May 2014 10:41:42 +0000 (12:41 +0200)

committer Philipp Hagemeister <phihag@phihag.de>

Thu, 15 May 2014 10:41:42 +0000 (12:41 +0200)
author Philipp Hagemeister <phihag@phihag.de>
Thu, 15 May 2014 10:41:42 +0000 (12:41 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 15 May 2014 10:41:42 +0000 (12:41 +0200)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 1e366a13c710d3c58d564466f22018efda8eebb5..db472aace8faabb465e9c93b7ff6013ccece4e8e 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -113,6 +113,8 @@ class InfoExtractor(object):
      webpage_url:    The url to the video webpage, if given to youtube-dl it
                      should allow to get the same result again. (It will be set
                      by YoutubeDL if it's missing)
+    categories:     A list of categories that the video falls in, for example
+                    ["Sports", "Berlin"]
  
      Unless mentioned otherwise, the fields should be Unicode strings.
  
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index f0a92d18224eebbd69fb9dad6a1a211a7d70fed3..3c8f7f7a2a65720e72c9efeb0fbf017202902de5 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  u"uploader": u"Philipp Hagemeister",
                  u"uploader_id": u"phihag",
                  u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
+                u"categories": [u'Science & Technology'],
              }
          },
          {
@@ -1136,18 +1137,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
          # upload date
          upload_date = None
-        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
+        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
          if mobj is not None:
              upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
              upload_date = unified_strdate(upload_date)
  
-
-        video_categories = []
-        # categories
          m_cat_container = get_element_by_id("eow-category", video_webpage)
          if m_cat_container:
-            video_categories = re.findall(r'<a[^<]+>(.*?)</a>',
-                                m_cat_container, re.DOTALL)
+            category = self._html_search_regex(
+                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'cateory',
+                default=None)
+            video_categories = None if category is None else [category]
+        else:
+            video_categories = None
  
          # description
          video_description = get_element_by_id("eow-description", video_webpage)
author	Philipp Hagemeister <phihag@phihag.de>
	Thu, 15 May 2014 10:41:42 +0000 (12:41 +0200)
committer	Philipp Hagemeister <phihag@phihag.de>
	Thu, 15 May 2014 10:41:42 +0000 (12:41 +0200)
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/youtube.py		patch | blob | history