Dailymotion: Use og:title instead of <title> to find title (Closes: #253)

author Philipp Hagemeister <phihag@phihag.de>
Thu, 15 Dec 2011 19:32:05 +0000 (20:32 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 15 Dec 2011 19:32:05 +0000 (20:32 +0100)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index 6a603349115d45de2cb28fa07324a7ac4e6e3cb4..d2601997556a3cdfd203080168da13145f72ae73 100755 (executable)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -1591,6 +1591,8 @@ class DailymotionIE(InfoExtractor):
                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
  
         def _real_extract(self, url):
                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
  
         def _real_extract(self, url):
+               htmlParser = HTMLParser.HTMLParser()
+               
                 # Extract id and simplified title from URL
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
                 # Extract id and simplified title from URL
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@ -1601,7 +1603,6 @@ class DailymotionIE(InfoExtractor):
                 self._downloader.increment_downloads()
                 video_id = mobj.group(1)
  
                 self._downloader.increment_downloads()
                 video_id = mobj.group(1)
  
-               simple_title = mobj.group(2).decode('utf-8')
                 video_extension = 'flv'
  
                 # Retrieve video webpage to extract further information
                 video_extension = 'flv'
  
                 # Retrieve video webpage to extract further information
@@ -1631,12 +1632,13 @@ class DailymotionIE(InfoExtractor):
  
                 video_url = mediaURL
  
  
                 video_url = mediaURL
  
-               mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage)
+               mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract title')
                         return
                 if mobj is None:
                         self._downloader.trouble(u'ERROR: unable to extract title')
                         return
-               video_title = mobj.group(1).decode('utf-8')
+               video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
                 video_title = sanitize_title(video_title)
                 video_title = sanitize_title(video_title)
+               simple_title = _simplify_title(video_title)
  
                 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
                 if mobj is None:
  
                 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
                 if mobj is None:
author	Philipp Hagemeister <phihag@phihag.de>
	Thu, 15 Dec 2011 19:32:05 +0000 (20:32 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Thu, 15 Dec 2011 19:32:05 +0000 (20:32 +0100)