projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
561504f
)
Dailymotion: Use og:title instead of <title> to find title (Closes: #253)
author
Philipp Hagemeister
<phihag@phihag.de>
Thu, 15 Dec 2011 19:32:05 +0000
(20:32 +0100)
committer
Philipp Hagemeister
<phihag@phihag.de>
Thu, 15 Dec 2011 19:32:05 +0000
(20:32 +0100)
youtube_dl/__init__.py
patch
|
blob
|
history
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 6a603349115d45de2cb28fa07324a7ac4e6e3cb4..d2601997556a3cdfd203080168da13145f72ae73 100755
(executable)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -1591,6 +1591,8 @@ class DailymotionIE(InfoExtractor):
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
def _real_extract(self, url):
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
def _real_extract(self, url):
+ htmlParser = HTMLParser.HTMLParser()
+
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
if mobj is None:
@@ -1601,7 +1603,6 @@ class DailymotionIE(InfoExtractor):
self._downloader.increment_downloads()
video_id = mobj.group(1)
self._downloader.increment_downloads()
video_id = mobj.group(1)
- simple_title = mobj.group(2).decode('utf-8')
video_extension = 'flv'
# Retrieve video webpage to extract further information
video_extension = 'flv'
# Retrieve video webpage to extract further information
@@ -1631,12 +1632,13 @@ class DailymotionIE(InfoExtractor):
video_url = mediaURL
video_url = mediaURL
- mobj = re.search(r'(?im)<title>\s*(.+)\s*-\s*Video\s+Dailymotion</title>', webpage)
+ mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
- video_title = mobj.group(1).decode('utf-8')
+ video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
video_title = sanitize_title(video_title)
video_title = sanitize_title(video_title)
+ simple_title = _simplify_title(video_title)
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
if mobj is None:
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
if mobj is None: