X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdailymotion.py;h=5d0bfe454c9bfe1a3e16d1273b18ed3be2f436b8;hb=7e660ac113b5af8f92de2bbc9579426ea3d89581;hp=3bd0b862c6551c8f40207f62db2daf964621db47;hpb=ffa8f0df0a878463078467709f615b1e57c61ec1;p=youtube-dl diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 3bd0b862c..5d0bfe454 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -8,12 +8,11 @@ from .subtitles import SubtitlesInfoExtractor from ..utils import ( compat_urllib_request, compat_str, - get_element_by_attribute, - get_element_by_id, orderedSet, str_to_int, - + int_or_none, ExtractorError, + unescapeHTML, ) class DailymotionBaseInfoExtractor(InfoExtractor): @@ -28,7 +27,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): """Information Extractor for Dailymotion""" - _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' + _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P[^/?_]+)' IE_NAME = u'dailymotion' _FORMATS = [ @@ -81,7 +80,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): # Extract id and simplified title from URL mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1).split('_')[0].split('?')[0] + video_id = mobj.group('id') url = 'http://www.dailymotion.com/video/%s' % video_id @@ -101,10 +100,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'Vevo video detected: %s' % vevo_id) return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') - video_uploader = self._search_regex([r'(?im)[^<]+?]+?>([^<]+?)', - # Looking for official user - r'<(?:span|a) .*?rel="author".*?>([^<]+?)]+>([\d\.,]+)<', webpage, u'view count')) + view_count = self._search_regex( + r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False) + if view_count is not None: + view_count = str_to_int(view_count) return { 'id': video_id, 'formats': formats, - 'uploader': video_uploader, + 'uploader': info['owner.screenname'], 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), 'subtitles': video_subtitles, @@ -181,7 +178,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): IE_NAME = u'dailymotion:playlist' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' - _MORE_PAGES_INDICATOR = r'' + _MORE_PAGES_INDICATOR = r'(?s)
.*?[^/]+)' - _MORE_PAGES_INDICATOR = r'' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P[^/]+)' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user = mobj.group('user') webpage = self._download_webpage(url, user) - full_user = self._html_search_regex( - r'(.*?)' % re.escape(user), + webpage, u'user', flags=re.DOTALL)) return { '_type': 'playlist',