X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmotherless.py;h=d4bd273b61e756ef71db83bf264752fb2c0182af;hb=2f483bc1c389709623117079439708783122b5ec;hp=90ed91ba6353e12964882aefb65236c91942bafd;hpb=45283afdec81af21ba50ff3aca3d86fb6d2584b0;p=youtube-dl diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 90ed91ba6..d4bd273b6 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -77,8 +77,11 @@ class MotherlessIE(InfoExtractor): title = self._html_search_regex( r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') - video_url = self._html_search_regex( - r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL') + video_url = (self._html_search_regex( + (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', + r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'), + webpage, 'video URL', default=None, group='url') or + 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id) age_limit = self._rta_search(webpage) view_count = str_to_int(self._html_search_regex( r'Views\s+([^<]+)<', @@ -120,7 +123,7 @@ class MotherlessIE(InfoExtractor): class MotherlessGroupIE(InfoExtractor): - _VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)' + _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)' _TESTS = [{ 'url': 'http://motherless.com/g/movie_scenes', 'info_dict': { @@ -148,14 +151,27 @@ class MotherlessGroupIE(InfoExtractor): else super(MotherlessGroupIE, cls).suitable(url)) def _extract_entries(self, webpage, base): - return [ - self.url_result( - compat_urlparse.urljoin(base, video_path), - MotherlessIE.ie_key(), video_title=title) - for video_path, title in orderedSet(re.findall( - r'href="/([^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"', - webpage)) - ] + entries = [] + for mobj in re.finditer( + r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?', + webpage): + video_url =
compat_urlparse.urljoin(base, mobj.group('href')) + if not MotherlessIE.suitable(video_url): + continue + video_id = MotherlessIE._match_id(video_url) + title = mobj.group('title') + entries.append(self.url_result( + video_url, ie=MotherlessIE.ie_key(), video_id=video_id, + video_title=title)) + # Alternative fallback + if not entries: + entries = [ + self.url_result( + compat_urlparse.urljoin(base, '/' + entry_id), + ie=MotherlessIE.ie_key(), video_id=entry_id) + for entry_id in orderedSet(re.findall( + r'data-codename=["\']([A-Z0-9]+)', webpage))] + return entries def _real_extract(self, url): group_id = self._match_id(url)