X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffacebook.py;h=f5bbd39d2d0e90996c118e3fae325034fc2bbb6d;hb=09e3f91e408eb357929abad3710c799376004138;hp=b354c5521852c2f78a18f83a70b3b473839a84f2;hpb=98801241965ff81afd68392df52e38a54d49ded2;p=youtube-dl diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b354c5521..f5bbd39d2 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -38,7 +38,8 @@ class FacebookIE(InfoExtractor): story\.php )\?(?:.*?)(?:v|video_id|story_fbid)=| [^/]+/videos/(?:[^/]+/)?| - [^/]+/posts/ + [^/]+/posts/| + groups/[^/]+/permalink/ )| facebook: ) @@ -102,6 +103,15 @@ class FacebookIE(InfoExtractor): 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog', 'uploader': 'S. Saint', }, + }, { + 'note': 'swf params escaped', + 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749', + 'md5': '97ba073838964d12c70566e0085c2b91', + 'info_dict': { + 'id': '10153664894881749', + 'ext': 'mp4', + 'title': 'Facebook video #10153664894881749', + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -114,6 +124,9 @@ class FacebookIE(InfoExtractor): }, { 'url': 'facebook:544765982287235', 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', + 'only_matching': True, }] def _login(self): @@ -189,11 +202,12 @@ class FacebookIE(InfoExtractor): video_data = None - BEFORE = '{swf.addParam(param[0], param[1]);});\n' + BEFORE = '{swf.addParam(param[0], param[1]);});' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' - m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) + m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage) if m: - data = dict(json.loads(m.group(1))) + swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"') + data = 
dict(json.loads(swf_params)) params_raw = compat_urllib_parse_unquote(data['params']) video_data = json.loads(params_raw)['video_data'] @@ -280,8 +294,8 @@ class FacebookIE(InfoExtractor): if '/posts/' in url: entries = [ - self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) - for video_id in self._parse_json( + self.url_result('facebook:%s' % vid, FacebookIE.ie_key()) + for vid in self._parse_json( self._search_regex( r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids'),