X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffacebook.py;h=f5bbd39d2d0e90996c118e3fae325034fc2bbb6d;hb=2e7e561c1d9dedf1a8e5a206e1ef86cfa4599956;hp=23a6fcd88b6ffef90f3ce0fcdf7fe6abd770ca90;hpb=9c7b509b2a3da1e23e81a2b9039185e09180328a;p=youtube-dl diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 23a6fcd88..f5bbd39d2 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -38,7 +38,8 @@ class FacebookIE(InfoExtractor): story\.php )\?(?:.*?)(?:v|video_id|story_fbid)=| [^/]+/videos/(?:[^/]+/)?| - [^/]+/posts/ + [^/]+/posts/| + groups/[^/]+/permalink/ )| facebook: ) @@ -93,6 +94,24 @@ class FacebookIE(InfoExtractor): 'title': '"What are you doing running in the snow?"', 'uploader': 'FailArmy', } + }, { + 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', + 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3', + 'info_dict': { + 'id': '1035862816472149', + 'ext': 'mp4', + 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog', + 'uploader': 'S. 
Saint', + }, + }, { + 'note': 'swf params escaped', + 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749', + 'md5': '97ba073838964d12c70566e0085c2b91', + 'info_dict': { + 'id': '10153664894881749', + 'ext': 'mp4', + 'title': 'Facebook video #10153664894881749', + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -106,7 +125,7 @@ class FacebookIE(InfoExtractor): 'url': 'facebook:544765982287235', 'only_matching': True, }, { - 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', + 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', 'only_matching': True, }] @@ -183,11 +202,12 @@ class FacebookIE(InfoExtractor): video_data = None - BEFORE = '{swf.addParam(param[0], param[1]);});\n' + BEFORE = '{swf.addParam(param[0], param[1]);});' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' - m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) + m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage) if m: - data = dict(json.loads(m.group(1))) + swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"') + data = dict(json.loads(swf_params)) params_raw = compat_urllib_parse_unquote(data['params']) video_data = json.loads(params_raw)['video_data'] @@ -200,7 +220,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id) + r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id) for item in server_js_data.get('instances', []): if item[1][0] == 'VideoConfig': video_data = video_data_list2dict(item[2][0]['videoData']) @@ -274,8 +294,8 @@ class FacebookIE(InfoExtractor): if '/posts/' in url: entries = [ - self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) - for video_id in self._parse_json( + 
self.url_result('facebook:%s' % vid, FacebookIE.ie_key()) + for vid in self._parse_json( self._search_regex( r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids'),