X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ffacebook.py;h=cb5dd57fb4a6cd54400ebd03631dda7e3f07962a;hb=a853427427269549593a0217db203305332ac983;hp=321eec59ef672eb6f88bf07f78466ba83456afbc;hpb=c3d3110f6a4b769e1ddb5532ac61f3da419ebd07;p=youtube-dl diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 321eec59e..cb5dd57fb 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -7,11 +7,11 @@ import socket from .common import InfoExtractor from ..compat import ( compat_http_client, - compat_str, compat_urllib_error, compat_urllib_parse_unquote, ) from ..utils import ( + error_to_compat_str, ExtractorError, limit_length, sanitized_Request, @@ -23,15 +23,23 @@ from ..utils import ( class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?:\w+\.)?facebook\.com/ - (?:[^#]*?\#!/)? - (?: - (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) - (?:v|video_id)=| - [^/]+/videos/(?:[^/]+/)? - ) - (?P[0-9]+) - (?:.*)''' + (?: + https?:// + (?:\w+\.)?facebook\.com/ + (?:[^#]*?\#!/)? + (?: + (?: + video/video\.php| + photo\.php| + video\.php| + video/embed + )\?(?:.*?)(?:v|video_id)=| + [^/]+/videos/(?:[^/]+/)? + )| + facebook: + ) + (?P[0-9]+) + ''' _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' _NETRC_MACHINE = 'facebook' @@ -66,6 +74,9 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', 'only_matching': True, + }, { + 'url': 'facebook:544765982287235', + 'only_matching': True, }] def _login(self): @@ -74,7 +85,7 @@ class FacebookIE(InfoExtractor): return login_page_req = sanitized_Request(self._LOGIN_URL) - login_page_req.add_header('Cookie', 'locale=en_US') + self._set_cookie('facebook.com', 'locale', 'en_US') login_page = self._download_webpage(login_page_req, None, note='Downloading login page', errnote='Unable to download login page') @@ -100,13 +111,25 @@ class FacebookIE(InfoExtractor): login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') if re.search(r'', login_results) is not None: - self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') + error = self._html_search_regex( + r'(?s)]+class=(["\']).*?login_error_box.*?\1[^>]*>]*>.*?]*>(?P.+?)', + login_results, 'login error', default=None, group='error') + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') + return + + fb_dtsg = self._search_regex( + r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None) + h = self._search_regex( + r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None) + + if not fb_dtsg or not h: return check_form = { - 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), - 'h': self._search_regex( - r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'), + 'fb_dtsg': fb_dtsg, + 'h': h, 'name_action_selected': 'dont_save', } check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) @@ -114,9 +137,9 @@ class FacebookIE(InfoExtractor): check_response = self._download_webpage(check_req, None, note='Confirming login') if re.search(r'id="checkpointSubmitButton"', check_response) is not None: - self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') + self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.report_warning('unable to log in: %s' % compat_str(err)) + self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err)) return def _real_initialize(self): @@ -176,3 +199,33 @@ class FacebookIE(InfoExtractor): 'formats': formats, 'uploader': uploader, } + + +class FacebookPostIE(InfoExtractor): + IE_NAME = 'facebook:post' + _VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P\d+)' + _TEST = { + 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', + 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6', + 'info_dict': { + 'id': '544765982287235', + 'ext': 'mp4', + 'title': '"What are you doing running in the snow?"', + 'uploader': 'FailArmy', + } + } + + def _real_extract(self, url): + post_id = self._match_id(url) + + webpage = self._download_webpage(url, post_id) + + entries = [ + self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) + for video_id in self._parse_json( + self._search_regex( + r'(["\'])video_ids\1\s*:\s*(?P\[.+?\])', + webpage, 'video ids', group='ids'), + post_id)] + + return self.playlist_result(entries, post_id)