X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Ffacebook.py;h=b69c1ede0046d73e31df2098f78cf6dc20c254d4;hb=bf2a5555c0d56579484df4d2a5d65546aeb1bf39;hp=bbdb14366589d65c87d9a96080e22059300a3d99;hpb=6a424391d9745f46c8451771ae4d62da389cc311;p=youtube-dl diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index bbdb14366..b69c1ede0 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -1,6 +1,6 @@ +# coding: utf-8 from __future__ import unicode_literals -import json import re import socket @@ -13,13 +13,16 @@ from ..compat import ( compat_urllib_parse_unquote_plus, ) from ..utils import ( + clean_html, error_to_compat_str, ExtractorError, + get_element_by_id, + int_or_none, + js_to_json, limit_length, sanitized_Request, + try_get, urlencode_postdata, - get_element_by_id, - clean_html, ) @@ -27,7 +30,7 @@ class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?:// - (?:\w+\.)?facebook\.com/ + (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/ (?:[^#]*?\#!/)? (?: (?: @@ -62,6 +65,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', 'uploader': 'Tennis on Facebook', + 'upload_date': '20140908', + 'timestamp': 1410199200, } }, { 'note': 'Video without discernible title', @@ -69,8 +74,10 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Facebook video #274175099429670', + 'title': 'Asif Nawab Butt posted a video to his Timeline.', 'uploader': 'Asif Nawab Butt', + 'upload_date': '20140506', + 'timestamp': 1399398998, }, 'expected_warnings': [ 'title' @@ -78,12 +85,14 @@ class FacebookIE(InfoExtractor): }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', - 'md5': '54706e4db4f5ad58fbad82dde1f1213f', + 'md5': 'b2c28d528273b323abe5c6ab59f0f030', 'info_dict': { 'id': '957955867617029', 'ext': 'mp4', 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', 'uploader': 'Demy de Zeeuw', + 'upload_date': '20160110', + 'timestamp': 1452431627, }, }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', @@ -93,7 +102,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': '"What are you doing running in the snow?"', 'uploader': 'FailArmy', - } + }, + 'skip': 'Video gone', }, { 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3', @@ -103,6 +113,7 @@ class FacebookIE(InfoExtractor): 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog', 'uploader': 'S. Saint', }, + 'skip': 'Video gone', }, { 'note': 'swf params escaped', 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749', @@ -112,6 +123,58 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Facebook video #10153664894881749', }, + }, { + # have 1080P, but only up to 720p in swf params + 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', + 'md5': '0d9813160b146b3bc8744e006027fcc6', + 'info_dict': { + 'id': '10155529876156509', + 'ext': 'mp4', + 'title': 'Holocaust survivor becomes US citizen', + 'timestamp': 1477818095, + 'upload_date': '20161030', + 'uploader': 'CNN', + }, + }, { + # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall + 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', + 'info_dict': { + 'id': '1417995061575415', + 'ext': 'mp4', + 'title': 'md5:a7b86ca673f51800cd54687b7f4012fe', + 'timestamp': 1486648217, + 'upload_date': '20170209', + 'uploader': 'Yaroslav Korpan', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', + 'info_dict': { + 'id': '1072691702860471', + 'ext': 'mp4', + 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d', + 'timestamp': 1477305000, + 'upload_date': '20161024', + 'uploader': 'La GuÃa Del Varón', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', + 'info_dict': { + 'id': '1396382447100162', + 'ext': 'mp4', + 'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3', + 'timestamp': 1486035494, + 'upload_date': '20170202', + 'uploader': 'Elisabeth Ahtn', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -127,6 +190,16 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', 'only_matching': True, + }, { + 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', + 'only_matching': True, + }, { + 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', + 'only_matching': True, + }, { + # no title + 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', + 'only_matching': True, }] @staticmethod @@ -139,7 +212,7 @@ class FacebookIE(InfoExtractor): # Facebook API embed # see https://developers.facebook.com/docs/plugins/embedded-video-player mobj = re.search(r'''(?x)