[facebook] Recognize #! URLs (Fixes #1988)
author Philipp Hagemeister <phihag@phihag.de>
Mon, 16 Dec 2013 20:10:06 +0000 (21:10 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 16 Dec 2013 20:10:06 +0000 (21:10 +0100)
test/test_all_urls.py
youtube_dl/extractor/facebook.py

index e9458b2e331526de1c986c697c4a8d0ddd682f14..bd77b7c30149d556caa1237b4be4c06a56adc613 100644 (file)
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from test.helper import get_testcases
 
 from youtube_dl.extractor import (
+    FacebookIE,
     gen_extractors,
     JustinTVIE,
     YoutubeIE,
@@ -87,12 +88,15 @@ class TestAllURLsMatching(unittest.TestCase):
         assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
         assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
 
+    def test_facebook_matching(self):
+        self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+
     def test_no_duplicates(self):
         ies = gen_extractors()
         for tc in get_testcases():
             url = tc['url']
             for ie in ies:
-                if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
+                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                     self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                 else:
                     self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
index 3b210710e3695ec3aa940b335d9868a281d7740a..4556079c8ad5edce7a6a3efe29989299d719ed28 100644 (file)
@@ -17,7 +17,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
     """Information Extractor for Facebook"""
 
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
     _NETRC_MACHINE = 'facebook'
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
         u'file': u'120708114770723.mp4',
         u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
         u'info_dict': {
-            u"duration": 279, 
+            u"duration": 279,
             u"title": u"PEOPLE ARE AWESOME 2013"
         }
     }