Merge branch 'zdf' (fixes #8024)
author	remitamine <remitamine@gmail.com>
Mon, 28 Dec 2015 16:54:04 +0000 (17:54 +0100)
committer	remitamine <remitamine@gmail.com>
Mon, 28 Dec 2015 16:54:04 +0000 (17:54 +0100)
youtube_dl/extractor/cspan.py
youtube_dl/extractor/facebook.py

index 7b685d157ddc07de54d59ba52cfc7db0940f6eb2..b3ee670188e2e4b7fb1e50d173ee091e74a05654 100644 (file)
@@ -58,18 +58,23 @@ class CSpanIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        video_type = None
         webpage = self._download_webpage(url, video_id)
-        matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
-        if matches:
+        # We first look for clipid, because clipprog always appears before
+        patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+        results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+        if results:
+            matches = results[0]
             video_type, video_id = matches.groups()
-            if video_type == 'prog':
-                video_type = 'program'
+            video_type = 'clip' if video_type == 'id' else 'program'
         else:
             senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
             if senate_isvp_url:
                 title = self._og_search_title(webpage)
                 surl = smuggle_url(senate_isvp_url, {'force_title': title})
                 return self.url_result(surl, 'SenateISVP', video_id, title)
+        if video_type is None or video_id is None:
+            raise ExtractorError('unable to find video id and type')
 
         def get_text_attr(d, attr):
             return d.get(attr, {}).get('#text')
index 39c481068fbc692ba43651c84b1afb270900fa8a..5e43f235965cb7b702987276d63f2ae43a75c501 100644 (file)
@@ -74,7 +74,7 @@ class FacebookIE(InfoExtractor):
             return
 
         login_page_req = sanitized_Request(self._LOGIN_URL)
-        login_page_req.add_header('Cookie', 'locale=en_US')
+        self._set_cookie('facebook.com', 'locale', 'en_US')
         login_page = self._download_webpage(login_page_req, None,
                                             note='Downloading login page',
                                             errnote='Unable to download login page')
@@ -100,13 +100,25 @@ class FacebookIE(InfoExtractor):
             login_results = self._download_webpage(request, None,
                                                    note='Logging in', errnote='unable to fetch login page')
             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+                error = self._html_search_regex(
+                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
+                    login_results, 'login error', default=None, group='error')
+                if error:
+                    raise ExtractorError('Unable to login: %s' % error, expected=True)
                 self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                 return
 
+            fb_dtsg = self._search_regex(
+                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
+            h = self._search_regex(
+                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
+
+            if not fb_dtsg or not h:
+                return
+
             check_form = {
-                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
-                'h': self._search_regex(
-                    r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
+                'fb_dtsg': fb_dtsg,
+                'h': h,
                 'name_action_selected': 'dont_save',
             }
             check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))