Merge remote-tracking branch 'Rudloff/websurg'
[youtube-dl] / youtube_dl / extractor / googleplus.py
index e922bd1405d57ba0c074af48f0192b90cf0eb5ef..ab12d7e9381317b4dfddb679eced39db2f752ed4 100644 (file)
@@ -1,3 +1,5 @@
+# coding: utf-8
+
 import datetime
 import re
 
@@ -8,10 +10,18 @@ from ..utils import (
 
 
 class GooglePlusIE(InfoExtractor):
-    """Information extractor for plus.google.com."""
-
+    IE_DESC = u'Google Plus'
     _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
     IE_NAME = u'plus.google'
+    _TEST = {
+        u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
+        u"file": u"ZButuJc6CtH.flv",
+        u"info_dict": {
+            u"upload_date": u"20120613",
+            u"uploader": u"井上ヨシマサ",
+            u"title": u"嘆きの天使 降臨"
+        }
+    }
 
     def _real_extract(self, url):
         # Extract id from URL
@@ -30,7 +40,9 @@ class GooglePlusIE(InfoExtractor):
         self.report_extraction(video_id)
 
         # Extract update date
-        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
+        upload_date = self._html_search_regex(
+            r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*>
+                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
             webpage, u'upload date', fatal=False)
         if upload_date:
             # Convert timestring to a format suitable for filename
@@ -46,14 +58,18 @@ class GooglePlusIE(InfoExtractor):
         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
             webpage, 'title', default=u'NA')
 
-        # Step 2, Stimulate clicking the image box to launch video
-        video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
+        # Step 2, Simulate clicking the image box to launch video
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
             webpage, u'video page URL')
+        if not video_page.startswith(DOMAIN):
+            video_page = DOMAIN + video_page
+
         webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
 
         # Extract video links on video page
         """Extract video links of all sizes"""
-        pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
+        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
         mobj = re.findall(pattern, webpage)
         if len(mobj) == 0:
             raise ExtractorError(u'Unable to extract video links')