Merge branch 'pr-democracynow' of https://github.com/atomicdryad/youtube-dl into...

[youtube-dl] / youtube_dl / extractor / twitter.py
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py

index 6b3b39aee6fedcc00de29f07bb111eaf2438e43f..9d3e46b946843ae0da6b9de525c4aa4b8b3f4cbb 100644 (file)
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -7,7 +7,6 @@ from .common import InfoExtractor
  from ..compat import compat_urllib_request
  from ..utils import (
      float_or_none,
-    unescapeHTML,
      xpath_text,
      remove_end,
  )
@@ -38,6 +37,19 @@ class TwitterCardIE(InfoExtractor):
                  'thumbnail': 're:^https?://.*\.jpg',
                  'duration': 80.155,
              },
+        },
+        {
+            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
+            'md5': 'b6f35e8b08a0bec6c8af77a2f4b3a814',
+            'info_dict': {
+                'id': 'dq4Oj5quskI',
+                'ext': 'mp4',
+                'title': 'Ubuntu 11.10 Overview',
+                'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/',
+                'upload_date': '20111013',
+                'uploader': 'OMG! Ubuntu!',
+                'uploader_id': 'omgubuntu',
+            },
          }
      ]
  
@@ -57,9 +69,14 @@ class TwitterCardIE(InfoExtractor):
              request.add_header('User-Agent', user_agent)
              webpage = self._download_webpage(request, video_id)
  
-            config = self._parse_json(
-                unescapeHTML(self._search_regex(
-                    r'data-player-config="([^"]+)"', webpage, 'data player config')),
+            youtube_url = self._html_search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
+                webpage, 'youtube iframe', default=None)
+            if youtube_url:
+                return self.url_result(youtube_url, 'Youtube')
+
+            config = self._parse_json(self._html_search_regex(
+                r'data-player-config="([^"]+)"', webpage, 'data player config'),
                  video_id)
              if 'playlist' not in config:
                  if 'vmapUrl' in config:
@@ -130,7 +147,7 @@ class TwitterIE(InfoExtractor):
          title = self._og_search_description(webpage).strip('').replace('\n', ' ')
  
          # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
-        mobj = re.match(r'“(.*)\s+(http://[^ ]+)”', title)
+        mobj = re.match(r'“(.*)\s+(https?://[^ ]+)”', title)
          title, short_url = mobj.groups()
  
          card_id = self._search_regex(