Merge remote-tracking branch 'David-Development/rtl2.py'

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 5c41ff517c114bc90407cb3890c7f3959c232209..a028c4ed4e47a780a3a2f04e03a37997af003e4e 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -17,6 +17,7 @@ from ..utils import (
      ExtractorError,
      float_or_none,
      HEADRequest,
+    is_html,
      orderedSet,
      parse_xml,
      smuggle_url,
@@ -488,6 +489,16 @@ class GenericIE(InfoExtractor):
                  'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
              }
          },
+        # Cinerama player
+        {
+            'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+            'info_dict': {
+                'id': '730m_DandD_1901_512k',
+                'ext': 'mp4',
+                'uploader': 'www.abc.net.au',
+                'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+            }
+        }
      ]
  
      def report_following_redirect(self, new_url):
@@ -647,7 +658,7 @@ class GenericIE(InfoExtractor):
          # Maybe it's a direct link to a video?
          # Be careful not to download the whole thing!
          first_bytes = full_response.read(512)
-        if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
+        if not is_html(first_bytes):
              self._downloader.report_warning(
                  'URL could be a direct video link, returning it as such.')
              upload_date = unified_strdate(
@@ -926,7 +937,7 @@ class GenericIE(InfoExtractor):
  
          # Look for embedded TED player
          mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'TED')
  
@@ -1045,6 +1056,10 @@ class GenericIE(InfoExtractor):
                      \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                          ["']?url["']?\s*:\s*["']([^"']+)["']
              ''', webpage))
+        if not found:
+            # Cinerama player
+            found = re.findall(
+                r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
          if not found:
              # Try to find twitter cards info
              found = filter_video(re.findall(