Merge remote-tracking branch 'upstream/master' into bliptv

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 5b1da47e3d4bb0678a7e6433d024a6cadad55bed..4d38b0c9d1144383b2c2840d8fd9b91f88dadb91 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
  
  import os
  import re
  
  import os
  import re
+import sys
  
  from .common import InfoExtractor
  from .youtube import YoutubeIE
  from ..compat import (
  
  from .common import InfoExtractor
  from .youtube import YoutubeIE
  from ..compat import (
-    compat_urllib_parse,
+    compat_etree_fromstring,
      compat_urllib_parse_unquote,
      compat_urllib_parse_unquote,
-    compat_urllib_request,
      compat_urlparse,
      compat_xml_parse_error,
  )
      compat_urlparse,
      compat_xml_parse_error,
  )
@@ -21,7 +21,7 @@ from ..utils import (
      HEADRequest,
      is_html,
      orderedSet,
      HEADRequest,
      is_html,
      orderedSet,
-    parse_xml,
+    sanitized_Request,
      smuggle_url,
      unescapeHTML,
      unified_strdate,
      smuggle_url,
      unescapeHTML,
      unified_strdate,
@@ -30,17 +30,20 @@ from ..utils import (
      url_basename,
      xpath_text,
  )
      url_basename,
      xpath_text,
  )
-from .brightcove import BrightcoveIE
+from .brightcove import (
+    BrightcoveLegacyIE,
+    BrightcoveNewIE,
+)
  from .nbc import NBCSportsVPlayerIE
  from .ooyala import OoyalaIE
  from .rutv import RUTVIE
  from .tvc import TVCIE
  from .sportbox import SportBoxEmbedIE
  from .smotri import SmotriIE
  from .nbc import NBCSportsVPlayerIE
  from .ooyala import OoyalaIE
  from .rutv import RUTVIE
  from .tvc import TVCIE
  from .sportbox import SportBoxEmbedIE
  from .smotri import SmotriIE
+from .myvi import MyviIE
  from .condenast import CondeNastIE
  from .udn import UDNEmbedIE
  from .senateisvp import SenateISVPIE
  from .condenast import CondeNastIE
  from .udn import UDNEmbedIE
  from .senateisvp import SenateISVPIE
-from .bliptv import BlipTVIE
  from .svt import SVTIE
  from .pornhub import PornHubIE
  from .xhamster import XHamsterEmbedIE
  from .svt import SVTIE
  from .pornhub import PornHubIE
  from .xhamster import XHamsterEmbedIE
@@ -48,6 +51,8 @@ from .vimeo import VimeoIE
  from .dailymotion import DailymotionCloudIE
  from .onionstudios import OnionStudiosIE
  from .snagfilms import SnagFilmsEmbedIE
  from .dailymotion import DailymotionCloudIE
  from .onionstudios import OnionStudiosIE
  from .snagfilms import SnagFilmsEmbedIE
+from .screenwavemedia import ScreenwaveMediaIE
+from .mtv import MTVServicesEmbeddedIE
  
  
  class GenericIE(InfoExtractor):
  
  
  class GenericIE(InfoExtractor):
@@ -130,6 +135,90 @@ class GenericIE(InfoExtractor):
                  'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
              }
          },
                  'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
              }
          },
+        # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
+        {
+            'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
+            'info_dict': {
+                'id': 'smil',
+                'ext': 'mp4',
+                'title': 'Automatics, robotics and biocybernetics',
+                'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+                'upload_date': '20130627',
+                'formats': 'mincount:16',
+                'subtitles': 'mincount:1',
+            },
+            'params': {
+                'force_generic_extractor': True,
+                'skip_download': True,
+            },
+        },
+        # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
+        {
+            'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
+            'info_dict': {
+                'id': 'hds',
+                'ext': 'flv',
+                'title': 'hds',
+                'formats': 'mincount:1',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        # SMIL from https://www.restudy.dk/video/play/id/1637
+        {
+            'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
+            'info_dict': {
+                'id': 'video_1637',
+                'ext': 'flv',
+                'title': 'video_1637',
+                'formats': 'mincount:3',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
+        {
+            'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
+            'info_dict': {
+                'id': 'smil-service',
+                'ext': 'flv',
+                'title': 'smil-service',
+                'formats': 'mincount:1',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
+        {
+            'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
+            'info_dict': {
+                'id': '4719370',
+                'ext': 'mp4',
+                'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
+                'formats': 'mincount:3',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
+        {
+            'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
+            'info_dict': {
+                'id': 'mZlp2ctYIUEB',
+                'ext': 'mp4',
+                'title': 'Tikibad ontruimd wegens brand',
+                'description': 'md5:05ca046ff47b931f9b04855015e163a4',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 33,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
          # google redirect
          {
              'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
          # google redirect
          {
              'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -146,6 +235,22 @@ class GenericIE(InfoExtractor):
                  'skip_download': False,
              }
          },
                  'skip_download': False,
              }
          },
+        {
+            # redirect in Refresh HTTP header
+            'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+            'info_dict': {
+                'id': 'pO8h3EaFRdo',
+                'ext': 'mp4',
+                'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+                'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+                'upload_date': '20150917',
+                'uploader_id': 'brtvofficial',
+                'uploader': 'Boiler Room',
+            },
+            'params': {
+                'skip_download': False,
+            },
+        },
          {
              'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
              'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
          {
              'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
              'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@ -172,7 +277,7 @@ class GenericIE(InfoExtractor):
          # it also tests brightcove videos that need to set the 'Referer' in the
          # http requests
          {
          # it also tests brightcove videos that need to set the 'Referer' in the
          # http requests
          {
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
              'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
              'info_dict': {
                  'id': '2765128793001',
              'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
              'info_dict': {
                  'id': '2765128793001',
@@ -196,7 +301,7 @@ class GenericIE(InfoExtractor):
                  'uploader': 'thestar.com',
                  'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
              },
                  'uploader': 'thestar.com',
                  'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
              },
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
          },
          {
              'url': 'http://www.championat.com/video/football/v/87/87499.html',
          },
          {
              'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -211,7 +316,7 @@ class GenericIE(InfoExtractor):
          },
          {
              # https://github.com/rg3/youtube-dl/issues/3541
          },
          {
              # https://github.com/rg3/youtube-dl/issues/3541
-            'add_ie': ['Brightcove'],
+            'add_ie': ['BrightcoveLegacy'],
              'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
              'info_dict': {
                  'id': '3866516442001',
              'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
              'info_dict': {
                  'id': '3866516442001',
@@ -236,6 +341,19 @@ class GenericIE(InfoExtractor):
              },
              'add_ie': ['Ooyala'],
          },
              },
              'add_ie': ['Ooyala'],
          },
+        {
+            # ooyala video embedded with http://player.ooyala.com/iframe.js
+            'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
+            'info_dict': {
+                'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
+                'ext': 'mp4',
+                'title': '"Steve Jobs: Man in the Machine" trailer',
+                'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
          # multiple ooyala embeds on SBN network websites
          {
              'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
          # multiple ooyala embeds on SBN network websites
          {
              'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@@ -276,14 +394,6 @@ class GenericIE(InfoExtractor):
                  'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
              },
          },
                  'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
              },
          },
-        # BBC iPlayer embeds
-        {
-            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
-            'info_dict': {
-                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
-            },
-            'playlist_mincount': 18,
-        },
          # RUTV embed
          {
              'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
          # RUTV embed
          {
              'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -338,6 +448,17 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
                  'skip_download': True,
              },
          },
+        # Myvi.ru embed
+        {
+            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+            'info_dict': {
+                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+                'ext': 'mp4',
+                'title': 'Ужастики, русский трейлер (2015)',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 153,
+            }
+        },
          # XHamster embed
          {
              'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
          # XHamster embed
          {
              'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
@@ -396,6 +517,26 @@ class GenericIE(InfoExtractor):
                  'skip_download': 'Requires rtmpdump'
              }
          },
                  'skip_download': 'Requires rtmpdump'
              }
          },
+        # francetv embed
+        {
+            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+            'info_dict': {
+                'id': 'EV_30231',
+                'ext': 'mp4',
+                'title': 'Alcaline, le concert avec Calogero',
+                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+                'upload_date': '20150226',
+                'timestamp': 1424989860,
+                'duration': 5400,
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            },
+            'expected_warnings': [
+                'Forbidden'
+            ]
+        },
          # Condé Nast embed
          {
              'url': 'http://www.wired.com/2014/04/honda-asimo/',
          # Condé Nast embed
          {
              'url': 'http://www.wired.com/2014/04/honda-asimo/',
@@ -669,6 +810,31 @@ class GenericIE(InfoExtractor):
                  'title': 'John Carlson Postgame 2/25/15',
              },
          },
                  'title': 'John Carlson Postgame 2/25/15',
              },
          },
+        # Kaltura embed (different embed code)
+        {
+            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+            'info_dict': {
+                'id': '1_a52wc67y',
+                'ext': 'flv',
+                'upload_date': '20150127',
+                'uploader_id': 'PremierMedia',
+                'timestamp': int,
+                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+            },
+        },
+        # Kaltura embed protected with referrer
+        {
+            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
+            'info_dict': {
+                'id': '1_g4fbemnq',
+                'ext': 'mp4',
+                'title': 'Violetta - Achter De Schermen - Ruggero',
+                'description': 'Achter de schermen met Ruggero',
+                'timestamp': 1435133761,
+                'upload_date': '20150624',
+                'uploader_id': 'echojecka',
+            },
+        },
          # Eagle.Platform embed (generic URL)
          {
              'url': 'http://lenta.ru/news/2015/03/06/navalny/',
          # Eagle.Platform embed (generic URL)
          {
              'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -850,6 +1016,15 @@ class GenericIE(InfoExtractor):
                  'uploader_id': 'clickhole',
              }
          },
                  'uploader_id': 'clickhole',
              }
          },
+        # SnagFilms embed
+        {
+            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+            'info_dict': {
+                'id': '74849a00-85a9-11e1-9660-123139220831',
+                'ext': 'mp4',
+                'title': '#whilewewatch',
+            }
+        },
          # AdobeTVVideo embed
          {
              'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
          # AdobeTVVideo embed
          {
              'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
@@ -861,6 +1036,41 @@ class GenericIE(InfoExtractor):
                  'description': 'New experience with Acrobat DC',
                  'duration': 248.667,
              },
                  'description': 'New experience with Acrobat DC',
                  'duration': 248.667,
              },
+        },
+        # ScreenwaveMedia embed
+        {
+            'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
+            'md5': '24ace5baba0d35d55c6810b51f34e9e0',
+            'info_dict': {
+                'id': 'cinemasnob-55d26273809dd',
+                'ext': 'mp4',
+                'title': 'cinemasnob',
+            },
+        },
+        # BrightcoveInPageEmbed embed
+        {
+            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+            'info_dict': {
+                'id': '4238694884001',
+                'ext': 'flv',
+                'title': 'Tabletop: Dread, Last Thoughts',
+                'description': 'Tabletop: Dread, Last Thoughts',
+                'duration': 51690,
+            },
+        },
+        # JWPlayer with M3U8
+        {
+            'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+            'info_dict': {
+                'id': 'playlist',
+                'ext': 'mp4',
+                'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
+                'uploader': 'ren.tv',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
          }
      ]
  
          }
      ]
  
@@ -1004,7 +1214,7 @@ class GenericIE(InfoExtractor):
  
          full_response = None
          if head_response is False:
  
          full_response = None
          if head_response is False:
-            request = compat_urllib_request.Request(url)
+            request = sanitized_Request(url)
              request.add_header('Accept-Encoding', '*')
              full_response = self._request_webpage(request, video_id)
              head_response = full_response
              request.add_header('Accept-Encoding', '*')
              full_response = self._request_webpage(request, video_id)
              head_response = full_response
@@ -1033,7 +1243,7 @@ class GenericIE(InfoExtractor):
                  '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
  
          if not full_response:
                  '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
  
          if not full_response:
-            request = compat_urllib_request.Request(url)
+            request = sanitized_Request(url)
              # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
              # making it impossible to download only chunk of the file (yet we need only 512kB to
              # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
              # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
              # making it impossible to download only chunk of the file (yet we need only 512kB to
              # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
@@ -1066,11 +1276,15 @@ class GenericIE(InfoExtractor):
  
          self.report_extraction(video_id)
  
  
          self.report_extraction(video_id)
  
-        # Is it an RSS feed?
+        # Is it an RSS feed, a SMIL file or a XSPF playlist?
          try:
          try:
-            doc = parse_xml(webpage)
+            doc = compat_etree_fromstring(webpage.encode('utf-8'))
              if doc.tag == 'rss':
                  return self._extract_rss(url, video_id, doc)
              if doc.tag == 'rss':
                  return self._extract_rss(url, video_id, doc)
+            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
+                return self._parse_smil(doc, url, video_id)
+            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
          except compat_xml_parse_error:
              pass
  
          except compat_xml_parse_error:
              pass
  
@@ -1082,7 +1296,7 @@ class GenericIE(InfoExtractor):
          # Sometimes embedded video player is hidden behind percent encoding
          # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
          # Unescaping the whole page allows to handle those cases in a generic way
          # Sometimes embedded video player is hidden behind percent encoding
          # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
          # Unescaping the whole page allows to handle those cases in a generic way
-        webpage = compat_urllib_parse.unquote(webpage)
+        webpage = compat_urllib_parse_unquote(webpage)
  
          # it's tempting to parse this further, but you would
          # have to take into account all the variations like
  
          # it's tempting to parse this further, but you would
          # have to take into account all the variations like
@@ -1116,14 +1330,14 @@ class GenericIE(InfoExtractor):
              return self.playlist_result(
                  urlrs, playlist_id=video_id, playlist_title=video_title)
  
              return self.playlist_result(
                  urlrs, playlist_id=video_id, playlist_title=video_title)
  
-        # Look for BrightCove:
-        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+        # Look for Brightcove Legacy Studio embeds
+        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
          if bc_urls:
              self.to_screen('Brightcove video detected.')
              entries = [{
                  '_type': 'url',
                  'url': smuggle_url(bc_url, {'Referer': url}),
          if bc_urls:
              self.to_screen('Brightcove video detected.')
              entries = [{
                  '_type': 'url',
                  'url': smuggle_url(bc_url, {'Referer': url}),
-                'ie_key': 'Brightcove'
+                'ie_key': 'BrightcoveLegacy'
              } for bc_url in bc_urls]
  
              return {
              } for bc_url in bc_urls]
  
              return {
@@ -1133,6 +1347,11 @@ class GenericIE(InfoExtractor):
                  'entries': entries,
              }
  
                  'entries': entries,
              }
  
+        # Look for Brightcove New Studio embeds
+        bc_urls = BrightcoveNewIE._extract_urls(webpage)
+        if bc_urls:
+            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+
          # Look for embedded rtl.nl player
          matches = re.findall(
              r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
          # Look for embedded rtl.nl player
          matches = re.findall(
              r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@ -1144,6 +1363,12 @@ class GenericIE(InfoExtractor):
          if vimeo_url is not None:
              return self.url_result(vimeo_url)
  
          if vimeo_url is not None:
              return self.url_result(vimeo_url)
  
+        vid_me_embed_url = self._search_regex(
+            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+            webpage, 'vid.me embed', default=None)
+        if vid_me_embed_url is not None:
+            return self.url_result(vid_me_embed_url, 'Vidme')
+
          # Look for embedded YouTube player
          matches = re.findall(r'''(?x)
              (?:
          # Look for embedded YouTube player
          matches = re.findall(r'''(?x)
              (?:
@@ -1210,11 +1435,6 @@ class GenericIE(InfoExtractor):
                  'id': match.group('id')
              }
  
                  'id': match.group('id')
              }
  
-        # Look for embedded blip.tv player
-        bliptv_url = BlipTVIE._extract_url(webpage)
-        if bliptv_url:
-            return self.url_result(bliptv_url, 'BlipTV')
-
          # Look for SVT player
          svt_url = SVTIE._extract_url(webpage)
          if svt_url:
          # Look for SVT player
          svt_url = SVTIE._extract_url(webpage)
          if svt_url:
@@ -1270,7 +1490,7 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'))
  
          # Look for Ooyala videos
              return self.url_result(mobj.group('url'))
  
          # Look for Ooyala videos
-        mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+        mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                  re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
                  re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
@@ -1336,7 +1556,7 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'))
          mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
          if mobj is not None:
              return self.url_result(mobj.group('url'))
          mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
          if mobj is not None:
-            return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
  
          # Look for funnyordie embed
          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
  
          # Look for funnyordie embed
          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -1399,11 +1619,23 @@ class GenericIE(InfoExtractor):
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'ArteTVEmbed')
  
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'ArteTVEmbed')
  
+        # Look for embedded francetv player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
          # Look for embedded smotri.com player
          smotri_url = SmotriIE._extract_url(webpage)
          if smotri_url:
              return self.url_result(smotri_url, 'Smotri')
  
          # Look for embedded smotri.com player
          smotri_url = SmotriIE._extract_url(webpage)
          if smotri_url:
              return self.url_result(smotri_url, 'Smotri')
  
+        # Look for embedded Myvi.ru player
+        myvi_url = MyviIE._extract_url(webpage)
+        if myvi_url:
+            return self.url_result(myvi_url)
+
          # Look for embeded soundcloud player
          mobj = re.search(
              r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
          # Look for embeded soundcloud player
          mobj = re.search(
              r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
@@ -1421,12 +1653,9 @@ class GenericIE(InfoExtractor):
              return self.url_result(url, ie='Vulture')
  
          # Look for embedded mtvservices player
              return self.url_result(url, ie='Vulture')
  
          # Look for embedded mtvservices player
-        mobj = re.search(
-            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
-            webpage)
-        if mobj is not None:
-            url = unescapeHTML(mobj.group('url'))
-            return self.url_result(url, ie='MTVServicesEmbedded')
+        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+        if mtvservices_url:
+            return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
  
          # Look for embedded yahoo player
          mobj = re.search(
  
          # Look for embedded yahoo player
          mobj = re.search(
@@ -1465,7 +1694,7 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'), 'MLB')
  
          mobj = re.search(
              return self.url_result(mobj.group('url'), 'MLB')
  
          mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+            r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
              webpage)
          if mobj is not None:
              return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
              webpage)
          if mobj is not None:
              return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
@@ -1483,10 +1712,12 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'), 'Zapiks')
  
          # Look for Kaltura embeds
              return self.url_result(mobj.group('url'), 'Zapiks')
  
          # Look for Kaltura embeds
-        mobj = re.search(
-            r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
+        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
+                re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
          if mobj is not None:
          if mobj is not None:
-            return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+            return self.url_result(smuggle_url(
+                'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
+                {'source_url': url}), 'Kaltura')
  
          # Look for Eagle.Platform embeds
          mobj = re.search(
  
          # Look for Eagle.Platform embeds
          mobj = re.search(
@@ -1531,7 +1762,7 @@ class GenericIE(InfoExtractor):
  
          # Look for UDN embeds
          mobj = re.search(
  
          # Look for UDN embeds
          mobj = re.search(
-            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
          if mobj is not None:
              return self.url_result(
                  compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
          if mobj is not None:
              return self.url_result(
                  compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -1556,6 +1787,11 @@ class GenericIE(InfoExtractor):
          if snagfilms_url:
              return self.url_result(snagfilms_url)
  
          if snagfilms_url:
              return self.url_result(snagfilms_url)
  
+        # Look for ScreenwaveMedia embeds
+        mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
+        if mobj is not None:
+            return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
+
          # Look for AdobeTVVideo embeds
          mobj = re.search(
              r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
          # Look for AdobeTVVideo embeds
          mobj = re.search(
              r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@@ -1593,7 +1829,7 @@ class GenericIE(InfoExtractor):
          if not found:
              # Broaden the findall a little bit: JWPlayer JS loader
              found = filter_video(re.findall(
          if not found:
              # Broaden the findall a little bit: JWPlayer JS loader
              found = filter_video(re.findall(
-                r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+                r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
          if not found:
              # Flow player
              found = filter_video(re.findall(r'''(?xs)
          if not found:
              # Flow player
              found = filter_video(re.findall(r'''(?xs)
@@ -1619,7 +1855,7 @@ class GenericIE(InfoExtractor):
                  found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
          if not found:
              # HTML5 video
                  found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
          if not found:
              # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
+            found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
          if not found:
              REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
              found = re.search(
          if not found:
              REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
              found = re.search(
@@ -1630,9 +1866,12 @@ class GenericIE(InfoExtractor):
                  # Look also in Refresh HTTP header
                  refresh_header = head_response.headers.get('Refresh')
                  if refresh_header:
                  # Look also in Refresh HTTP header
                  refresh_header = head_response.headers.get('Refresh')
                  if refresh_header:
+                    # In python 2 response HTTP headers are bytestrings
+                    if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+                        refresh_header = refresh_header.decode('iso-8859-1')
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
-                new_url = compat_urlparse.urljoin(url, found.group(1))
+                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
                  self.report_following_redirect(new_url)
                  return {
                      '_type': 'url',
                  self.report_following_redirect(new_url)
                  return {
                      '_type': 'url',
@@ -1643,8 +1882,9 @@ class GenericIE(InfoExtractor):
  
          entries = []
          for video_url in found:
  
          entries = []
          for video_url in found:
+            video_url = video_url.replace('\\/', '/')
              video_url = compat_urlparse.urljoin(url, video_url)
              video_url = compat_urlparse.urljoin(url, video_url)
-            video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
  
              # Sometimes, jwplayer extraction will result in a YouTube URL
              if YoutubeIE.suitable(video_url):
  
              # Sometimes, jwplayer extraction will result in a YouTube URL
              if YoutubeIE.suitable(video_url):
@@ -1654,22 +1894,24 @@ class GenericIE(InfoExtractor):
              # here's a fun little line of code for you:
              video_id = os.path.splitext(video_id)[0]
  
              # here's a fun little line of code for you:
              video_id = os.path.splitext(video_id)[0]
  
-            if determine_ext(video_url) == 'smil':
-                entries.append({
-                    'id': video_id,
-                    'formats': self._extract_smil_formats(video_url, video_id),
-                    'uploader': video_uploader,
-                    'title': video_title,
-                    'age_limit': age_limit,
-                })
+            entry_info_dict = {
+                'id': video_id,
+                'uploader': video_uploader,
+                'title': video_title,
+                'age_limit': age_limit,
+            }
+
+            ext = determine_ext(video_url)
+            if ext == 'smil':
+                entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+            elif ext == 'xspf':
+                return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+            elif ext == 'm3u8':
+                entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
              else:
              else:
-                entries.append({
-                    'id': video_id,
-                    'url': video_url,
-                    'uploader': video_uploader,
-                    'title': video_title,
-                    'age_limit': age_limit,
-                })
+                entry_info_dict['url'] = video_url
+
+            entries.append(entry_info_dict)
  
          if len(entries) == 1:
              return entries[0]
  
          if len(entries) == 1:
              return entries[0]