Merge branch 'pr-bbcnews' of https://github.com/atomicdryad/youtube-dl into atomicdry...

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 392ad36486c8e953f4d3bed341353a3b5d11e65a..6d2efb22e784ecd40dcdebe5195a0d8dde63d632 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -8,7 +8,6 @@ import re
  from .common import InfoExtractor
  from .youtube import YoutubeIE
  from ..compat import (
-    compat_urllib_parse,
      compat_urllib_parse_unquote,
      compat_urllib_request,
      compat_urlparse,
@@ -408,6 +407,26 @@ class GenericIE(InfoExtractor):
                  'skip_download': 'Requires rtmpdump'
              }
          },
+        # francetv embed
+        {
+            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+            'info_dict': {
+                'id': 'EV_30231',
+                'ext': 'mp4',
+                'title': 'Alcaline, le concert avec Calogero',
+                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+                'upload_date': '20150226',
+                'timestamp': 1424989860,
+                'duration': 5400,
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            },
+            'expected_warnings': [
+                'Forbidden'
+            ]
+        },
          # Condé Nast embed
          {
              'url': 'http://www.wired.com/2014/04/honda-asimo/',
@@ -1115,7 +1134,7 @@ class GenericIE(InfoExtractor):
          # Sometimes embedded video player is hidden behind percent encoding
          # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
          # Unescaping the whole page allows to handle those cases in a generic way
-        webpage = compat_urllib_parse.unquote(webpage)
+        webpage = compat_urllib_parse_unquote(webpage)
  
          # it's tempting to parse this further, but you would
          # have to take into account all the variations like
@@ -1177,6 +1196,12 @@ class GenericIE(InfoExtractor):
          if vimeo_url is not None:
              return self.url_result(vimeo_url)
  
+        vid_me_embed_url = self._search_regex(
+            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+            webpage, 'vid.me embed', default=None)
+        if vid_me_embed_url is not None:
+            return self.url_result(vid_me_embed_url, 'Vidme')
+
          # Look for embedded YouTube player
          matches = re.findall(r'''(?x)
              (?:
@@ -1369,7 +1394,7 @@ class GenericIE(InfoExtractor):
              return self.url_result(mobj.group('url'))
          mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
          if mobj is not None:
-            return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
  
          # Look for funnyordie embed
          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -1432,6 +1457,13 @@ class GenericIE(InfoExtractor):
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'ArteTVEmbed')
  
+        # Look for embedded francetv player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
          # Look for embedded smotri.com player
          smotri_url = SmotriIE._extract_url(webpage)
          if smotri_url:
@@ -1670,7 +1702,7 @@ class GenericIE(InfoExtractor):
                  if refresh_header:
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
-                new_url = compat_urlparse.urljoin(url, found.group(1))
+                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
                  self.report_following_redirect(new_url)
                  return {
                      '_type': 'url',
@@ -1682,7 +1714,7 @@ class GenericIE(InfoExtractor):
          entries = []
          for video_url in found:
              video_url = compat_urlparse.urljoin(url, video_url)
-            video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
+            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
  
              # Sometimes, jwplayer extraction will result in a YouTube URL
              if YoutubeIE.suitable(video_url):