Merge branch 'master' of github.com-rndusr:rg3/youtube-dl into fix/str-item-assignment
author Random User <rndusr@posteo.de>
Sat, 25 Mar 2017 20:36:59 +0000 (21:36 +0100)
committer Random User <rndusr@posteo.de>
Sat, 25 Mar 2017 20:36:59 +0000 (21:36 +0100)
1  2 
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py

index c2ca73ee1756519e90e19fc7512b1a77d834b2ee,0852b8e8c1c15e3600ed16d5e95d9cac901f32bc..6c3c095f78cec4f44951f0424f20c6828e2462d7
@@@ -36,34 -36,35 +36,35 @@@ from ..utils import 
      clean_html,
      compiled_regex_type,
      determine_ext,
+     determine_protocol,
      error_to_compat_str,
      ExtractorError,
+     extract_attributes,
      fix_xml_ampersands,
      float_or_none,
      GeoRestrictedError,
      GeoUtils,
      int_or_none,
      js_to_json,
+     mimetype2ext,
+     orderedSet,
+     parse_codecs,
+     parse_duration,
      parse_iso8601,
+     parse_m3u8_attributes,
      RegexNotFoundError,
-     sanitize_filename,
      sanitized_Request,
+     sanitize_filename,
      unescapeHTML,
      unified_strdate,
      unified_timestamp,
+     update_Request,
+     update_url_query,
+     urljoin,
      url_basename,
      xpath_element,
      xpath_text,
      xpath_with_ns,
-     determine_protocol,
-     parse_duration,
-     mimetype2ext,
-     update_Request,
-     update_url_query,
-     parse_m3u8_attributes,
-     extract_attributes,
-     parse_codecs,
-     urljoin,
  )
  
  
@@@ -714,6 -715,13 +715,13 @@@ class InfoExtractor(object)
              video_info['title'] = video_title
          return video_info
  
+     def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
+         """Build a playlist result out of a collection of matches.
+         Each match is mapped to a URL (getter(m) when a getter is given,
+         otherwise the match itself), passed through _proto_relative_url and
+         wrapped by url_result together with ie. The entries go through
+         orderedSet (presumably de-duplicating while keeping order -- confirm
+         in utils) and are returned via playlist_result with video_id as the
+         playlist id and video_title as the playlist title.
+         """
+         urlrs = orderedSet(
+             self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
+             for m in matches)
+         return self.playlist_result(
+             urlrs, playlist_id=video_id, playlist_title=video_title)
      @staticmethod
      def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
          """Returns a playlist"""
                      })
          return formats
  
 -    @staticmethod
 -    def _find_jwplayer_data(webpage):
 +    def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
          mobj = re.search(
              r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
              webpage)
          if mobj:
 -            return mobj.group('options')
 +            try:
 +                jwplayer_data = self._parse_json(mobj.group('options'),
 +                                                 video_id=video_id,
 +                                                 transform_source=transform_source)
 +            except ExtractorError:
 +                pass
 +            else:
 +                if isinstance(jwplayer_data, dict):
 +                    return jwplayer_data
  
      def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
 -        jwplayer_data = self._parse_json(
 -            self._find_jwplayer_data(webpage), video_id,
 -            transform_source=js_to_json)
 +        jwplayer_data = self._find_jwplayer_data(
 +            webpage, video_id, transform_source=js_to_json)
          return self._parse_jwplayer_data(
              jwplayer_data, video_id, *args, **kwargs)
  
  
              this_video_id = video_id or video_data['mediaid']
  
-             formats = []
-             for source in video_data['sources']:
-                 source_url = self._proto_relative_url(source['file'])
-                 if base_url:
-                     source_url = compat_urlparse.urljoin(base_url, source_url)
-                 source_type = source.get('type') or ''
-                 ext = mimetype2ext(source_type) or determine_ext(source_url)
-                 if source_type == 'hls' or ext == 'm3u8':
-                     formats.extend(self._extract_m3u8_formats(
-                         source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
-                 elif ext == 'mpd':
-                     formats.extend(self._extract_mpd_formats(
-                         source_url, this_video_id, mpd_id=mpd_id, fatal=False))
-                 # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-                 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-                     formats.append({
-                         'url': source_url,
-                         'vcodec': 'none',
-                         'ext': ext,
-                     })
-                 else:
-                     height = int_or_none(source.get('height'))
-                     if height is None:
-                         # Often no height is provided but there is a label in
-                         # format like 1080p.
-                         height = int_or_none(self._search_regex(
-                             r'^(\d{3,})[pP]$', source.get('label') or '',
-                             'height', default=None))
-                     a_format = {
-                         'url': source_url,
-                         'width': int_or_none(source.get('width')),
-                         'height': height,
-                         'ext': ext,
-                     }
-                     if source_url.startswith('rtmp'):
-                         a_format['ext'] = 'flv'
-                         # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
-                         # of jwplayer.flash.swf
-                         rtmp_url_parts = re.split(
-                             r'((?:mp4|mp3|flv):)', source_url, 1)
-                         if len(rtmp_url_parts) == 3:
-                             rtmp_url, prefix, play_path = rtmp_url_parts
-                             a_format.update({
-                                 'url': rtmp_url,
-                                 'play_path': prefix + play_path,
-                             })
-                         if rtmp_params:
-                             a_format.update(rtmp_params)
-                     formats.append(a_format)
+             formats = self._parse_jwplayer_formats(
+                 video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+                 mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
              self._sort_formats(formats)
  
              subtitles = {}
          else:
              return self.playlist_result(entries)
  
+     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+         """Turn JWPlayer source entries into a list of format dicts.
+         jwplayer_sources_data is iterated and every item must provide a
+         'file' key; 'type', 'height', 'width', 'label' and 'bitrate' are
+         read with .get(). The file URL is passed through _proto_relative_url
+         and, when base_url is set, joined onto base_url.
+         HLS ('hls' type or m3u8 ext), mpd and smil sources are delegated to
+         the _extract_m3u8_formats / _extract_mpd_formats /
+         _extract_smil_formats helpers with fatal=False; audio sources yield
+         a format with 'vcodec' set to 'none'; anything else yields a single
+         format whose height falls back to the leading number of the label.
+         URLs starting with 'rtmp' get ext 'flv', are split into 'url' and
+         'play_path' at an mp4:/mp3:/flv: prefix when present, and are
+         updated with rtmp_params if given.
+         Returns the collected list; no sorting is done here.
+         """
+         formats = []
+         for source in jwplayer_sources_data:
+             source_url = self._proto_relative_url(source['file'])
+             if base_url:
+                 source_url = compat_urlparse.urljoin(base_url, source_url)
+             source_type = source.get('type') or ''
+             ext = mimetype2ext(source_type) or determine_ext(source_url)
+             if source_type == 'hls' or ext == 'm3u8':
+                 formats.extend(self._extract_m3u8_formats(
+                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                     m3u8_id=m3u8_id, fatal=False))
+             elif ext == 'mpd':
+                 formats.extend(self._extract_mpd_formats(
+                     source_url, video_id, mpd_id=mpd_id, fatal=False))
+             elif ext == 'smil':
+                 formats.extend(self._extract_smil_formats(
+                     source_url, video_id, fatal=False))
+             # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+             elif source_type.startswith('audio') or ext in (
+                     'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                 formats.append({
+                     'url': source_url,
+                     'vcodec': 'none',
+                     'ext': ext,
+                 })
+             else:
+                 height = int_or_none(source.get('height'))
+                 if height is None:
+                     # Often no height is provided but there is a label in
+                     # format like "1080p", "720p SD", or 1080.
+                     height = int_or_none(self._search_regex(
+                         r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+                         'height', default=None))
+                 a_format = {
+                     'url': source_url,
+                     'width': int_or_none(source.get('width')),
+                     'height': height,
+                     'tbr': int_or_none(source.get('bitrate')),
+                     'ext': ext,
+                 }
+                 if source_url.startswith('rtmp'):
+                     a_format['ext'] = 'flv'
+                     # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                     # of jwplayer.flash.swf
+                     rtmp_url_parts = re.split(
+                         r'((?:mp4|mp3|flv):)', source_url, 1)
+                     if len(rtmp_url_parts) == 3:
+                         rtmp_url, prefix, play_path = rtmp_url_parts
+                         a_format.update({
+                             'url': rtmp_url,
+                             'play_path': prefix + play_path,
+                         })
+                     if rtmp_params:
+                         a_format.update(rtmp_params)
+                 formats.append(a_format)
+         return formats
      def _live_title(self, name):
          """ Generate the title for a live video """
          now = datetime.datetime.now()
index 3fe0237b6ebb2c6821660f4cd671304be5768ee8,4fff93efe2859b541e34a4f09647417fffcf940d..274f817384d65a6287427ad35f97d3126cea7cd0
@@@ -84,6 -84,7 +84,7 @@@ from .twentymin import TwentyMinutenI
  from .ustream import UstreamIE
  from .openload import OpenloadIE
  from .videopress import VideoPressIE
+ from .rutube import RutubeIE
  
  
  class GenericIE(InfoExtractor):
                  },
              }],
          },
+         {
+             # Brightcove with UUID in videoPlayer
+             'url': 'http://www8.hp.com/cn/zh/home.html',
+             'info_dict': {
+                 'id': '5255815316001',
+                 'ext': 'mp4',
+                 'title': 'Sprocket Video - China',
+                 'description': 'Sprocket Video - China',
+                 'uploader': 'HP-Video Gallery',
+                 'timestamp': 1482263210,
+                 'upload_date': '20161220',
+                 'uploader_id': '1107601872001',
+             },
+             'params': {
+                 'skip_download': True,  # m3u8 download
+             },
+         },
          # ooyala video
          {
              'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
          },
          # LazyYT
          {
-             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
+             'url': 'https://skiplagged.com/',
              'info_dict': {
-                 'id': '1986',
-                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
+                 'id': 'skiplagged',
+                 'title': 'Skiplagged: The smart way to find cheap flights',
              },
-             'playlist_mincount': 2,
+             'playlist_mincount': 1,
+             'add_ie': ['Youtube'],
          },
          # Cinchcast embed
          {
                  'thumbnail': r're:^https?://.*\.jpg$',
              },
          },
 +        {
 +            # JWPlayer config passed as variable
 +            'url': 'http://www.txxx.com/videos/3326530/ariele/',
 +            'info_dict': {
 +                'id': '3326530_hq',
 +                'ext': 'mp4',
 +                'title': 'ARIELE | Tube Cup',
 +                'uploader': 'www.txxx.com',
 +                'age_limit': 18,
 +            },
 +            'params': {
 +                'skip_download': True,
 +            }
 +        },
          # rtl.nl embed
          {
              'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
              },
              'add_ie': [VideoPressIE.ie_key()],
          },
+         {
+             # Rutube embed
+             'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+             'info_dict': {
+                 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+                 'ext': 'flv',
+                 'title': 'Магаззино: Казань 2',
+                 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+                 'uploader': 'Магаззино',
+                 'upload_date': '20170228',
+                 'uploader_id': '996642',
+             },
+             'params': {
+                 'skip_download': True,
+             },
+             'add_ie': [RutubeIE.ie_key()],
+         },
          {
              # ThePlatform embedded with whitespaces in URLs
              'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
              'only_matching': True,
          },
+         {
+             # Senate ISVP iframe https
+             'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+             'md5': 'fb8c70b0b515e5037981a2492099aab8',
+             'info_dict': {
+                 'id': 'govtaff020316',
+                 'ext': 'mp4',
+                 'title': 'Integrated Senate Video Player',
+             },
+             'add_ie': [SenateISVPIE.ie_key()],
+         },
          # {
          #     # TODO: find another test
          #     # http://schema.org/VideoObject
          video_description = self._og_search_description(webpage, default=None)
          video_thumbnail = self._og_search_thumbnail(webpage, default=None)
  
-         # Helper method
-         def _playlist_from_matches(matches, getter=None, ie=None):
-             urlrs = orderedSet(
-                 self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
-                 for m in matches)
-             return self.playlist_result(
-                 urlrs, playlist_id=video_id, playlist_title=video_title)
          # Look for Brightcove Legacy Studio embeds
          bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
          if bc_urls:
          # Look for Brightcove New Studio embeds
          bc_urls = BrightcoveNewIE._extract_urls(webpage)
          if bc_urls:
-             return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+             return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
  
          # Look for ThePlatform embeds
          tp_urls = ThePlatformIE._extract_urls(webpage)
          if tp_urls:
-             return _playlist_from_matches(tp_urls, ie='ThePlatform')
+             return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
  
          # Look for Vessel embeds
          vessel_urls = VesselIE._extract_urls(webpage)
          if vessel_urls:
-             return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
+             return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
  
          # Look for embedded rtl.nl player
          matches = re.findall(
              r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
              webpage)
          if matches:
-             return _playlist_from_matches(matches, ie='RtlNl')
+             return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
  
          vimeo_urls = VimeoIE._extract_urls(url, webpage)
          if vimeo_urls:
-             return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
+             return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
  
          vid_me_embed_url = self._search_regex(
              r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
                  (?:embed|v|p)/.+?)
              \1''', webpage)
          if matches:
-             return _playlist_from_matches(
-                 matches, lambda m: unescapeHTML(m[1]))
+             return self.playlist_from_matches(
+                 matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
  
          # Look for lazyYT YouTube embed
          matches = re.findall(
              r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
          if matches:
-             return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+             return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
  
          # Look for Wordpress "YouTube Video Importer" plugin
          matches = re.findall(r'''(?x)<div[^>]+
              class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
              data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
          if matches:
-             return _playlist_from_matches(matches, lambda m: m[-1])
+             return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
  
          matches = DailymotionIE._extract_urls(webpage)
          if matches:
-             return _playlist_from_matches(matches)
+             return self.playlist_from_matches(matches, video_id, video_title)
  
          # Look for embedded Dailymotion playlist player (#3822)
          m = re.search(
              playlists = re.findall(
                  r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
              if playlists:
-                 return _playlist_from_matches(
-                     playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
+                 return self.playlist_from_matches(
+                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
  
          # Look for embedded Wistia player
          match = re.search(
          if mobj is not None:
              embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
              if embeds:
-                 return _playlist_from_matches(
-                     embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+                 return self.playlist_from_matches(
+                     embeds, video_id, video_title,
+                     getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
  
          # Look for Aparat videos
          mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
          # Look for funnyordie embed
          matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
          if matches:
-             return _playlist_from_matches(
-                 matches, getter=unescapeHTML, ie='FunnyOrDie')
+             return self.playlist_from_matches(
+                 matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
  
          # Look for BBC iPlayer embed
          matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
          if matches:
-             return _playlist_from_matches(matches, ie='BBCCoUk')
+             return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
  
          # Look for embedded RUTV player
          rutv_url = RUTVIE._extract_url(webpage)
          # Look for embedded SportBox player
          sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
          if sportbox_urls:
-             return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
+             return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
  
          # Look for embedded XHamster player
          xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
          if xhamster_urls:
-             return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+             return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
  
          # Look for embedded TNAFlixNetwork player
          tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
          if tnaflix_urls:
-             return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+             return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
  
          # Look for embedded PornHub player
          pornhub_urls = PornHubIE._extract_urls(webpage)
          if pornhub_urls:
-             return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+             return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
  
          # Look for embedded DrTuber player
          drtuber_urls = DrTuberIE._extract_urls(webpage)
          if drtuber_urls:
-             return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+             return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
  
          # Look for embedded RedTube player
          redtube_urls = RedTubeIE._extract_urls(webpage)
          if redtube_urls:
-             return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+             return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
  
          # Look for embedded Tvigle player
          mobj = re.search(
          # Look for embedded soundcloud player
          soundcloud_urls = SoundcloudIE._extract_urls(webpage)
          if soundcloud_urls:
-             return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+             return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
  
          # Look for tunein player
          tunein_urls = TuneInBaseIE._extract_urls(webpage)
          if tunein_urls:
-             return _playlist_from_matches(tunein_urls)
+             return self.playlist_from_matches(tunein_urls, video_id, video_title)
  
          # Look for embedded mtvservices player
          mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
          # Look for DBTV embeds
          dbtv_urls = DBTVIE._extract_urls(webpage)
          if dbtv_urls:
-             return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+             return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
  
          # Look for Videa embeds
          videa_urls = VideaIE._extract_urls(webpage)
          if videa_urls:
-             return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+             return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
  
          # Look for 20 minuten embeds
          twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
          if twentymin_urls:
-             return _playlist_from_matches(
-                 twentymin_urls, ie=TwentyMinutenIE.ie_key())
+             return self.playlist_from_matches(
+                 twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
  
          # Look for Openload embeds
          openload_urls = OpenloadIE._extract_urls(webpage)
          if openload_urls:
-             return _playlist_from_matches(
-                 openload_urls, ie=OpenloadIE.ie_key())
+             return self.playlist_from_matches(
+                 openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
  
          # Look for VideoPress embeds
          videopress_urls = VideoPressIE._extract_urls(webpage)
          if videopress_urls:
-             return _playlist_from_matches(
-                 videopress_urls, ie=VideoPressIE.ie_key())
+             return self.playlist_from_matches(
+                 videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
+         # Look for Rutube embeds
+         rutube_urls = RutubeIE._extract_urls(webpage)
+         if rutube_urls:
+             # playlist_from_matches() takes video_id and video_title as
+             # required positional arguments; leaving them out raises
+             # TypeError as soon as a Rutube embed is found.
+             return self.playlist_from_matches(
+                 rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
  
          # Looking for http://schema.org/VideoObject
          json_ld = self._search_json_ld(
                  self._sort_formats(entry['formats'])
              return self.playlist_result(entries)
  
 -        jwplayer_data_str = self._find_jwplayer_data(webpage)
 -        if jwplayer_data_str:
 -            try:
 -                jwplayer_data = self._parse_json(
 -                    jwplayer_data_str, video_id, transform_source=js_to_json)
 -                info = self._parse_jwplayer_data(
 -                    jwplayer_data, video_id, require_title=False)
 -                if not info.get('title'):
 -                    info['title'] = video_title
 -                return info
 -            except ExtractorError:
 -                pass
 +        jwplayer_data = self._find_jwplayer_data(
 +            webpage, video_id, transform_source=js_to_json)
 +        if jwplayer_data:
-             return self._parse_jwplayer_data(jwplayer_data, video_id)
++            info = self._parse_jwplayer_data(
++                jwplayer_data, video_id, require_title=False)
++            if not info.get('title'):
++                info['title'] = video_title
++            return info
  
          def check_video(vurl):
              if YoutubeIE.suitable(vurl):
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
                  new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
-                 self.report_following_redirect(new_url)
-                 return {
-                     '_type': 'url',
-                     'url': new_url,
-                 }
+                 if new_url != url:
+                     self.report_following_redirect(new_url)
+                     return {
+                         '_type': 'url',
+                         'url': new_url,
+                     }
+                 else:
+                     found = None
  
          if not found:
              # twitter:player is a https URL to iframe player that may or may not