Fix generic class move (add all files)

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index a25ccc173e26500e675dac8dc1a55337ababca33..b3335a89c8da9b2b0ab899bb4a2190545edc2134 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -23,253 +23,7 @@ import urllib
  from .utils import *
  
  
-class InfoExtractor(object):
-    """Information Extractor class.
-
-    Information extractors are the classes that, given a URL, extract
-    information about the video (or videos) the URL refers to. This
-    information includes the real video URL, the video title, author and
-    others. The information is stored in a dictionary which is then
-    passed to the FileDownloader. The FileDownloader processes this
-    information possibly downloading the video to the file system, among
-    other possible outcomes.
-
-    The dictionaries must include the following fields:
-
-    id:             Video identifier.
-    url:            Final video URL.
-    title:          Video title, unescaped.
-    ext:            Video filename extension.
-
-    The following fields are optional:
-
-    format:         The video format, defaults to ext (used for --get-format)
-    thumbnail:      Full URL to a video thumbnail image.
-    description:    One-line video description.
-    uploader:       Full name of the video uploader.
-    upload_date:    Video upload date (YYYYMMDD).
-    uploader_id:    Nickname or id of the video uploader.
-    location:       Physical location of the video.
-    player_url:     SWF Player URL (used for rtmpdump).
-    subtitles:      The subtitle file contents.
-    urlhandle:      [internal] The urlHandle to be used to download the file,
-                    like returned by urllib.request.urlopen
-
-    The fields should all be Unicode strings.
-
-    Subclasses of this one should re-define the _real_initialize() and
-    _real_extract() methods and define a _VALID_URL regexp.
-    Probably, they should also be added to the list of extractors.
-
-    _real_extract() must return a *list* of information dictionaries as
-    described above.
-
-    Finally, the _WORKING attribute should be set to False for broken IEs
-    in order to warn the users and skip the tests.
-    """
-
-    _ready = False
-    _downloader = None
-    _WORKING = True
-
-    def __init__(self, downloader=None):
-        """Constructor. Receives an optional downloader."""
-        self._ready = False
-        self.set_downloader(downloader)
-
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url) is not None
-
-    @classmethod
-    def working(cls):
-        """Getter method for _WORKING."""
-        return cls._WORKING
-
-    def initialize(self):
-        """Initializes an instance (authentication, etc)."""
-        if not self._ready:
-            self._real_initialize()
-            self._ready = True
-
-    def extract(self, url):
-        """Extracts URL information and returns it in list of dicts."""
-        self.initialize()
-        return self._real_extract(url)
-
-    def set_downloader(self, downloader):
-        """Sets the downloader for this IE."""
-        self._downloader = downloader
-
-    def _real_initialize(self):
-        """Real initialization process. Redefine in subclasses."""
-        pass
-
-    def _real_extract(self, url):
-        """Real extraction process. Redefine in subclasses."""
-        pass
-
-    @property
-    def IE_NAME(self):
-        return type(self).__name__[:-2]
-
-    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
-        """ Returns the response handle """
-        if note is None:
-            self.report_download_webpage(video_id)
-        elif note is not False:
-            self.to_screen(u'%s: %s' % (video_id, note))
-        try:
-            return compat_urllib_request.urlopen(url_or_request)
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            if errnote is None:
-                errnote = u'Unable to download webpage'
-            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
-
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
-        """ Returns a tuple (page content as string, URL handle) """
-        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
-        content_type = urlh.headers.get('Content-Type', '')
-        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
-        if m:
-            encoding = m.group(1)
-        else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
-        if self._downloader.params.get('dump_intermediate_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            self.to_screen(u'Dumping request to ' + url)
-            dump = base64.b64encode(webpage_bytes).decode('ascii')
-            self._downloader.to_screen(dump)
-        content = webpage_bytes.decode(encoding, 'replace')
-        return (content, urlh)
-
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
-        """ Returns the data of the page as a string """
-        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
-
-    def to_screen(self, msg):
-        """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
-
-    def report_extraction(self, id_or_name):
-        """Report information extraction."""
-        self.to_screen(u'%s: Extracting information' % id_or_name)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
-
-    def report_age_confirmation(self):
-        """Report attempt to confirm age."""
-        self.to_screen(u'Confirming age')
-
-    #Methods for following #608
-    #They set the correct value of the '_type' key
-    def video_result(self, video_info):
-        """Returns a video"""
-        video_info['_type'] = 'video'
-        return video_info
-    def url_result(self, url, ie=None):
-        """Returns a url that points to a page that should be processed"""
-        #TODO: ie should be the class used for getting the info
-        video_info = {'_type': 'url',
-                      'url': url,
-                      'ie_key': ie}
-        return video_info
-    def playlist_result(self, entries, playlist_id=None, playlist_title=None):
-        """Returns a playlist"""
-        video_info = {'_type': 'playlist',
-                      'entries': entries}
-        if playlist_id:
-            video_info['id'] = playlist_id
-        if playlist_title:
-            video_info['title'] = playlist_title
-        return video_info
-
-    def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
-        """
-        Perform a regex search on the given string, using a single or a list of
-        patterns returning the first matching group.
-        In case of failure return a default value or raise a WARNING or a
-        ExtractorError, depending on fatal, specifying the field name.
-        """
-        if isinstance(pattern, (str, compat_str, compiled_regex_type)):
-            mobj = re.search(pattern, string, flags)
-        else:
-            for p in pattern:
-                mobj = re.search(p, string, flags)
-                if mobj: break
-
-        if sys.stderr.isatty() and os.name != 'nt':
-            _name = u'\033[0;34m%s\033[0m' % name
-        else:
-            _name = name
-
-        if mobj:
-            # return the first matching group
-            return next(g for g in mobj.groups() if g is not None)
-        elif default is not None:
-            return default
-        elif fatal:
-            raise ExtractorError(u'Unable to extract %s' % _name)
-        else:
-            self._downloader.report_warning(u'unable to extract %s; '
-                u'please report this issue on GitHub.' % _name)
-            return None
-
-    def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
-        """
-        Like _search_regex, but strips HTML tags and unescapes entities.
-        """
-        res = self._search_regex(pattern, string, name, default, fatal, flags)
-        if res:
-            return clean_html(res).strip()
-        else:
-            return res
-
-class SearchInfoExtractor(InfoExtractor):
-    """
-    Base class for paged search queries extractors.
-    They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query}
-    Instances should define _SEARCH_KEY and _MAX_RESULTS.
-    """
-
-    @classmethod
-    def _make_valid_url(cls):
-        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
-
-    @classmethod
-    def suitable(cls, url):
-        return re.match(cls._make_valid_url(), url) is not None
-
-    def _real_extract(self, query):
-        mobj = re.match(self._make_valid_url(), query)
-        if mobj is None:
-            raise ExtractorError(u'Invalid search query "%s"' % query)
-
-        prefix = mobj.group('prefix')
-        query = mobj.group('query')
-        if prefix == '':
-            return self._get_n_results(query, 1)
-        elif prefix == 'all':
-            return self._get_n_results(query, self._MAX_RESULTS)
-        else:
-            n = int(prefix)
-            if n <= 0:
-                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
-            elif n > self._MAX_RESULTS:
-                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
-                n = self._MAX_RESULTS
-            return self._get_n_results(query, n)
-
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-        raise NotImplementedError("This method must be implemented by sublclasses")
+from .extractor.common import InfoExtractor, SearchInfoExtractor
  
  
  class YoutubeIE(InfoExtractor):
@@ -379,6 +133,17 @@ class YoutubeIE(InfoExtractor):
          """Indicate the download will use the RTMP protocol."""
          self.to_screen(u'RTMP download detected')
  
+    @staticmethod
+    def _decrypt_signature(s):
+        """Decrypt an encrypted stream signature.
+
+        s must consist of two subkeys of 43 characters each, joined by a
+        single dot. Returns the decrypted signature as a string.
+        Raises ExtractorError if s does not have that shape.
+        """
+        subkeys = s.split('.')
+        # A missing or extra dot used to surface as a bare ValueError from
+        # the tuple unpacking; report it like any other invalid signature.
+        if len(subkeys) != 2 or len(subkeys[0]) != 43 or len(subkeys[1]) != 43:
+            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid')
+        (a, b) = subkeys
+        # Rebuild the second subkey from fixed positions of both subkeys
+        # (8 + 1 + 9 + 1 + 20 + 1 = 40 characters).
+        b = ''.join([b[:8], a[0], b[9:18], b[-4], b[19:39], b[18]])
+        # Only the last 40 characters of the first subkey are kept.
+        a = a[-40:]
+        # The result is both 40-character halves joined by a dot, reversed.
+        return '.'.join((a, b))[::-1]
+
      def _get_available_subtitles(self, video_id):
          self.report_video_subtitles_download(video_id)
          request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
@@ -420,7 +185,7 @@ class YoutubeIE(InfoExtractor):
      def _request_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
          sub_format = self._downloader.params.get('subtitlesformat')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
@@ -699,14 +464,14 @@ class YoutubeIE(InfoExtractor):
                          pass
                      else:
                          # We report the original error
-                        self._downloader.report_error(sub_error)
+                        self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('allsubtitles', False):
              video_subtitles = self._extract_all_subtitles(video_id)
              for video_subtitle in video_subtitles:
                  (sub_error, sub_lang, sub) = video_subtitle
                  if sub_error:
-                    self._downloader.report_error(sub_error)
+                    self._downloader.report_warning(sub_error)
  
          if self._downloader.params.get('listsubtitles', False):
              sub_lang_list = self._list_available_subtitles(video_id)
@@ -724,6 +489,17 @@ class YoutubeIE(InfoExtractor):
          # Decide which formats to download
          req_format = self._downloader.params.get('format', None)
  
+        try:
+            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+            if mobj is None:
+                # No embedded player config on this page: keep video_info
+                # untouched instead of failing with AttributeError below.
+                raise ValueError('Could not find ytplayer.config')
+            info = json.loads(mobj.group(1))
+            args = info['args']
+            # 'dashmpd' has to be tested for membership in args; a bare
+            # string literal is always true and made this branch unconditional.
+            if args.get('ptk','') == 'vevo' or 'dashmpd' in args:
+                # Vevo videos with encrypted signatures
+                self.to_screen(u'%s: Vevo video detected.' % video_id)
+                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
+        except ValueError:
+            # Missing or unparsable player config: fall back to video_info
+            pass
+
          if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
              self.report_rtmp_download()
              video_url_list = [(None, video_info['conn'][0])]
@@ -732,8 +508,14 @@ class YoutubeIE(InfoExtractor):
              for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                  url_data = compat_parse_qs(url_data_str)
                  if 'itag' in url_data and 'url' in url_data:
-                    url = url_data['url'][0] + '&signature=' + url_data['sig'][0]
-                    if not 'ratebypass' in url: url += '&ratebypass=yes'
+                    url = url_data['url'][0]
+                    if 'sig' in url_data:
+                        url += '&signature=' + url_data['sig'][0]
+                    elif 's' in url_data:
+                        signature = self._decrypt_signature(url_data['s'][0])
+                        url += '&signature=' + signature
+                    if 'ratebypass' not in url:
+                        url += '&ratebypass=yes'
                      url_map[url_data['itag'][0]] = url
  
              format_limit = self._downloader.params.get('format_limit', None)
@@ -940,16 +722,10 @@ class DailymotionIE(InfoExtractor):
          video_title = unescapeHTML(mobj.group('title'))
  
          video_uploader = None
-        mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
-        if mobj is None:
-            # lookin for official user
-            mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
-            if mobj_official is None:
-                self._downloader.report_warning(u'unable to extract uploader nickname')
-            else:
-                video_uploader = mobj_official.group(1)
-        else:
-            video_uploader = mobj.group(1)
+        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
+                                             # Looking for official user
+                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
+                                            webpage, 'video uploader')
  
          video_upload_date = None
          mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@@ -1099,6 +875,25 @@ class VimeoIE(InfoExtractor):
      _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
+    def _verify_video_password(self, url, video_id, webpage):
+        """Submit the value of the --password option for a protected video.
+
+        url:      URL of the video page; the form is posted to url + '/password'.
+        video_id: Video identifier, used in the progress messages.
+        webpage:  Contents of the password prompt page, holding the xsrft token.
+
+        Returns nothing. Raises ExtractorError if no password was given or
+        if the xsrft token cannot be found in webpage.
+        """
+        password = self._downloader.params.get('password', None)
+        if password is None:
+            raise ExtractorError(u'This video is protected by a password, use the --password option')
+        # _search_regex raises ExtractorError when the token is missing,
+        # where re.search(...).group(1) would fail with AttributeError
+        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, u'xsrft token')
+        # urlencode() output is plain ASCII; Python 3 only accepts bytes as
+        # the body of a request
+        data = compat_urllib_parse.urlencode({'password': password,
+                                              'token': token}).encode('ascii')
+        # I didn't manage to use the password with https
+        if url.startswith('https'):
+            # Swap the scheme only, not later 'https' occurrences in the URL
+            pass_url = url.replace('https', 'http', 1)
+        else:
+            pass_url = url
+        password_request = compat_urllib_request.Request(pass_url+'/password', data)
+        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        password_request.add_header('Cookie', 'xsrft=%s' % token)
+        # The response body is not used, so it is not kept
+        self._download_webpage(password_request, video_id,
+                               u'Verifying the password',
+                               u'Wrong password')
+
      def _real_extract(self, url, new_video=True):
          # Extract ID from URL
          mobj = re.match(self._VALID_URL, url)
@@ -1127,6 +922,10 @@ class VimeoIE(InfoExtractor):
          except:
              if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                  raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
+
+            if re.search('If so please provide the correct password.', webpage):
+                self._verify_video_password(url, video_id, webpage)
+                return self._real_extract(url)
              else:
                  raise ExtractorError(u'Unable to extract info section')
  
@@ -1412,6 +1211,13 @@ class GenericIE(InfoExtractor):
          if mobj is None:
              # Try to find twitter cards info
              mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
+        if mobj is None:
+            # We look for Open Graph info:
+            # We have to match any number of spaces between elements, some sites try to align them (e.g. statigr.am)
+            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
+            if m_video_type is not None:
+                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
          if mobj is None:
              raise ExtractorError(u'Invalid URL: %s' % url)
  
@@ -1576,7 +1382,7 @@ class YoutubePlaylistIE(InfoExtractor):
                       |
                          ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
                       )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
+    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
      _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
  
@@ -1612,9 +1418,10 @@ class YoutubePlaylistIE(InfoExtractor):
                  # Number of videos is a multiple of self._MAX_RESULTS
                  break
  
-            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
-                        for entry in response['feed']['entry']
-                        if 'content' in entry ]
+            for entry in response['feed']['entry']:
+                index = entry['yt$position']['$t']
+                if 'media$group' in entry and 'media$player' in entry['media$group']:
+                    videos.append((index, entry['media$group']['media$player']['url']))
  
              if len(response['feed']['entry']) < self._MAX_RESULTS:
                  break
@@ -3365,6 +3172,8 @@ class SteamIE(InfoExtractor):
                  (?P<gameID>\d+)/?
                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                  """
+    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
+    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
  
      @classmethod
      def suitable(cls, url):
@@ -3374,11 +3183,19 @@ class SteamIE(InfoExtractor):
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url, re.VERBOSE)
          gameID = m.group('gameID')
-        videourl = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' % gameID
-        self.report_age_confirmation()
+
+        videourl = self._VIDEO_PAGE_TEMPLATE % gameID
          webpage = self._download_webpage(videourl, gameID)
-        game_title = re.search(r'<h2 class="pageheader">(?P<game_title>.*?)</h2>', webpage).group('game_title')
-        
+
+        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
+            videourl = self._AGECHECK_TEMPLATE % gameID
+            self.report_age_confirmation()
+            webpage = self._download_webpage(videourl, gameID)
+
+        self.report_extraction(gameID)
+        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
+                                             webpage, 'game title')
+
          urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
          mweb = re.finditer(urlRE, webpage)
          namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
@@ -3484,8 +3301,8 @@ class RBMARadioIE(InfoExtractor):
  
          webpage = self._download_webpage(url, video_id)
  
-        json_data = self._search_regex(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>',
-            webpage, u'json data')
+        json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
+            webpage, u'json data', flags=re.MULTILINE)
  
          try:
              data = json.loads(json_data)
@@ -3788,10 +3605,6 @@ class TEDIE(InfoExtractor):
              self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
              return [self._playlist_videos_info(url,name,playlist_id)]
  
-    def _talk_video_link(self,mediaSlug):
-        '''Returns the video link for that mediaSlug'''
-        return 'http://download.ted.com/talks/%s.mp4' % mediaSlug
-
      def _playlist_videos_info(self,url,name,playlist_id=0):
          '''Returns the videos of the playlist'''
          video_RE=r'''
@@ -3804,9 +3617,8 @@ class TEDIE(InfoExtractor):
          m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
          m_names=re.finditer(video_name_RE,webpage)
  
-        playlist_RE = r'div class="headline">(\s*?)<h1>(\s*?)<span>(?P<playlist_title>.*?)</span>'
-        m_playlist = re.search(playlist_RE, webpage)
-        playlist_title = m_playlist.group('playlist_title')
+        playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
+                                                 webpage, 'playlist title')
  
          playlist_entries = []
          for m_video, m_name in zip(m_videos,m_names):
@@ -3817,27 +3629,28 @@ class TEDIE(InfoExtractor):
  
      def _talk_info(self, url, video_id=0):
          """Return the video for the talk in the url"""
-        m=re.match(self._VALID_URL, url,re.VERBOSE)
-        videoName=m.group('name')
-        webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
+        m = re.match(self._VALID_URL, url,re.VERBOSE)
+        video_name = m.group('name')
+        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
+        self.report_extraction(video_name)
          # If the url includes the language we get the title translated
-        title_RE=r'<span id="altHeadline" >(?P<title>.*)</span>'
-        title=re.search(title_RE, webpage).group('title')
-        info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
-                        "id":(?P<videoID>[\d]+).*?
-                        "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
-        thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"'
-        thumb_match=re.search(thumb_RE,webpage)
-        info_match=re.search(info_RE,webpage,re.VERBOSE)
-        video_id=info_match.group('videoID')
-        mediaSlug=info_match.group('mediaSlug')
-        video_url=self._talk_video_link(mediaSlug)
+        title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+                                        webpage, 'title')
+        json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
+                                    webpage, 'json data')
+        info = json.loads(json_data)
+        desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
+                                       webpage, 'description', flags = re.DOTALL)
+        
+        thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
+                                       webpage, 'thumbnail')
          info = {
-                'id': video_id,
-                'url': video_url,
+                'id': info['id'],
+                'url': info['htmlStreams'][-1]['file'],
                  'ext': 'mp4',
                  'title': title,
-                'thumbnail': thumb_match.group('thumbnail')
+                'thumbnail': thumbnail,
+                'description': desc,
                  }
          return info
  
@@ -4465,11 +4278,12 @@ class Vbox7IE(InfoExtractor):
          video_id = mobj.group(1)
  
          redirect_page, urlh = self._download_webpage_handle(url, video_id)
-        redirect_url = urlh.geturl() + re.search(r'window\.location = \'(.*)\';', redirect_page).group(1)
+        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
+        redirect_url = urlh.geturl() + new_location
          webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
  
-        title = re.search(r'<title>(.*)</title>', webpage)
-        title = (title.group(1)).split('/')[0].strip()
+        title = self._html_search_regex(r'<title>(.*)</title>',
+            webpage, u'title').split('/')[0].strip()
  
          ext = "flv"
          info_url = "http://vbox7.com/play/magare.do"
@@ -4503,10 +4317,7 @@ class GametrailersIE(InfoExtractor):
              mgid_re = r'data-video="(?P<mgid>mgid:.*?)"'
          else:
              mgid_re = r'data-contentId=\'(?P<mgid>mgid:.*?)\''
-        m_mgid = re.search(mgid_re, webpage)
-        if m_mgid is None:
-            raise ExtractorError(u'Unable to extract mgid')
-        mgid = m_mgid.group(1)
+        mgid = self._search_regex(mgid_re, webpage, u'mgid')
          data = compat_urllib_parse.urlencode({'uri': mgid, 'acceptMethods': 'fms'})
  
          info_page = self._download_webpage('http://www.gametrailers.com/feeds/mrss?' + data,
@@ -4528,11 +4339,11 @@ class GametrailersIE(InfoExtractor):
          video_description = m_info.group('description')
          video_thumb = m_info.group('thumb')
  
-        m_urls = re.finditer(r'<src>(?P<url>.*)</src>', links_webpage)
-        if m_urls is None:
+        m_urls = list(re.finditer(r'<src>(?P<url>.*)</src>', links_webpage))
+        # list() never returns None, an empty list is the only failure case
+        if not m_urls:
-            raise ExtractError(u'Unable to extrat video url')
+            # ExtractError is not defined anywhere; the exception class is ExtractorError
+            raise ExtractorError(u'Unable to extrat video url')
          # They are sorted from worst to best quality
-        video_url = list(m_urls)[-1].group('url')
+        video_url = m_urls[-1].group('url')
  
          return {'url':         video_url,
                  'id':          video_id,
@@ -4543,6 +4354,37 @@ class GametrailersIE(InfoExtractor):
                  'description': video_description,
                  }
  
+class StatigramIE(InfoExtractor):
+    """Information extractor for statigr.am pages ('/p/<id>' URLs)."""
+
+    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
+
+    def _real_extract(self, url):
+        """Return a one-element list with the info dictionary for url."""
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+
+        # The video URL is mandatory: extraction fails without it
+        video_url = self._html_search_regex(
+            r'<meta property="og:video:secure_url" content="(.+?)">',
+            webpage, u'video URL')
+        # A missing thumbnail is only reported as a warning
+        thumbnail_url = self._html_search_regex(
+            r'<meta property="og:image" content="(.+?)" />',
+            webpage, u'thumbnail URL', fatal=False)
+        page_title = self._html_search_regex(
+            r'<title>(.+?)</title>',
+            webpage, u'title')
+        # Keep what precedes the last ' | Statigram' in the page title
+        title, _, _ = page_title.rpartition(u' | Statigram')
+        # The uploader id is taken from the first '@name' in the title
+        uploader_id = self._html_search_regex(
+            r'@([^ ]+)', title, u'uploader name', fatal=False)
+
+        video_info = {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'thumbnail': thumbnail_url,
+            'uploader_id': uploader_id,
+        }
+        return [video_info]
+
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
@@ -4609,6 +4451,7 @@ def gen_extractors():
          HypemIE(),
          Vbox7IE(),
          GametrailersIE(),
+        StatigramIE(),
          GenericIE()
      ]