_ Git - youtube-dl/blob - youtube_dl/extractor/gogoanime.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     ExtractorError,
   8     compat_urllib_parse,
   9     get_element_by_attribute,
  10     unescapeHTML
  11 )
  12
  13
  14 class GoGoAnimeIE(InfoExtractor):
  15     IE_NAME = 'gogoanime'
  16     IE_DESC = 'GoGoAnime'
  17     _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)'
  18
  19     _TEST = {
  20         'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
  21         'info_dict': {
  22             'id': 'mahou-shoujo-madoka-magica-movie-1'
  23         },
  24         'playlist_count': 3
  25     }
  26
  27     def _real_extract(self, url):
  28         video_id = self._match_id(url)
  29         page = self._download_webpage(url, video_id)
  30
  31         if 'Oops! Page Not Found</font>' in page:
  32             raise ExtractorError('Video does not exist', expected=True)
  33
  34         content = get_element_by_attribute("class", "postcontent", page)
  35         vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
  36         vids = [
  37             unescapeHTML(compat_urllib_parse.unquote(x))
  38             for x in vids if not re.search(r".*videofun.*", x)]
  39
  40         if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
  41             return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
  42
  43         title = self._html_search_regex(
  44             r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
  45
  46         return {
  47             '_type': 'url',
  48             'id': video_id,
  49             'url': vids[0],
  50             'title': title,
  51         }
  52
  53
  54 class GoGoAnimeSearchIE(InfoExtractor):
  55     IE_NAME = 'gogoanime:search'
  56     IE_DESC = 'GoGoAnime Search'
  57
  58     _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
  59     _TEST = {
  60         'url': 'http://www.gogoanime.com/?s=bokusatsu',
  61         'info_dict': {
  62             'id': 'bokusatsu'
  63         },
  64         'playlist_count': 6
  65     }
  66
  67     def _real_extract(self, url):
  68         playlist_id = self._match_id(url)
  69         webpage = self._download_webpage(url, playlist_id)
  70
  71         posts = re.findall(
  72             r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
  73             webpage)
  74
  75         return self.playlist_result(
  76             [self.url_result(p) for p in posts], playlist_id)