_ Git - youtube-dl/blob - youtube_dl/extractor/anitube.py

   1 import re
   2 import xml.etree.ElementTree
   3
   4 from .common import InfoExtractor
   5
   6
   7 class AnitubeIE(InfoExtractor):
   8     IE_NAME = u'anitube.se'
   9     _VALID_URL = r'http?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
  10
  11     _TEST = {
  12         u'url': u'http://www.anitube.se/video/36621',
  13         u'md5': u'0c4e4f1051bf50f5982f829f7230f539',
  14         u'info_dict': {
  15             u'id': u'36621',
  16             u'ext': u'mp4',
  17             u'title': u'Recorder to Randoseru 01',
  18         },
  19     }
  20
  21     def _real_extract(self, url):
  22         mobj = re.match(self._VALID_URL, url)
  23         video_id = mobj.group('id')
  24
  25         webpage = self._download_webpage(url, video_id)
  26
  27         key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
  28                                       webpage, u'key')
  29
  30         webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
  31                                                 key)
  32
  33         config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
  34
  35         video_title = config_xml.find('title').text
  36
  37
  38         formats = []
  39
  40         video_url = config_xml.find('file')
  41         if video_url is not None:
  42             formats.append({
  43                 'format_id': 'sd',
  44                 'url': video_url.text,
  45             })
  46
  47         video_url = config_xml.find('filehd')
  48         if video_url is not None:
  49             formats.append({
  50                 'format_id': 'hd',
  51                 'url': video_url.text,
  52             })
  53
  54         return {
  55             'id': video_id,
  56             'title': video_title,
  57             'ext': 'mp4',
  58             'formats': formats
  59         }