_ Git - youtube-dl/blob - youtube_dl/extractor/europa.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     compat_urlparse,
   7     xpath_text
   8 )
   9
  10
  11 class EuropaIE(InfoExtractor):
  12     _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P<id>[A-Za-z0-9]+)'
  13     _TEST = {
  14         'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
  15         'md5': '728cca2fd41d5aa7350cec1141fbe620',
  16         'info_dict': {
  17             'id': 'I107758',
  18             'ext': 'mp4',
  19             'title': 'TRADE - Wikileaks on TTIP',
  20             'description': 'NEW  LIVE EC Midday press briefing of 11/08/2015',
  21             'thumbnail': 're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$'
  22         }
  23     }
  24
  25     def _real_extract(self, url):
  26         video_id = self._match_id(url)
  27         query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  28         lang = query.get('sitelang', ['en'])[0]
  29
  30         playlist = self._download_xml('http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id, video_id)
  31         videos = {}
  32         formats = []
  33
  34         for item in playlist.findall('info/title/item'):
  35             videos[xpath_text(item, 'lg')] = {'title': xpath_text(item, 'label').strip()}
  36
  37         for item in playlist.findall('info/description/item'):
  38             videos[xpath_text(item, 'lg')]['description'] = xpath_text(item, 'label').strip()
  39
  40         for item in playlist.findall('files/file'):
  41             lg = xpath_text(item, 'lg')
  42             vid = videos[lg]
  43             vid['format_note'] = xpath_text(item, 'lglabel')
  44             vid['url'] = xpath_text(item, 'url')
  45
  46             if lg == lang:
  47                 vid['language_preference'] = 10
  48
  49             formats.append(vid)
  50
  51         formats.reverse()
  52         def_video = videos.get(lang, videos['int'])
  53
  54         return {
  55             'id': video_id,
  56             'title': def_video['title'],
  57             'description': def_video['description'],
  58             'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'),
  59             'formats': formats
  60         }