git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/internazionale.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import unified_timestamp
   6
   7
   8 class InternazionaleIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
  10     _TESTS = [{
  11         'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
  12         'md5': '3e39d32b66882c1218e305acbf8348ca',
  13         'info_dict': {
  14             'id': '265968',
  15             'display_id': 'richard-linklater-racconta-una-scena-di-boyhood',
  16             'ext': 'mp4',
  17             'title': 'Richard Linklater racconta una scena di Boyhood',
  18             'description': 'md5:efb7e5bbfb1a54ae2ed5a4a015f0e665',
  19             'timestamp': 1424354635,
  20             'upload_date': '20150219',
  21             'thumbnail': r're:^https?://.*\.jpg$',
  22         },
  23         'params': {
  24             'format': 'bestvideo',
  25         },
  26     }, {
  27         'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
  28         'md5': '9db8663704cab73eb972d1cee0082c79',
  29         'info_dict': {
  30             'id': '761344',
  31             'display_id': 'telefono-stare-con-noi-stessi',
  32             'ext': 'mp4',
  33             'title': 'Usiamo il telefono per evitare di stare con noi stessi',
  34             'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
  35             'timestamp': 1535528954,
  36             'upload_date': '20180829',
  37             'thumbnail': r're:^https?://.*\.jpg$',
  38         },
  39         'params': {
  40             'format': 'bestvideo',
  41         },
  42     }]
  43
  44     def _real_extract(self, url):
  45         display_id = self._match_id(url)
  46
  47         webpage = self._download_webpage(url, display_id)
  48
  49         DATA_RE = r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1'
  50
  51         title = self._search_regex(
  52             DATA_RE % 'video-title', webpage, 'title', default=None,
  53             group='value') or self._og_search_title(webpage)
  54
  55         video_id = self._search_regex(
  56             DATA_RE % 'job-id', webpage, 'video id', group='value')
  57         video_path = self._search_regex(
  58             DATA_RE % 'video-path', webpage, 'video path', group='value')
  59         video_available_abroad = self._search_regex(
  60             DATA_RE % 'video-available_abroad', webpage,
  61             'video available aboard', default='1', group='value')
  62         video_available_abroad = video_available_abroad == '1'
  63
  64         video_base = 'https://video%s.internazionale.it/%s/%s.' % \
  65             ('' if video_available_abroad else '-ita', video_path, video_id)
  66
  67         formats = self._extract_m3u8_formats(
  68             video_base + 'm3u8', display_id, 'mp4',
  69             entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
  70         formats.extend(self._extract_mpd_formats(
  71             video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
  72         self._sort_formats(formats)
  73
  74         timestamp = unified_timestamp(self._html_search_meta(
  75             'article:published_time', webpage, 'timestamp'))
  76
  77         return {
  78             'id': video_id,
  79             'display_id': display_id,
  80             'title': title,
  81             'thumbnail': self._og_search_thumbnail(webpage),
  82             'description': self._og_search_description(webpage),
  83             'timestamp': timestamp,
  84             'formats': formats,
  85         }