_ Git - youtube-dl/blob - youtube_dl/extractor/la7.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     determine_ext,
   7     js_to_json,
   8 )
   9
  10
  11 class LA7IE(InfoExtractor):
  12     IE_NAME = 'la7.it'
  13     _VALID_URL = r'''(?x)(https?://)?(?:
  14         (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
  15         tg\.la7\.it/repliche-tgla7\?id=
  16     )(?P<id>.+)'''
  17
  18     _TESTS = [{
  19         # 'src' is a plain URL
  20         'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
  21         'md5': '6054674766e7988d3e02f2148ff92180',
  22         'info_dict': {
  23             'id': 'inccool8-02-10-2015-163722',
  24             'ext': 'mp4',
  25             'title': 'Inc.Cool8',
  26             'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico',
  27             'thumbnail': 're:^https?://.*',
  28         },
  29     }, {
  30         # 'src' is a dictionary
  31         'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
  32         'md5': '6b0d8888d286e39870208dfeceaf456b',
  33         'info_dict': {
  34             'id': '189080',
  35             'ext': 'mp4',
  36             'title': 'TG LA7',
  37         },
  38     }, {
  39         'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
  40         'only_matching': True,
  41     }]
  42
  43     def _real_extract(self, url):
  44         video_id = self._match_id(url)
  45
  46         webpage = self._download_webpage(url, video_id)
  47
  48         player_data = self._parse_json(
  49             self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
  50             video_id, transform_source=js_to_json)
  51
  52         source = player_data['src']
  53         source_urls = source.values() if isinstance(source, dict) else [source]
  54
  55         formats = []
  56         for source_url in source_urls:
  57             ext = determine_ext(source_url)
  58             if ext == 'm3u8':
  59                 formats.extend(self._extract_m3u8_formats(
  60                     source_url, video_id, ext='mp4',
  61                     entry_protocol='m3u8_native', m3u8_id='hls'))
  62             else:
  63                 formats.append({
  64                     'url': source_url,
  65                 })
  66         self._sort_formats(formats)
  67
  68         return {
  69             'id': video_id,
  70             'title': player_data['title'],
  71             'description': self._og_search_description(webpage, default=None),
  72             'thumbnail': player_data.get('poster'),
  73             'formats': formats,
  74         }