[la7.it] Fix the extractor
[youtube-dl] / youtube_dl / extractor / la7.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     determine_ext,
7     js_to_json,
8 )
9
10
class LA7IE(InfoExtractor):
    """Extractor for video pages on la7.it and tg.la7.it.

    The page is expected to contain a ``videoLa7({...});`` call whose
    argument holds the player configuration ('src', 'title' and,
    optionally, 'poster').
    """

    IE_NAME = 'la7.it'
    # Verbose pattern: either a la7.it "<show>/rivedila7/<id>" or
    # "<show>/video/<id>" page, or a tg.la7.it replay addressed by "?id=".
    _VALID_URL = r'''(?x)(https?://)?(?:
        (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
        tg\.la7\.it/repliche-tgla7\?id=
    )(?P<id>.+)'''

    _TESTS = [{
        # player data carries 'src' as a single URL string
        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
        'md5': '6054674766e7988d3e02f2148ff92180',
        'info_dict': {
            'id': 'inccool8-02-10-2015-163722',
            'ext': 'mp4',
            'title': 'Inc.Cool8',
            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico',
            'thumbnail': 're:^https?://.*',
        },
    }, {
        # player data carries 'src' as a dictionary of URLs
        'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
        'md5': '6b0d8888d286e39870208dfeceaf456b',
        'info_dict': {
            'id': '189080',
            'ext': 'mp4',
            'title': 'TG LA7',
        },
    }, {
        'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, parses the ``videoLa7(...)`` player
        configuration and turns every URL found under its 'src' entry into
        one or more formats. A missing 'src' or 'title' key raises
        ``KeyError``; 'poster' and the og:description are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The configuration is a JavaScript object literal, hence the
        # js_to_json transform before parsing.
        raw_player_data = self._search_regex(
            r'videoLa7\(({[^;]+})\);', webpage, 'player data')
        player_data = self._parse_json(
            raw_player_data, video_id, transform_source=js_to_json)

        # 'src' is either one URL or a dictionary whose values are URLs.
        source = player_data['src']
        if isinstance(source, dict):
            candidate_urls = source.values()
        else:
            candidate_urls = [source]

        formats = []
        for candidate_url in candidate_urls:
            if determine_ext(candidate_url) != 'm3u8':
                # Anything that is not an HLS manifest is used as-is.
                formats.append({'url': candidate_url})
                continue
            formats.extend(self._extract_m3u8_formats(
                candidate_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', m3u8_id='hls'))
        self._sort_formats(formats)

        title = player_data['title']
        description = self._og_search_description(webpage, default=None)
        thumbnail = player_data.get('poster')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }