[rdsca] New extractor
[youtube-dl] / youtube_dl / extractor / rdsca.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     parse_iso8601,
7     url_basename,
8 )
9
10
class RDScaIE(InfoExtractor):
    """Information extractor for video pages under ``rds.ca/videos/``.

    All metadata is scraped from ``<span itemprop="...">`` elements found in
    the downloaded page HTML.
    """

    IE_NAME = 'RDS.ca'
    # Accept both plain and TLS page URLs.  The ``id`` group is kept for
    # youtube-dl's URL matching machinery; the id actually returned is the
    # last path component of the URL (see ``_real_extract``).
    _VALID_URL = r'https?://(?:www\.)?rds\.ca/videos/(?P<id>.*)'

    _TESTS = [{
        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
        'info_dict': {
            # 'id' and 'ext' let the test harness compute the expected
            # output file name.
            'id': 'fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
            'ext': 'mp4',
            'title': 'Fowler Jr. prend la direction de Jacksonville',
            'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
            'timestamp': 1430397346,
        }
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The video id is the basename (last path component) of *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``timestamp`` and ``formats`` (a single entry holding
        the media URL).  ``description``, ``thumbnail`` and ``timestamp`` are
        ``None`` when the corresponding markup is absent.

        The title and the media URL are mandatory: ``_search_regex`` is
        called without a default for them, so a page lacking either one makes
        the extraction fail instead of yielding an unusable result.
        """
        video_id = url_basename(url)

        webpage = self._download_webpage(url, video_id)

        # Mandatory fields: no ``default`` so a missing match is reported.
        title = self._search_regex(
            r'<span itemprop="name"[^>]*>([^\n]*)</span>', webpage, 'video title')
        video_url = self._search_regex(
            r'<span itemprop="contentURL" content="([^"]+)"', webpage, 'video URL')

        # Optional fields: fall back to None when the markup is missing.
        upload_date = self._search_regex(
            r'<span itemprop="uploadDate" content="([^"]+)"', webpage, 'upload date', default=None)
        description = self._search_regex(
            r'<span itemprop="description"[^>]*>([^\n]*)</span>', webpage, 'description', default=None)
        thumbnail = self._search_regex(
            r'<span itemprop="thumbnailUrl" content="([^"]+)"', webpage, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            # The scraped date string is converted with parse_iso8601.
            'timestamp': parse_iso8601(upload_date),
            'formats': [{
                'url': video_url,
            }],
        }