[kinopoisk] Add extractor (closes #17283)
authorSergey M․ <dstftw@gmail.com>
Tue, 21 Aug 2018 19:19:30 +0000 (02:19 +0700)
committerSergey M․ <dstftw@gmail.com>
Tue, 21 Aug 2018 19:19:30 +0000 (02:19 +0700)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/kinopoisk.py [new file with mode: 0644]

index d8e86c277e00f42ce501581b27a7e1a87793d69c..9fc1cfa707f1aa39694b8bfc08310419a92dbd4c 100644 (file)
@@ -520,6 +520,7 @@ from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
+from .kinopoisk import KinoPoiskIE
 from .keek import KeekIE
 from .konserthusetplay import KonserthusetPlayIE
 from .kontrtube import KontrTubeIE
diff --git a/youtube_dl/extractor/kinopoisk.py b/youtube_dl/extractor/kinopoisk.py
new file mode 100644 (file)
index 0000000..9e8d01f
--- /dev/null
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    dict_get,
+    int_or_none,
+)
+
+
+class KinoPoiskIE(InfoExtractor):
+    _GEO_COUNTRIES = ['RU']
+    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.kinopoisk.ru/film/81041/watch/',
+        'md5': '4f71c80baea10dfa54a837a46111d326',
+        'info_dict': {
+            'id': '81041',
+            'ext': 'mp4',
+            'title': 'Алеша попович и тугарин змей',
+            'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
+            'thumbnail': r're:^https?://.*',
+            'duration': 4533,
+            'age_limit': 12,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }, {
+        'url': 'https://www.kinopoisk.ru/film/81041',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
+            query={'kpId': video_id})
+
+        data = self._parse_json(
+            self._search_regex(
+                r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
+                webpage, 'data'),
+            video_id)['models']
+
+        film = data['filmStatus']
+        title = film.get('title') or film['originalTitle']
+
+        formats = self._extract_m3u8_formats(
+            data['playlistEntity']['uri'], video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        description = dict_get(
+            film, ('descriptscription', 'description',
+                   'shortDescriptscription', 'shortDescription'))
+        thumbnail = film.get('coverUrl') or film.get('posterUrl')
+        duration = int_or_none(film.get('duration'))
+        age_limit = int_or_none(film.get('restrictionAge'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }