-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
from ..utils import (
determine_ext,
ExtractorError,
class LifeNewsIE(InfoExtractor):
- IE_NAME = 'lifenews'
- IE_DESC = 'LIFE | NEWS'
- _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
+ IE_NAME = 'life'
+ IE_DESC = 'Life.ru'
+ _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
_TESTS = [{
# single video embedded via video/source
- 'url': 'http://lifenews.ru/news/98736',
+ 'url': 'https://life.ru/t/новости/98736',
'md5': '77c95eaefaca216e32a76a343ad89d23',
'info_dict': {
'id': '98736',
}
}, {
# single video embedded via iframe
- 'url': 'http://lifenews.ru/news/152125',
+ 'url': 'https://life.ru/t/новости/152125',
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
'info_dict': {
'id': '152125',
}
}, {
# two videos embedded via iframe
- 'url': 'http://lifenews.ru/news/153461',
+ 'url': 'https://life.ru/t/новости/153461',
'info_dict': {
'id': '153461',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
},
}],
}, {
- 'url': 'http://lifenews.ru/video/13035',
+ 'url': 'https://life.ru/t/новости/213035',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- section = mobj.group('section')
+ video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'http://lifenews.ru/%s/%s' % (section, video_id),
- video_id, 'Downloading page')
+ webpage = self._download_webpage(url, video_id)
video_urls = re.findall(
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
iframe_links = re.findall(
- r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
+ r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
webpage)
if not video_urls and not iframe_links:
title = remove_end(
self._og_search_title(webpage),
- ' - Первый по срочным новостям — LIFE | NEWS')
+ ' - Life.ru')
description = self._og_search_description(webpage)
class LifeEmbedIE(InfoExtractor):
IE_NAME = 'life:embed'
- _VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
+ _VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
- _TEST = {
+ _TESTS = [{
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
'md5': 'b889715c9e49cb1981281d0e5458fbbe',
'info_dict': {
'id': 'e50c2dec2867350528e2574c899b8291',
'ext': 'mp4',
'title': 'e50c2dec2867350528e2574c899b8291',
- 'thumbnail': 're:http://.*\.jpg',
+ 'thumbnail': r're:http://.*\.jpg',
}
- }
+ }, {
+ # with 1080p
+ 'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ thumbnail = None
formats = []
- for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
- video_url = compat_urlparse.urljoin(url, video_url)
- ext = determine_ext(video_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id='m3u8'))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': ext,
- 'preference': 1,
- })
+
+ def extract_m3u8(manifest_url):  # helper: extend the enclosing `formats` list with the formats found for manifest_url
+ formats.extend(self._extract_m3u8_formats(  # called from both the options-playlist path and the old "file" fallback
+ manifest_url, video_id, 'mp4',  # video_id comes from the enclosing _real_extract scope
+ entry_protocol='m3u8_native', m3u8_id='m3u8'))  # NOTE(review): presumably selects the native HLS downloader - confirm against InfoExtractor._extract_m3u8_formats
+
+ def extract_original(original_url):  # helper: append one direct-URL format entry to the enclosing `formats` list
+ formats.append({
+ 'url': original_url,  # used as given; callers that need an absolute URL must join it before calling
+ 'format_id': determine_ext(original_url, None),  # NOTE(review): None is presumably the fallback when no extension is detected - confirm against utils.determine_ext
+ 'preference': 1,  # NOTE(review): presumably ranks this entry above the m3u8 formats in _sort_formats - confirm
+ })
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
+ video_id).get('playlist', {})
+ if playlist:
+ master = playlist.get('master')
+ if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
+ extract_m3u8(compat_urlparse.urljoin(url, master))
+ original = playlist.get('original')
+ if isinstance(original, compat_str):
+ extract_original(original)
+ thumbnail = playlist.get('image')
+
+ # Old rendition fallback
+ if not formats:
+ for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
+ video_url = compat_urlparse.urljoin(url, video_url)
+ if determine_ext(video_url) == 'm3u8':
+ extract_m3u8(video_url)
+ else:
+ extract_original(video_url)
+
self._sort_formats(formats)
- thumbnail = self._search_regex(
+ thumbnail = thumbnail or self._search_regex(
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
return {