X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fallocine.py;h=7d280d871d10ccc914b17ee98193716c386e7b18;hp=34f0cd49bafa104d3e3c175a4bd0ab6bf2494c1c;hb=dcdb292fddc82ae11f4c0b647815a45c88a6b6d5;hpb=64a8c39a1f67d9a189cc59f7d86c46b72dc139a9 diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 34f0cd49b..7d280d871 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,19 +1,20 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re import json from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - compat_str, qualities, - determine_ext, + unescapeHTML, + xpath_element, ) class AllocineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P[0-9]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', @@ -22,7 +23,7 @@ class AllocineIE(InfoExtractor): 'id': '19546517', 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', - 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -32,7 +33,7 @@ class AllocineIE(InfoExtractor): 'id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', - 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -42,9 +43,12 @@ class AllocineIE(InfoExtractor): 'id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'description': 'md5:601d15393ac40f249648ef000720e7e3', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://www.allocine.fr/video/video-19550147/', + 'only_matching': True, }] def _real_extract(self, url): @@ -57,14 +61,18 @@ class AllocineIE(InfoExtractor): if typ == 'film': video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') else: - player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') - - player_data = json.loads(player) - video_id = compat_str(player_data['refMedia']) + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None) + if player: + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + else: + model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model') + model_data = self._parse_json(unescapeHTML(model), display_id) + video_id = compat_str(model_data['id']) xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - video = xml.find('.//AcVisionVideo').attrib + video = xpath_element(xml, './/AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] @@ -75,9 +83,7 @@ class AllocineIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id), 'url': v, - 'ext': determine_ext(v), }) - self._sort_formats(formats) return {