X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftagesschau.py;h=c351b754594a08be2f585f901c3a71ac425bcfd7;hb=HEAD;hp=f6102c22431460c2ab6df554b17f4a57ce1c3974;hpb=651ad35ce0f0ee9d04db085c50c29441b47bc825;p=youtube-dl diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index f6102c224..c351b7545 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re @@ -23,7 +23,7 @@ class TagesschauPlayerIE(InfoExtractor): 'id': '179517', 'ext': 'mp4', 'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', 'formats': 'mincount:6', }, }, { @@ -33,7 +33,7 @@ class TagesschauPlayerIE(InfoExtractor): 'id': '29417', 'ext': 'mp3', 'title': 'Trabi - Bye, bye Rennpappe', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', 'formats': 'mincount:2', }, }, { @@ -125,54 +125,54 @@ class TagesschauPlayerIE(InfoExtractor): class TagesschauIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P[^/]+/(?:[^/]+/)*?[^/#?]+?(?P-?[0-9]+)?)(?:~_?[^/#?]+?)?\.html' + _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P[^/]+/(?:[^/]+/)*?(?P[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html' _TESTS = [{ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html', 'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6', 'info_dict': { - 'id': '102143', + 'id': 'video-102143', 'ext': 'mp4', 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', 'description': '18.07.2015 20:10 Uhr', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', 'md5': '3c54c1f6243d279b706bde660ceec633', 'info_dict': { - 'id': '5727', + 'id': 'ts-5727', 'ext': 'mp4', 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', 'description': 'md5:695c01bfd98b7e313c501386327aea59', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { # exclusive audio 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html', 'md5': '76e6eec6ebd40740671cf0a2c88617e5', 'info_dict': { - 'id': '29417', + 'id': 'audio-29417', 'ext': 'mp3', 'title': 'Trabi - Bye, bye Rennpappe', 'description': 'md5:8687dda862cbbe2cfb2df09b56341317', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { # audio in article 'url': 'http://www.tagesschau.de/inland/bnd-303.html', 'md5': 'e0916c623e85fc1d2b26b78f299d3958', 'info_dict': { - 'id': '303', + 'id': 'bnd-303', 'ext': 'mp3', 'title': 'Viele Baustellen für neuen BND-Chef', 'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4', - 'thumbnail': 're:^https?:.*\.jpg$', + 'thumbnail': r're:^https?:.*\.jpg$', }, }, { 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html', 'info_dict': { - 'id': '135', + 'id': 'afd-parteitag-135', 'title': 'Möchtegern-Underdog mit Machtanspruch', }, 'playlist_count': 2, @@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor): }, { 'url': 'http://www.tagesschau.de/100sekunden/index.html', 'only_matching': True, + }, { + # playlist article with collapsing sections + 'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html', + 'only_matching': True, }] @classmethod @@ -275,7 +279,7 @@ class TagesschauIE(InfoExtractor): if webpage_type == 'website': # Article entries = [] for num, (entry_title, media_kind, download_text) in enumerate(re.findall( - r'(?s)]+class="infotext"[^>]*>.*?(.+?).*?

.*?%s' % DOWNLOAD_REGEX, + r'(?s)]+class="infotext"[^>]*>\s*(?:]+>)?\s*(.+?).*?

.*?%s' % DOWNLOAD_REGEX, webpage), 1): entries.append({ 'id': '%s-%d' % (display_id, num),