X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Flecturio.py;h=24f78d928449b999f09acec8337297fa62da8f55;hb=77d95677b7ab4a9840ef142b14627b07a9a31120;hp=62ff28e028b2443fcbfe74da821daa33481844d9;hpb=dfe0a3a9d2e07ac1e5ad221912d03b999ebb4d75;p=youtube-dl diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py index 62ff28e02..24f78d928 100644 --- a/youtube_dl/extractor/lecturio.py +++ b/youtube_dl/extractor/lecturio.py @@ -64,8 +64,14 @@ class LecturioBaseIE(InfoExtractor): class LecturioIE(LecturioBaseIE): - _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P[^/?#&]+)\.lecture' - _TEST = { + _VALID_URL = r'''(?x) + https:// + (?: + app\.lecturio\.com/[^/]+/(?P[^/?#&]+)\.lecture| + (?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.vortrag + ) + ''' + _TESTS = [{ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos', 'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870', 'info_dict': { @@ -74,7 +80,10 @@ class LecturioIE(LecturioBaseIE): 'title': 'Important Concepts and Terms – Introduction to Microbiology', }, 'skip': 'Requires lecturio account credentials', - } + }, { + 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', + 'only_matching': True, + }] _CC_LANGS = { 'German': 'de', @@ -86,7 +95,8 @@ class LecturioIE(LecturioBaseIE): } def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') or mobj.group('id_de') webpage = self._download_webpage( 'https://app.lecturio.com/en/lecture/%s/player.html' % display_id, @@ -136,9 +146,15 @@ class LecturioIE(LecturioBaseIE): cc_url = url_or_none(cc_url) if not cc_url: continue - sub_dict = automatic_captions if 'auto-translated' in cc_label else subtitles lang = self._search_regex( - r'/([a-z]{2})_', cc_url, 'lang', default=cc_label.split()[0]) + r'/([a-z]{2})_', cc_url, 'lang', + default=cc_label.split()[0] if cc_label else 'en') + original_lang = self._search_regex( + r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang', + default=None) + sub_dict = (automatic_captions + if 'auto-translated' in cc_label or original_lang + else subtitles) sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({ 'url': cc_url, }) @@ -184,3 +200,30 @@ class LecturioCourseIE(LecturioBaseIE): 'title', default=None) return self.playlist_result(entries, display_id, title) + + +class LecturioDeCourseIE(LecturioBaseIE): + _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P[^/?#&]+)\.kurs' + _TEST = { + 'url': 'https://www.lecturio.de/jura/grundrechte.kurs', + 'only_matching': True, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer( + r'(?s)]+\bdata-lecture-id=["\'](?P\d+).+?\bhref=(["\'])(?P(?:(?!\2).)+\.vortrag)\b[^>]+>', + webpage): + lecture_url = urljoin(url, mobj.group('url')) + lecture_id = mobj.group('id') + entries.append(self.url_result( + lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) + + title = self._search_regex( + r']*>([^<]+)', webpage, 'title', default=None) + + return self.playlist_result(entries, display_id, title)