X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcamdemy.py;h=1bc602c31e8407c4e0931203e92b416cc802d267;hb=7e6011101f00e4f7216467e80091cc89c2846f01;hp=9a6341f0f6e3595ffc4764870da0b0dfcc91a583;hpb=8367d3f3cbad7ddae5116c3cd5d39ebfed9711c5;p=youtube-dl diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 9a6341f0f..1bc602c31 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -4,12 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import (compat_urllib_parse, compat_urlparse) from ..utils import parse_iso8601 class CamdemyIE(InfoExtractor): - _VALID_URL = r'http://www.camdemy.com/media/(?P\d+).*' + _VALID_URL = r'http://www.camdemy.com/media/(?P\d+)' _TESTS = [{ # single file 'url': 'http://www.camdemy.com/media/5181/', @@ -76,13 +76,13 @@ class CamdemyIE(InfoExtractor): fileName = fileListXML.find('./video/item/fileName').text creation_time = self._html_search_regex( - r"
Posted :
.*
([0-9:\- ]+)<", - page, 'creation time', flags=re.MULTILINE | re.DOTALL) + '+08:00' + r"
Posted :
[\r\n ]*
([^<>]+)<", + page, 'creation time', flags=re.MULTILINE) + '+08:00' creation_timestamp = parse_iso8601(creation_time, delimiter=' ') view_count_str = self._html_search_regex( - r"
Views :
.*
([0-9,]+)<", - page, 'view count', flags=re.MULTILINE | re.DOTALL) + r"
Views :
[\r\n ]*
class CamdemyFolderIE(InfoExtractor):
    """Build a playlist from the media links found on a Camdemy folder page."""

    # ``(?P<id>...)`` is the named-group syntax Python's ``re`` accepts; a bare
    # ``(?P\d+)`` does not compile. ``_real_extract`` passes the URL to
    # ``self._match_id`` -- presumably that helper reads the ``id`` group
    # (confirm against InfoExtractor). Dots in the host name are escaped so
    # that they only match a literal '.'.
    _VALID_URL = r'http://www\.camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per media link.

        The folder page is fetched with ``displayMode=list`` forced into the
        query string; every ``href='/media/<digits>'`` link found on it (with
        or without a trailing slash) becomes a playlist entry. The playlist
        title comes from the page's ``keywords`` meta tag.
        """
        folder_id = self._match_id(url)

        # Force displayMode=list so that all links are displayed in a single
        # page. Other query parameters of the incoming URL are kept; an
        # existing displayMode value is overwritten.
        parsed_url = compat_urlparse.urlparse(url)
        query = dict(compat_urlparse.parse_qsl(parsed_url.query))
        query['displayMode'] = 'list'
        final_url = compat_urlparse.urlunparse(
            parsed_url._replace(query=compat_urllib_parse.urlencode(query)))

        page = self._download_webpage(final_url, folder_id)

        # The matched links are site-relative paths, so prefix the site root
        # to turn each one into an absolute media URL.
        entries = [
            self.url_result('http://www.camdemy.com' + media_path)
            for media_path in re.findall(r"href='(/media/\d+/?)'", page)]

        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)