Merge branch 'hakatashi-niconico-channel-video'
[youtube-dl] / youtube_dl / extractor / niconico.py
1 # encoding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8     compat_urllib_parse,
9     compat_urllib_request,
10     compat_urlparse,
11     ExtractorError,
12     unified_strdate,
13     parse_duration,
14     int_or_none,
15 )
16
17
class NiconicoIE(InfoExtractor):
    """Information extractor for nicovideo.jp watch pages.

    A login is performed when the extractor is initialized.  Extraction
    then combines the metadata returned by the getthumbinfo XML endpoint
    with the media URL reported by the getflv endpoint.
    """
    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TEST = {
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
        },
        'params': {
            'username': 'ydl.niconico@gmail.com',
            'password': 'youtube-dl',
        },
    }

    # The single capture group is the video id: an optional two-letter
    # prefix followed by digits (e.g. "sm22312215").
    _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Submit the configured credentials to the Niconico login form.

        Returns True when the response does not contain the login error
        heading, and False (after reporting a warning) when it does.

        Raises ExtractorError when no username is available.
        """
        (username, password) = self._get_login_info()
        if username is None:
            # Login is required
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        # Log in
        login_form_strs = {
            'mail': username,
            'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, None, note='Logging in', errnote='Unable to log in')
        # A failed login is detected by the error heading in the returned page.
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    @staticmethod
    def _thumbinfo_text(video_info, tag, fatal=False):
        """Return the text of the first ``tag`` descendant of ``video_info``.

        Returns None when the element is missing or has no text.  With
        ``fatal=True`` an ExtractorError is raised in that case instead, so
        that mandatory fields fail with a readable message rather than an
        AttributeError on ``None.text``.
        """
        element = video_info.find('.//' + tag)
        text = None if element is None else element.text
        if fatal and text is None:
            raise ExtractorError('Unable to extract %s from the video info' % tag)
        return text

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Raises ExtractorError when the flv info response carries no media
        URL, or when the video info lacks a title or a movie type.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        video_info = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note='Downloading video info page')

        # Get flv info; the response is parsed as a URL-encoded query string
        flv_info_webpage = self._download_webpage(
            'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, 'Downloading flv info')
        video_urls = compat_urlparse.parse_qs(flv_info_webpage).get('url')
        if not video_urls:
            # NOTE(review): presumably happens after a failed login or for an
            # unavailable video -- confirm against the getflv responses.
            raise ExtractorError(
                'Unable to extract the video URL: the flv info response has no "url" field')
        video_real_url = video_urls[0]

        # Start extracting information. Title and movie type are mandatory;
        # every other field degrades to None when absent.
        title = self._thumbinfo_text(video_info, 'title', fatal=True)
        extension = self._thumbinfo_text(video_info, 'movie_type', fatal=True)
        video_format = extension.upper()
        thumbnail = self._thumbinfo_text(video_info, 'thumbnail_url')
        description = self._thumbinfo_text(video_info, 'description')

        # Only the part before the first "+" is handed to unified_strdate
        first_retrieve = self._thumbinfo_text(video_info, 'first_retrieve')
        upload_date = None
        if first_retrieve is not None:
            upload_date = unified_strdate(first_retrieve.split('+')[0])

        view_count = int_or_none(self._thumbinfo_text(video_info, 'view_counter'))
        comment_count = int_or_none(self._thumbinfo_text(video_info, 'comment_num'))

        length = self._thumbinfo_text(video_info, 'length')
        duration = parse_duration(length) if length is not None else None
        webpage_url = self._thumbinfo_text(video_info, 'watch_url')

        # Channel information takes precedence over user information.
        channel_id = video_info.find('.//ch_id')
        if channel_id is not None:
            uploader_id = channel_id.text
            uploader = self._thumbinfo_text(video_info, 'ch_name')
        else:
            user_id = video_info.find('.//user_id')
            if user_id is not None:
                uploader_id = user_id.text
                uploader = self._thumbinfo_text(video_info, 'user_nickname')
            else:
                uploader_id = uploader = None

        return {
            'id': video_id,
            'url': video_real_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'comment_count': comment_count,
            'duration': duration,
            'webpage_url': webpage_url,
        }