[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / fc2.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import hashlib
5 import re
6
7 from .common import InfoExtractor
8 from ..compat import (
9     compat_parse_qs,
10     compat_urllib_request,
11     compat_urlparse,
12 )
13 from ..utils import (
14     ExtractorError,
15     sanitized_Request,
16     urlencode_postdata,
17 )
18
19
class FC2IE(InfoExtractor):
    """Extractor for videos hosted on video.fc2.com.

    Handles regular content page URLs as well as the internal ``fc2:<id>``
    form, for which no web page is downloaded.
    """
    _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _NETRC_MACHINE = 'fc2'
    _TESTS = [{
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
        'info_dict': {
            'id': '20121103kUan1KHs',
            'ext': 'flv',
            'title': 'Boxing again with Puff',
        },
    }, {
        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
        'info_dict': {
            'id': '20150125cEva0hDn',
            'ext': 'mp4',
        },
        'params': {
            'username': 'ytdl@yt-dl.org',
            'password': '(snip)',
        },
        'skip': 'requires actual password',
    }, {
        'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
        'only_matching': True,
    }]

    def _login(self):
        """Log in to FC2 with the configured credentials, if any.

        Returns True when the login form was accepted and the follow-up
        redirect page was fetched; False when no credentials are available
        or the login response does not contain the success marker.
        """
        username, password = self._get_login_info()
        if username is None or password is None:
            return False

        # Log in
        login_form_strs = {
            'email': username,
            'password': password,
            'done': 'video',
            'Submit': ' Login ',
        }

        login_data = urlencode_postdata(login_form_strs)
        request = sanitized_Request(
            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)

        login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
        # A successful login response references the redirect URL below.
        if 'mode=redirect&login=done' not in login_results:
            self.report_warning('unable to log in: bad username or password')
            return False

        # this is also needed
        login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
        self._download_webpage(
            login_redir, None, note='Login redirect', errnote='Login redirect failed')

        return True

    def _real_extract(self, url):
        """Resolve an FC2 video URL to its media URL and metadata.

        Returns an info dict with 'id', 'title', 'url', 'ext' and
        'thumbnail'.

        Raises ExtractorError when the info response carries no 'filepath'
        or no 'mid' value, since the media URL cannot be built without them.
        """
        video_id = self._match_id(url)
        self._login()
        webpage = None
        if not url.startswith('fc2:'):
            webpage = self._download_webpage(url, video_id)
            self._downloader.cookiejar.clear_session_cookies()  # must clear
            # NOTE(review): the second login presumably restores the session
            # dropped by clearing the cookies above - confirm.
            self._login()

        # Fallback metadata; overridden by the page's OpenGraph tags when a
        # page was downloaded, and by the info response's title further down.
        title = 'FC2 video %s' % video_id
        thumbnail = None
        if webpage is not None:
            title = self._og_search_title(webpage)
            thumbnail = self._og_search_thumbnail(webpage)
        # The info request is sent the '/a/content/' form of the URL as href.
        refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url

        # Request token: MD5 of the video id followed by a fixed suffix.
        mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()

        info_url = (
            'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'.
            format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))

        info_webpage = self._download_webpage(
            info_url, video_id, note='Downloading info page')
        # The response is a query string. compat_parse_qs is the same parser
        # FC2EmbedIE uses for its query string, so both classes agree.
        info = compat_parse_qs(info_webpage)

        if 'err_code' in info:
            # most of the time we can still download video even if err_code is 403 or 602
            self.report_warning(
                'Error code was: %s... but still trying' % info['err_code'][0])

        if 'filepath' not in info:
            raise ExtractorError('Cannot download file. Are you logged in?')

        # Query string parsing drops blank values, so an empty 'mid' shows up
        # as a missing key; fail with a clear message instead of a KeyError.
        if 'mid' not in info:
            raise ExtractorError('Cannot download file: no "mid" value in info response')

        video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
        title_info = info.get('title')
        if title_info:
            title = title_info[0]

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'flv',
            'thumbnail': thumbnail,
        }
123
124
class FC2EmbedIE(InfoExtractor):
    """Extractor for FC2 ``flv2.swf`` embed URLs.

    Reads the video id, title and thumbnail server from the embed's query
    string and hands the actual extraction over to FC2IE through an
    ``fc2:<id>`` URL.
    """
    _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
    IE_NAME = 'fc2:embed'

    _TEST = {
        'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
        'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
        'info_dict': {
            'id': '201403223kCqB3Ez',
            'ext': 'flv',
            'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build a url_transparent result pointing at FC2IE for an embed URL."""
        params = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))

        # 'i' may be given more than once; the last occurrence is the video id.
        video_id = params['i'][-1]

        if 'tl' in params:
            title = params['tl'][0]
        else:
            title = 'FC2 video %s' % video_id

        thumbnail = None
        thumb_server = params.get('sj', [None])[0]
        if thumb_server:
            # See thumbnailImagePath() in ServerConst.as of flv2.swf
            path_parts = (
                video_id[:6],
                video_id[6:8],
                video_id[-2],
                video_id[-1],
                video_id,
            )
            thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
                thumb_server, '/'.join(path_parts))

        result = {
            '_type': 'url_transparent',
            'ie_key': FC2IE.ie_key(),
            'url': 'fc2:%s' % video_id,
        }
        result['title'] = title
        result['thumbnail'] = thumbnail
        return result