X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvlive.py;h=d68f222e7b05b50a81f16b03ac4692ffe1b0d9d1;hb=067aa17edf5a46a8cbc4d6b90864eddf051fa2bc;hp=f3825db5cc208d0fe8ae248b68198213e6ce82e1;hpb=b71c18b4343d54ce8373e9a11df882aca1ae82a0;p=youtube-dl diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index f3825db5c..d68f222e7 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -57,7 +57,7 @@ class VLiveIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.vlive.tv/video/%s' % video_id, video_id) + 'https://www.vlive.tv/video/%s' % video_id, video_id) VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)' VIDEO_PARAMS_FIELD = 'video params' @@ -108,11 +108,11 @@ class VLiveIE(InfoExtractor): def _live(self, video_id, webpage): init_page = self._download_webpage( - 'http://www.vlive.tv/video/init/view', + 'https://www.vlive.tv/video/init/view', video_id, note='Downloading live webpage', data=urlencode_postdata({'videoSeq': video_id}), headers={ - 'Referer': 'http://www.vlive.tv/video/%s' % video_id, + 'Referer': 'https://www.vlive.tv/video/%s' % video_id, 'Content-Type': 'application/x-www-form-urlencoded' }) @@ -236,7 +236,12 @@ class VLiveChannelIE(InfoExtractor): query={ 'app_id': app_id, 'channelSeq': channel_seq, - 'maxNumOfRows': 1000, + # Large values of maxNumOfRows (~300 or above) may cause + # empty responses (see [1]), e.g. this happens for [2] that + # has more than 300 videos. + # 1. https://github.com/ytdl-org/youtube-dl/issues/13830 + # 2. http://channels.vlive.tv/EDBF. + 'maxNumOfRows': 100, '_': int(time.time()), 'pageNo': page_num } @@ -280,10 +285,8 @@ class VLivePlaylistIE(InfoExtractor): } def _real_extract(self, url): - playlist_id = self._match_id(url) - video_id_match = re.match(self._VALID_URL, url) - assert video_id_match - video_id = compat_str(video_id_match.group('video_id')) + mobj = re.match(self._VALID_URL, url) + video_id, playlist_id = mobj.group('video_id', 'id') VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s' if self._downloader.params.get('noplaylist'): @@ -294,26 +297,27 @@ class VLivePlaylistIE(InfoExtractor): ie=VLiveIE.ie_key(), video_id=video_id) self.to_screen( - 'Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + 'Downloading playlist %s - add --no-playlist to just download video' + % playlist_id) webpage = self._download_webpage( - 'http://www.vlive.tv/video/%s/playlist/%s' % (video_id, playlist_id), video_id) + 'http://www.vlive.tv/video/%s/playlist/%s' + % (video_id, playlist_id), playlist_id) - playlist_name = self._html_search_regex( - r']+class="[^"]*multicam_playlist[^>]*>\s*]+>([^<]+)', - webpage, 'playlist name', fatal=False) + item_ids = self._parse_json( + self._search_regex( + r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage, + 'playlist video seqs'), + playlist_id) - item_ids = self._search_regex( - r'\bvar\s+playlistVideoSeqs\s*=\s*(\[[^]]+\])', - webpage, 'playlist item ids') + entries = [ + self.url_result( + VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(), + video_id=compat_str(item_id)) + for item_id in item_ids] - entries = [] - for item_id in self._parse_json(item_ids, playlist_id): - item_id = compat_str(item_id) - entries.append( - self.url_result( - VIDEO_URL_TEMPLATE % item_id, - ie=VLiveIE.ie_key(), video_id=item_id)) + playlist_name = self._html_search_regex( + r']+class="[^"]*multicam_playlist[^>]*>\s*]+>([^<]+)', + webpage, 'playlist title', fatal=False) - return self.playlist_result( - entries, playlist_id, playlist_name) + return self.playlist_result(entries, playlist_id, playlist_name)