projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
73f5364
)
[vier:videos] Fix extraction with old approach (Closes #6806)
author
Sergey M․
<dstftw@gmail.com>
Wed, 9 Sep 2015 17:59:17 +0000
(23:59 +0600)
committer
Sergey M․
<dstftw@gmail.com>
Wed, 9 Sep 2015 17:59:17 +0000
(23:59 +0600)
youtube_dl/extractor/vier.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index 15377097e658b20e75a08f19b370be3bef2158c7..c76c20614e49b7468234aed68a985f6476fe1d0a 100644
(file)
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import re
from __future__ import unicode_literals
import re
+import itertools
from .common import InfoExtractor
from .common import InfoExtractor
@@ -91,31 +92,27 @@ class VierVideosIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
program = mobj.group('program')
mobj = re.match(self._VALID_URL, url)
program = mobj.group('program')
- webpage = self._download_webpage(url, program)
-
page_id = mobj.group('page')
if page_id:
page_id = int(page_id)
start_page = page_id
page_id = mobj.group('page')
if page_id:
page_id = int(page_id)
start_page = page_id
- last_page = start_page + 1
playlist_id = '%s-page%d' % (program, page_id)
else:
start_page = 0
playlist_id = '%s-page%d' % (program, page_id)
else:
start_page = 0
- last_page = int(self._search_regex(
- r'videos\?page=(\d+)">laatste</a>',
- webpage, 'last page', default=0)) + 1
playlist_id = program
entries = []
playlist_id = program
entries = []
- for current_page_id in range(start_page, last_page):
+ for current_page_id in itertools.count(start_page):
current_page = self._download_webpage(
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
program,
current_page = self._download_webpage(
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
program,
- 'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+ 'Downloading page %d' % (current_page_id + 1))
page_entries = [
self.url_result('http://www.vier.be' + video_url, 'Vier')
for video_url in re.findall(
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries)
page_entries = [
self.url_result('http://www.vier.be' + video_url, 'Vier')
for video_url in re.findall(
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries)
+ if page_id or '>Meer<' not in current_page:
+ break
return self.playlist_result(entries, playlist_id)
return self.playlist_result(entries, playlist_id)