projects
/
youtube-dl
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
e6fe993
)
[pornhd] Detect removed videos and modernize
author Sergey M․ <dstftw@gmail.com> Fri, 17 Jun 2016 22:42:20 +0000 (05:42 +0700)
committer Sergey M․ <dstftw@gmail.com> Fri, 17 Jun 2016 22:42:20 +0000 (05:42 +0700)
youtube_dl/extractor/pornhd.py
patch
|
blob
|
history
diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py
index e7721b0138ec76653eb0862ff2fc034df2a346d7..7a5f00fe0b087cb6a2c298ac243e12b15daba60d 100644
(file)
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -1,10 +1,10 @@
from __future__ import unicode_literals
import re
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..utils import (
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
int_or_none,
js_to_json,
)
int_or_none,
js_to_json,
)
@@ -37,17 +37,17 @@ class PornHdIE(InfoExtractor):
title = self._html_search_regex(
[r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
title = self._html_search_regex(
[r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
- description = self._html_search_regex(
- r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
- webpage, 'description', fatal=False, group='value')
- view_count = int_or_none(self._html_search_regex(
- r'(\d+) views\s*<', webpage, 'view count', fatal=False))
- thumbnail = self._search_regex(
- r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
- sources = json.loads(js_to_json(self._search_regex(
+ sources = self._parse_json(js_to_json(self._search_regex(
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
- webpage, 'sources')))
+ webpage, 'sources', default='{}')), video_id)
+
+ if not sources:
+ message = self._html_search_regex(
+ r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+ webpage, 'error message', group='value')
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
formats = []
for format_id, video_url in sources.items():
if not video_url:
formats = []
for format_id, video_url in sources.items():
if not video_url:
@@ -61,6 +61,14 @@ class PornHdIE(InfoExtractor):
})
self._sort_formats(formats)
})
self._sort_formats(formats)
+ description = self._html_search_regex(
+ r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
+ webpage, 'description', fatal=False, group='value')
+ view_count = int_or_none(self._html_search_regex(
+ r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+ thumbnail = self._search_regex(
+ r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+
return {
'id': video_id,
'display_id': display_id,
return {
'id': video_id,
'display_id': display_id,