Switch codebase to use sanitized_Request instead of compat_urllib_request.Request
[youtube-dl] / youtube_dl / extractor / primesharetv.py
index 967125abc4883fdbf73e0920267162add6c2e089..85aae95765370249023d8202b9d51c44acb99a97 100644 (file)
@@ -1,42 +1,58 @@
-# encoding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
 from ..utils import (
-    int_or_none,
-    parse_filesize,
-    unified_strdate,
-    urlencode_postdata,
-)
-from ..compat import (
-    compat_urllib_request,
+    ExtractorError,
+    sanitized_Request,
 )
 
-class PrimesharetvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>.*)(?:.*)'
+
+class PrimeShareTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+    _TEST = {
+        'url': 'http://primeshare.tv/download/238790B611',
+        'md5': 'b92d9bf5461137c36228009f31533fbc',
+        'info_dict': {
+            'id': '238790B611',
+            'ext': 'mp4',
+            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+        },
+    }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
-       
-        self._sleep(9, video_id)
-        
-        hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token')
-        data = urlencode_postdata({
-            'hash': hashtoken,
-        })
+
+        if '>File not exist<' in webpage:
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+        fields = self._hidden_inputs(webpage)
+
         headers = {
             'Referer': url,
             'Content-Type': 'application/x-www-form-urlencoded',
         }
-        video_page_request = compat_urllib_request.Request(url, data, headers=headers)
-        video_page = self._download_webpage(video_page_request, None, False, '')
 
-        video_url = self._html_search_regex(
-            r'url: \'(http://l\.primeshare\.tv[^\']+)\',', video_page, 'video url')
+        wait_time = int(self._search_regex(
+            r'var\s+cWaitTime\s*=\s*(\d+)',
+            webpage, 'wait time', default=7)) + 1
+        self._sleep(wait_time, video_id)
+
+        req = sanitized_Request(
+            url, compat_urllib_parse.urlencode(fields), headers)
+        video_page = self._download_webpage(
+            req, video_id, 'Downloading video page')
+
+        video_url = self._search_regex(
+            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+            video_page, 'video url')
 
         title = self._html_search_regex(
-            r'<h1>Watch&nbsp;[^\(]+\(([^/)]+)\)&nbsp;', video_page, 'title')
+            r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
+            video_page, 'title')
 
         return {
             'id': video_id,