Merge branch 'dumpert' of https://github.com/jorams/youtube-dl into jorams-dumpert
authorSergey M․ <dstftw@gmail.com>
Mon, 30 Mar 2015 13:47:11 +0000 (19:47 +0600)
committerSergey M․ <dstftw@gmail.com>
Mon, 30 Mar 2015 13:47:11 +0000 (19:47 +0600)
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/xuite.py

index 3a27e37890dc78b26af866c9884807c97c56ccb9..0c8b731cf47267568e43ccd09ff21f1683b4d992 100644 (file)
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
     }
 
     def _extract_count(self, pattern, webpage, name):
-        count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
-        if count:
-            count = str_to_int(count)
-        return count
+        return str_to_int(self._search_regex(
+            pattern, webpage, '%s count' % name, fatal=False))
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
         if thumbnail:
             thumbnail = compat_urllib_parse.unquote(thumbnail)
 
-        view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
-        like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+        view_count = self._extract_count(
+            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+        like_count = self._extract_count(
+            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+        dislike_count = self._extract_count(
+            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
         comment_count = self._extract_count(
-            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
         video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
         if webpage.find('"encrypted":true') != -1:
index 4971965f9d090cce61a2e8b6d1486fadc873b4dc..81d885fdcee1cf788c217e862629df58f386d73c 100644 (file)
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def base64_decode_utf8(data):
+        return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+    @staticmethod
+    def base64_encode_utf8(data):
+        return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
     def _extract_flv_config(self, media_id):
-        base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+        base64_media_id = self.base64_encode_utf8(media_id)
         flv_config = self._download_xml(
             'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
             'flv config')
         prop_dict = {}
         for prop in flv_config.findall('./property'):
-            prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+            prop_id = self.base64_decode_utf8(prop.attrib['id'])
             # CDATA may be empty in flv config
             if not prop.text:
                 continue
-            encoded_content = base64.b64decode(prop.text).decode('utf-8')
+            encoded_content = self.base64_decode_utf8(prop.text)
             prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
         return prop_dict