Merge branch 'daum' of https://github.com/remitamine/youtube-dl into remitamine-daum

[youtube-dl] / youtube_dl / extractor / daum.py
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py

index 0f5686e0756592a062505557c74a784e1a6a5a83..a083cc0dc9ed44bd6772cd833fcca1f0e452482f 100644 (file)
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
  
  from .common import InfoExtractor
  from ..compat import compat_urllib_parse
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    str_to_int,
+    xpath_text,
+)
  
  
  class DaumIE(InfoExtractor):
@@ -20,6 +24,8 @@ class DaumIE(InfoExtractor):
              'description': 'Mark Hunt vs Antonio Silva',
              'upload_date': '20131217',
              'duration': 2117,
+            'view_count': int,
+            'comment_count': int,
          },
      }, {
          'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
@@ -60,15 +66,17 @@ class DaumIE(InfoExtractor):
              'id': video_id,
              'title': info.find('TITLE').text,
              'formats': formats,
-            'thumbnail': info.find('THUMB_URL').text,
-            'description': info.find('CONTENTS').text,
-            'duration': int_or_none(info.find('DURATION').text),
+            'thumbnail': xpath_text(info, 'THUMB_URL'),
+            'description': xpath_text(info, 'CONTENTS'),
+            'duration': int_or_none(xpath_text(info, 'DURATION')),
              'upload_date': info.find('REGDTTM').text[:8],
+            'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
+            'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
          }
  
  
  class DaumClipIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView\.do|mypot/View\.do)\?.*?clipid=(?P<id>\d+)'
      IE_NAME = 'daum.net'
  
      _TESTS = [{
@@ -86,7 +94,9 @@ class DaumClipIE(InfoExtractor):
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
-        clip_info = self._download_json('http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, video_id)['clip_bean']
+        clip_info = self._download_json(
+            'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
+            video_id, 'Downloading clip info')['clip_bean']
  
          return {
              '_type': 'url_transparent',