[daum.net] Support VodPlayer.swf URLs (closes #8173)

[youtube-dl] / youtube_dl / extractor / daum.py
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py

index f08f5715770f8f55f14e1b11bfae41e3f485888b..ea6a9d848899df5410bc9fc5fd4b770396a7dff0 100644 (file)
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -2,8 +2,13 @@
  
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
-from ..compat import compat_urllib_parse
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_parse_unquote,
+)
  from ..utils import (
      int_or_none,
      str_to_int,
@@ -12,7 +17,7 @@ from ..utils import (
  
  
  class DaumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
+    _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
      IE_NAME = 'daum.net'
  
      _TESTS = [{
@@ -27,21 +32,51 @@ class DaumIE(InfoExtractor):
              'view_count': int,
              'comment_count': int,
          },
+    }, {
+        'url': 'http://m.tvpot.daum.net/v/65139429',
+        'info_dict': {
+            'id': '65139429',
+            'ext': 'mp4',
+            'title': 'md5:a100d65d09cec246d8aa9bde7de45aed',
+            'description': 'md5:79794514261164ff27e36a21ad229fc5',
+            'upload_date': '20150604',
+            'duration': 154,
+            'view_count': int,
+            'comment_count': int,
+        },
      }, {
          'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
          'only_matching': True,
+    }, {
+        'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
+        'info_dict': {
+            'id': 'vwIpVpCQsT8$',
+            'ext': 'flv',
+            'title': '01-Korean War ( Trouble on the horizon )',
+            'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
+            'upload_date': '20080223',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 249,
+            'view_count': int,
+            'comment_count': int,
+        },
      }]
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
+        video_id = compat_urllib_parse_unquote(self._match_id(url))
          query = compat_urllib_parse.urlencode({'vid': video_id})
-        info = self._download_xml(
-            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
-            'Downloading video info')
          movie_data = self._download_json(
              'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
              video_id, 'Downloading video formats info')
  
+        # For URLs like http://m.tvpot.daum.net/v/65139429 the video_id is really a clipid: no formats are listed for it, so hand off to the ClipView URL
+        if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
+            return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
+
+        info = self._download_xml(
+            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
+            'Downloading video info')
+
          formats = []
          for format_el in movie_data['output_list']['output_list']:
              profile = format_el['profile']
@@ -76,7 +111,7 @@ class DaumIE(InfoExtractor):
  
  
  class DaumClipIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
      IE_NAME = 'daum.net:clip'
  
      _TESTS = [{
@@ -90,6 +125,9 @@ class DaumClipIE(InfoExtractor):
              'duration': 3868,
              'view_count': int,
          },
+    }, {
+        'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):