[tvp] Telewizja Polska: new extractor for tvp.pl, fixes #1719
author Marcin Cieślak <saper@saper.info>
Tue, 5 Nov 2013 22:30:25 +0000 (23:30 +0100)
committer Marcin Cieślak <saper@saper.info>
Tue, 5 Nov 2013 22:47:40 +0000 (23:47 +0100)
Thanks-To: mplonski
https://github.com/mplonski/linux/blob/master/tvp-dl.py

youtube_dl/extractor/__init__.py
youtube_dl/extractor/tvp.py [new file with mode: 0644]

index 888a91ccedc2900ddf1e3e6bf3d385ad6adb1b27..78f84cea3b9002f394f6ed13da8faae2da59baeb 100644 (file)
@@ -132,6 +132,7 @@ from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
+from .tvp import TvpIE
 from .unistra import UnistraIE
 from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
new file mode 100644 (file)
index 0000000..63fb57b
--- /dev/null
@@ -0,0 +1,60 @@
+# encoding: utf-8
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    RegexNotFoundError,
+)
+
+class TvpIE(InfoExtractor):
+    """Information extractor for videos on tvp.pl (Telewizja Polska).
+
+    The media URL is taken from the JSON document served at ``_INFO_URL``.
+    Title, description and thumbnail are looked up in the video page via
+    the ``_og_search_*`` helpers (presumably Open Graph meta tags).
+    """
+
+    IE_NAME = u'tvp.pl'
+    # The ``date`` group is captured, but _real_extract only reads ``id``.
+    _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
+    # Metadata endpoint; ``%s`` is replaced with the video id from the URL.
+    _INFO_URL = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s'
+
+    _TEST = {
+        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
+        u'file': u'31.10.2013-12878238.wmv',
+        u'info_dict': {
+            u'title': u'31.10.2013',
+            u'description': u'31.10.2013',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the tvp.pl video located at ``url``.
+
+        Returns a dict with ``id``, ``title``, ``ext`` and ``url``, plus
+        ``description`` and ``thumbnail`` when the page provides them.
+
+        Raises ExtractorError if the metadata response is not valid JSON
+        or does not contain a ``video_url`` entry.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id, "Downloading video webpage")
+        json_params = self._download_webpage(self._INFO_URL % video_id, video_id, "Downloading video metadata")
+
+        # json.loads reports malformed input with ValueError; catching only
+        # that keeps KeyboardInterrupt and unrelated failures from being
+        # misreported as bad JSON.
+        try:
+            params = json.loads(json_params)
+        except ValueError:
+            raise ExtractorError(u'Invalid JSON')
+
+        self.report_extraction(video_id)
+        try:
+            video_url = params['video_url']
+        except (KeyError, TypeError):
+            # TypeError covers a payload that is valid JSON but not an
+            # object (e.g. null or a list), which cannot be indexed by key.
+            raise ExtractorError(u'Missing JSON parameter: video_url')
+
+        # Fall back to the numeric id when the page exposes no title.
+        try:
+            title = self._og_search_title(webpage)
+        except RegexNotFoundError:
+            title = video_id
+        info = {
+            'id': video_id,
+            'title': title,
+            'ext': 'wmv',
+            'url': video_url,
+        }
+        # Description and thumbnail are optional and looked up independently,
+        # so a page lacking one of them still yields the other.
+        for field, search in (
+            ('description', self._og_search_description),
+            ('thumbnail', self._og_search_thumbnail),
+        ):
+            try:
+                info[field] = search(webpage)
+            except RegexNotFoundError:
+                pass
+
+        return info