added "bild.de" as extractor
author: scheiba <dennis.scheiba@gmx.de>
Sat, 18 Oct 2014 20:15:47 +0000 (22:15 +0200)
committer: scheiba <dennis.scheiba@gmx.de>
Sat, 18 Oct 2014 20:15:47 +0000 (22:15 +0200)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/bild.py [new file with mode: 0644]

index 070f9ff1977ccc2f2a556476a011404d5f1c503c..e0957987c77349e363a7c567bcf07327c89f28c3 100644 (file)
@@ -26,6 +26,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
new file mode 100644 (file)
index 0000000..3a822a5
--- /dev/null
@@ -0,0 +1,46 @@
+from __future__ import unicode_literals\r
+\r
+import re\r
+\r
+from .common import InfoExtractor\r
+\r
+\r
+class BildIE(InfoExtractor):\r
+    IE_NAME = 'bild'\r
+    _TEST = {\r
+        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',\r
+        'info_dict': {\r
+            'id': '38184146',\r
+            'title': 'BILD hat sie getestet',\r
+            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg',\r
+            'duration': 196,\r
+        }\r
+    }\r
+    \r
+    #http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html\r
+    _VALID_URL = r'http?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'\r
+    \r
+    def _real_extract(self, url):\r
+        m = re.match(self._VALID_URL, url)\r
+        video_id = m.group('id')\r
+        \r
+        #webpage = self._download_webpage(url, video_id)\r
+        \r
+        xml_url = url.split(".bild.html")[0]+",view=xml.bild.xml"\r
+        \r
+        doc = self._download_xml(xml_url, video_id)\r
+        \r
+        video_url = doc.attrib['src']\r
+        title = doc.attrib['ueberschrift']\r
+        description = doc.attrib['text']\r
+        thumbnail = doc.attrib['img']\r
+        duration = int(doc.attrib['duration'])/1000\r
+\r
+        return {\r
+            'id': video_id,\r
+            'title': title,\r
+            'description': description,\r
+            'url': video_url,\r
+            'thumbnail': thumbnail,\r
+            'duration': duration,\r
+        }\r