[npo:radio] Move the extractor to the common npo place and add an extractor for fragments
author Sergey M․ <dstftw@gmail.com>
Thu, 12 Feb 2015 19:36:54 +0000 (01:36 +0600)
committer Sergey M․ <dstftw@gmail.com>
Thu, 12 Feb 2015 19:36:54 +0000 (01:36 +0600)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nporadio.py [deleted file]

index b118c3d1def33fbb95945fd07bdba609075bd902..a4fab540ba2927309ba57a4c17dffd7efb07b3c5 100644 (file)
@@ -318,9 +318,10 @@ from .nowvideo import NowVideoIE
 from .npo import (
     NPOIE,
     NPOLiveIE,
+    NPORadioIE,
+    NPORadioFragmentIE,
     TegenlichtVproIE,
 )
-from .nporadio import NPORadioIE
 from .nrk import (
     NRKIE,
     NRKTVIE,
index 54be06a4edc375f736cc038961eac67e73eecf85..c075618e84cb8181e27c2a9dc3cc033a16d5dea4 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
     parse_duration,
@@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor):
 
 class NPOIE(NPOBaseIE):
     IE_NAME = 'npo.nl'
-    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
 
     _TESTS = [
         {
@@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE):
 
 class NPOLiveIE(NPOBaseIE):
     IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
 
     _TEST = {
         'url': 'http://www.npo.nl/live/npo-1',
@@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE):
         }
 
 
+class NPORadioIE(InfoExtractor):
+    IE_NAME = 'npo.nl:radio'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/radio/radio-1',
+        'info_dict': {
+            'id': 'radio-1',
+            'ext': 'mp3',
+            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    @staticmethod
+    def _html_get_attribute_regex(attribute):
+        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+        stream = self._parse_json(
+            self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+            video_id)
+
+        codec = stream.get('codec')
+
+        return {
+            'id': video_id,
+            'url': stream['url'],
+            'title': self._live_title(title),
+            'acodec': codec,
+            'ext': codec,
+            'is_live': True,
+        }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+    IE_NAME = 'npo.nl:radio:fragment'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+        'info_dict': {
+            'id': '174356',
+            'ext': 'mp3',
+            'title': 'Jubileumconcert Willeke Alberti',
+        },
+    }
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, audio_id)
+
+        title = self._html_search_regex(
+            r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+            webpage, 'title')
+
+        audio_url = self._search_regex(
+            r"data-streams='([^']+)'", webpage, 'audio url')
+
+        return {
+            'id': audio_id,
+            'url': audio_url,
+            'title': title,
+        }
+
+
 class TegenlichtVproIE(NPOIE):
     IE_NAME = 'tegenlicht.vpro.nl'
     _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
diff --git a/youtube_dl/extractor/nporadio.py b/youtube_dl/extractor/nporadio.py
deleted file mode 100644 (file)
index d1c4ec1..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-
-
-class NPORadioIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>.*)'
-    _TEST = {
-        'url': 'http://www.npo.nl/radio/radio-1',
-        'info_dict': {
-            'id': 'radio-1',
-            'ext': 'mp3',
-            'title': 'NPO Radio 1',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(
-            self._html_get_attribute_regex('data-channel'), webpage, 'title')
-
-        json_data = json.loads(
-            self._html_search_regex(
-                self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'))
-
-        return {
-            'id': video_id,
-            'title': title,
-            'ext': json_data['codec'],
-            'url': json_data['url']
-        }
-
-    def _html_get_attribute_regex(self, attribute):
-        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)