The Daily Show Podcast support

author felix <m.p.isaev@yandex.com>

Mon, 16 Mar 2015 19:05:02 +0000 (20:05 +0100)

committer felix <m.p.isaev@yandex.com>

Mon, 16 Mar 2015 19:05:02 +0000 (20:05 +0100)
author felix <m.p.isaev@yandex.com>
Mon, 16 Mar 2015 19:05:02 +0000 (20:05 +0100)
committer felix <m.p.isaev@yandex.com>
Mon, 16 Mar 2015 19:05:02 +0000 (20:05 +0100)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index 1bb3e1a1ce5042bd420a4f3fae1fb89ce2369ef5..e94779d40455f9069f679afb6e7dd7753147afa2 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -84,7 +84,7 @@ from .cnn import (
  )
  from .collegehumor import CollegeHumorIE
  from .collegerama import CollegeRamaIE
  )
  from .collegehumor import CollegeHumorIE
  from .collegerama import CollegeRamaIE
-from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE, TheDailyShowPodcastIE
  from .comcarcoff import ComCarCoffIE
  from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
  from .condenast import CondeNastIE
  from .comcarcoff import ComCarCoffIE
  from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
  from .condenast import CondeNastIE
@@ -250,6 +250,7 @@ from .letv import (
      LetvPlaylistIE
  )
  from .lifenews import LifeNewsIE
      LetvPlaylistIE
  )
  from .lifenews import LifeNewsIE
+from .libsyn import LibsynIE
  from .liveleak import LiveLeakIE
  from .livestream import (
      LivestreamIE,
  from .liveleak import LiveLeakIE
  from .livestream import (
      LivestreamIE,
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py

index e5edcc84b69ef7bdffdbb7ed158c901c560a7575..e427b9821cfe8b7aab793e7c814474efbac4477e 100644 (file)
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
  
  import re
  
  
  import re
  
+from .common import InfoExtractor
  from .mtv import MTVServicesInfoExtractor
  from ..compat import (
      compat_str,
  from .mtv import MTVServicesInfoExtractor
  from ..compat import (
      compat_str,
@@ -272,3 +273,23 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
              'title': show_name + ' ' + title,
              'description': description,
          }
              'title': show_name + ' ' + title,
              'description': description,
          }
+
+class TheDailyShowPodcastIE(InfoExtractor):
+    """Extractor for The Daily Show podcast pages on thedailyshow.cc.com.
+
+    The podcast page embeds a Libsyn HTML5 player in an iframe; this
+    extractor only locates that player URL and delegates to it.
+    """
+    _VALID_URL = r'(?P<scheme>https?:)?//thedailyshow\.cc\.com/podcast/(?P<id>[a-z\-]+)'
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at the embedded player.
+
+        url may be absolute or protocol-relative (both match _VALID_URL).
+        """
+        display_id = self._match_id(url)
+
+        # Resolve the scheme once: it is needed both to make a
+        # protocol-relative page URL downloadable and to complete a
+        # protocol-relative player URL found in the page.
+        mobj = re.match(self._VALID_URL, url)
+        scheme = mobj.group('scheme') or 'https:'
+        if url.startswith('//'):
+            # _VALID_URL accepts scheme-less URLs, but they cannot be
+            # downloaded without a scheme.
+            url = scheme + url
+
+        webpage = self._download_webpage(url, display_id)
+
+        player_url = self._search_regex(
+            r'<iframe(?:\s+[^>]+)?\s*src="((?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/[0-9]+)',
+            webpage, 'player URL')
+        if player_url.startswith('//'):
+            player_url = scheme + player_url
+
+        return {
+            '_type': 'url_transparent',
+            'url': player_url,
+        }
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py

new file mode 100644 (file)

index 0000000..4b5029f
--- /dev/null
+++ b/youtube_dl/extractor/libsyn.py
@@ -0,0 +1,41 @@
+# encoding: utf-8
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+)
+
+class LibsynIE(InfoExtractor):
+    """Extractor for episodes served by the Libsyn HTML5 embed player."""
+    _VALID_URL = r'(?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)(?:/.*)?'
+
+    def _real_extract(self, url):
+        """Scrape the player page for episode metadata and media URLs.
+
+        Returns an info dict with 'id', 'title', 'description',
+        'upload_date' and 'formats'. Only the episode title and the two
+        media URLs are required; a missing release date or description
+        yields None for that field instead of aborting the extraction.
+        """
+        # _VALID_URL also accepts protocol-relative URLs; give them a scheme
+        # so the page can actually be downloaded.
+        if url.startswith('//'):
+            url = 'https:' + url
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        podcast_episode_title = self._search_regex(
+            r'<h3>(.*?)</h3>', webpage, 'episode title')
+
+        # Optional metadata: warn rather than fail when it is absent.
+        release_date = self._search_regex(
+            r'<div class="release_date">Released: (.*?)</div>',
+            webpage, 'release date', fatal=False)
+        podcast_date = unified_strdate(release_date) if release_date else None
+        # (?s) lets the description span several lines.
+        podcast_description = self._search_regex(
+            r'(?s)<div id="info_text_body">(.*?)</div>',
+            webpage, 'description', fatal=False)
+
+        # [^"]+ stops at the closing quote; a greedy .* would swallow
+        # everything up to the last '";' on the same line.
+        url0 = self._search_regex(
+            r'var mediaURLLibsyn = "(?P<url0>https?://[^"]+)";',
+            webpage, 'first media URL')
+        url1 = self._search_regex(
+            r'var mediaURL = "(?P<url1>https?://[^"]+)";',
+            webpage, 'second media URL')
+
+        # The page usually exposes the same file under both variables;
+        # only list the second URL when it really differs.
+        formats = [{'url': url0}]
+        if url1 != url0:
+            formats.append({'url': url1})
+
+        return {
+            'id': display_id,
+            'title': podcast_episode_title,
+            'description': podcast_description,
+            'upload_date': podcast_date,
+            'formats': formats,
+        }
author	felix <m.p.isaev@yandex.com>
	Mon, 16 Mar 2015 19:05:02 +0000 (20:05 +0100)
committer	felix <m.p.isaev@yandex.com>
	Mon, 16 Mar 2015 19:05:02 +0000 (20:05 +0100)
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/comedycentral.py		patch \| blob \| history
youtube_dl/extractor/libsyn.py	[new file with mode: 0644]	patch \| blob