from .common import InfoExtractor
from ..utils import (
+ int_or_none,
unescapeHTML,
find_xpath_attr,
+ smuggle_url,
)
+from .senateisvp import SenateISVPIE
class CSpanIE(InfoExtractor):
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
# For whatever reason, the served video alternates between
# two different ones
- #'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
'info_dict': {
'id': '340723',
'ext': 'mp4',
'title': 'International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
+ }, {
+ 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+ 'info_dict': {
+ 'id': '342759',
+ 'title': 'General Motors Ignition Switch Recall',
+ },
+ 'playlist_duration_sum': 14855,
+ }, {
+ # Video from senate.gov
+ 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
+ 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'flv',
+ 'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
+ }
}]
def _real_extract(self, url):
# present, otherwise this is a stripped version
r'<p class=\'initial\'>(.*?)</p>'
],
- webpage, 'description', flags=re.DOTALL)
+ webpage, 'description', flags=re.DOTALL, default=None)
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
data = self._download_json(info_url, video_id)
- url = unescapeHTML(data['video']['files'][0]['path']['#text'])
-
- doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
+ doc = self._download_xml(
+ 'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
video_id)
- def find_string(s):
- return find_xpath_attr(doc, './/string', 'name', s).text
+ title = find_xpath_attr(doc, './/string', 'name', 'title').text
+ thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
+
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ surl = smuggle_url(senate_isvp_url, {'force_title': title})
+ return self.url_result(surl, 'SenateISVP', video_id, title)
+
+ files = data['video']['files']
+
+ entries = [{
+ 'id': '%s_%d' % (video_id, partnum + 1),
+ 'title': (
+ title if len(files) == 1 else
+ '%s part %d' % (title, partnum + 1)),
+ 'url': unescapeHTML(f['path']['#text']),
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': int_or_none(f.get('length', {}).get('#text')),
+ } for partnum, f in enumerate(files)]
return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
'id': video_id,
- 'title': find_string('title'),
- 'url': url,
- 'description': description,
- 'thumbnail': find_string('poster'),
}