[lynda] Check for empty subtitles

[youtube-dl] / youtube_dl / extractor / lynda.py
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py

index 109055e720a6908b748c08ddd70fd69026f192ab..a84019bdf69b6e4588ccd6d4eff928e3a7ffdfcd 100644 (file)
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -18,7 +18,7 @@ from ..utils import (
  class LyndaIE(InfoExtractor):
      IE_NAME = 'lynda'
      IE_DESC = 'lynda.com videos'
-    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
+    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
      _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
      _NETRC_MACHINE = 'lynda'
  
@@ -27,7 +27,7 @@ class LyndaIE(InfoExtractor):
  
      ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
  
-    _TEST = {
+    _TESTS = [{
          'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
          'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
          'info_dict': {
@@ -36,7 +36,10 @@ class LyndaIE(InfoExtractor):
              'title': 'Using the exercise files',
              'duration': 68
          }
-    }
+    }, {
+        'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
+        'only_matching': True,
+    }]
  
      def _real_initialize(self):
          self._login()
@@ -141,6 +144,7 @@ class LyndaIE(InfoExtractor):
  
      def _fix_subtitles(self, subs):
          srt = ''
+        seq_counter = 0
          for pos in range(0, len(subs) - 1):
              seq_current = subs[pos]
              m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
@@ -152,8 +156,10 @@ class LyndaIE(InfoExtractor):
                  continue
              appear_time = m_current.group('timecode')
              disappear_time = m_next.group('timecode')
-            text = seq_current['Caption']
-            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
+            text = seq_current['Caption'].strip()
+            if text:
+                seq_counter += 1
+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
          if srt:
              return srt