Merge branch 'vgtv' of https://github.com/mrkolby/youtube-dl into mrkolby-vgtv

[youtube-dl] / test / test_download.py
diff --git a/test/test_download.py b/test/test_download.py

index 815f5bb093ba3c541b5615c276f6bae258071a9a..2b8ac69754502457ea2e09aa026f3eceaee732f4 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -7,28 +7,28 @@ import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from test.helper import (
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
  from test.helper import (
+    assertGreaterEqual,
      get_params,
      gettestcases,
      get_params,
      gettestcases,
+    expect_info_dict,
      try_rm,
      try_rm,
-    md5,
-    report_warning
+    report_warning,
  )
  
  
  import hashlib
  import io
  import json
  )
  
  
  import hashlib
  import io
  import json
-import re
  import socket
  
  import youtube_dl.YoutubeDL
  from youtube_dl.utils import (
      compat_http_client,
  import socket
  
  import youtube_dl.YoutubeDL
  from youtube_dl.utils import (
      compat_http_client,
-    compat_str,
      compat_urllib_error,
      compat_HTTPError,
      DownloadError,
      ExtractorError,
      compat_urllib_error,
      compat_HTTPError,
      DownloadError,
      ExtractorError,
+    format_bytes,
      UnavailableVideoError,
  )
  from youtube_dl.extractor import get_info_extractor
      UnavailableVideoError,
  )
  from youtube_dl.extractor import get_info_extractor
@@ -65,15 +65,21 @@ def generator(test_case):
      def test_template(self):
          ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
          other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
      def test_template(self):
          ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
          other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
+        is_playlist = any(k.startswith('playlist') for k in test_case)
+        test_cases = test_case.get(
+            'playlist', [] if is_playlist else [test_case])
+
          def print_skipping(reason):
              print('Skipping %s: %s' % (test_case['name'], reason))
          if not ie.working():
              print_skipping('IE marked as not _WORKING')
              return
          def print_skipping(reason):
              print('Skipping %s: %s' % (test_case['name'], reason))
          if not ie.working():
              print_skipping('IE marked as not _WORKING')
              return
-        if 'playlist' not in test_case:
-            info_dict = test_case.get('info_dict', {})
-            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+
+        for tc in test_cases:
+            info_dict = tc.get('info_dict', {})
+            if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
                  raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
                  raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+
          if 'skip' in test_case:
              print_skipping(test_case['skip'])
              return
          if 'skip' in test_case:
              print_skipping(test_case['skip'])
              return
@@ -83,6 +89,9 @@ def generator(test_case):
                  return
  
          params = get_params(test_case.get('params', {}))
                  return
  
          params = get_params(test_case.get('params', {}))
+        if is_playlist and 'playlist' not in test_case:
+            params.setdefault('extract_flat', True)
+            params.setdefault('skip_download', True)
  
          ydl = YoutubeDL(params)
          ydl.add_default_info_extractors()
  
          ydl = YoutubeDL(params)
          ydl.add_default_info_extractors()
@@ -95,9 +104,11 @@ def generator(test_case):
          def get_tc_filename(tc):
              return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
  
          def get_tc_filename(tc):
              return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
  
-        test_cases = test_case.get('playlist', [test_case])
-        def try_rm_tcs_files():
-            for tc in test_cases:
+        res_dict = None
+        def try_rm_tcs_files(tcs=None):
+            if tcs is None:
+                tcs = test_cases
+            for tc in tcs:
                  tc_filename = get_tc_filename(tc)
                  try_rm(tc_filename)
                  try_rm(tc_filename + '.part')
                  tc_filename = get_tc_filename(tc)
                  try_rm(tc_filename)
                  try_rm(tc_filename + '.part')
@@ -107,7 +118,10 @@ def generator(test_case):
              try_num = 1
              while True:
                  try:
              try_num = 1
              while True:
                  try:
-                    ydl.download([test_case['url']])
+                    # We're not using .download here since that is just a shim
+                    # for outside error handling, and returns the exit code
+                    # instead of the result dict.
+                    res_dict = ydl.extract_info(test_case['url'])
                  except (DownloadError, ExtractorError) as err:
                      # Check if the exception is not a network related one
                      if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
                  except (DownloadError, ExtractorError) as err:
                      # Check if the exception is not a network related one
                      if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
@@ -123,59 +137,62 @@ def generator(test_case):
                  else:
                      break
  
                  else:
                      break
  
+            if is_playlist:
+                self.assertEqual(res_dict['_type'], 'playlist')
+                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+            if 'playlist_mincount' in test_case:
+                assertGreaterEqual(
+                    self,
+                    len(res_dict['entries']),
+                    test_case['playlist_mincount'],
+                    'Expected at least %d in playlist %s, but got only %d' % (
+                        test_case['playlist_mincount'], test_case['url'],
+                        len(res_dict['entries'])))
+            if 'playlist_count' in test_case:
+                self.assertEqual(
+                    len(res_dict['entries']),
+                    test_case['playlist_count'],
+                    'Expected %d entries in playlist %s, but got %d.' % (
+                        test_case['playlist_count'],
+                        test_case['url'],
+                        len(res_dict['entries']),
+                    ))
+            if 'playlist_duration_sum' in test_case:
+                got_duration = sum(e['duration'] for e in res_dict['entries'])
+                self.assertEqual(
+                    test_case['playlist_duration_sum'], got_duration)
+
              for tc in test_cases:
                  tc_filename = get_tc_filename(tc)
                  if not test_case.get('params', {}).get('skip_download', False):
                      self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                      self.assertTrue(tc_filename in finished_hook_called)
              for tc in test_cases:
                  tc_filename = get_tc_filename(tc)
                  if not test_case.get('params', {}).get('skip_download', False):
                      self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                      self.assertTrue(tc_filename in finished_hook_called)
+                    expected_minsize = tc.get('file_minsize', 10000)
+                    if expected_minsize is not None:
+                        if params.get('test'):
+                            expected_minsize = max(expected_minsize, 10000)
+                        got_fsize = os.path.getsize(tc_filename)
+                        assertGreaterEqual(
+                            self, got_fsize, expected_minsize,
+                            'Expected %s to be at least %s, but it\'s only %s ' %
+                            (tc_filename, format_bytes(expected_minsize),
+                                format_bytes(got_fsize)))
+                    if 'md5' in tc:
+                        md5_for_file = _file_md5(tc_filename)
+                        self.assertEqual(md5_for_file, tc['md5'])
                  info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                  self.assertTrue(os.path.exists(info_json_fn))
                  info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                  self.assertTrue(os.path.exists(info_json_fn))
-                if 'md5' in tc:
-                    md5_for_file = _file_md5(tc_filename)
-                    self.assertEqual(md5_for_file, tc['md5'])
                  with io.open(info_json_fn, encoding='utf-8') as infof:
                      info_dict = json.load(infof)
                  with io.open(info_json_fn, encoding='utf-8') as infof:
                      info_dict = json.load(infof)
-                for (info_field, expected) in tc.get('info_dict', {}).items():
-                    if isinstance(expected, compat_str) and expected.startswith('re:'):
-                        got = info_dict.get(info_field)
-                        match_str = expected[len('re:'):]
-                        match_rex = re.compile(match_str)
-
-                        self.assertTrue(
-                            isinstance(got, compat_str) and match_rex.match(got),
-                            u'field %s (value: %r) should match %r' % (info_field, got, match_str))
-                    elif isinstance(expected, type):
-                        got = info_dict.get(info_field)
-                        self.assertTrue(isinstance(got, expected),
-                            u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
-                    else:
-                        if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                            got = 'md5:' + md5(info_dict.get(info_field))
-                        else:
-                            got = info_dict.get(info_field)
-                        self.assertEqual(expected, got,
-                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
-
-                # Check for the presence of mandatory fields
-                for key in ('id', 'url', 'title', 'ext'):
-                    self.assertTrue(key in info_dict.keys() and info_dict[key])
-                # Check for mandatory fields that are automatically set by YoutubeDL
-                for key in ['webpage_url', 'extractor', 'extractor_key']:
-                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
-                # Are checkable fields missing from the test case definition?
-                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-                    for key, value in info_dict.items()
-                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
-                missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys())
-                if missing_keys:
-                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
-                    self.assertFalse(
-                        missing_keys,
-                        'Missing keys in test definition: %s' % (
-                            ','.join(sorted(missing_keys))))
+
+                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
          finally:
              try_rm_tcs_files()
          finally:
              try_rm_tcs_files()
+            if is_playlist and res_dict is not None:
+                # Remove all other files that may have been extracted if the
+                # extractor returns full results even with extract_flat
+                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
+                try_rm_tcs_files(res_tcs)
  
      return test_template
  
  
      return test_template