[instagram] Add support for user profiles (Fixes #2606)

author Philipp Hagemeister <phihag@phihag.de>

Sun, 23 Mar 2014 15:06:03 +0000 (16:06 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Sun, 23 Mar 2014 15:06:07 +0000 (16:06 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Mar 2014 15:06:03 +0000 (16:06 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sun, 23 Mar 2014 15:06:07 +0000 (16:06 +0100)
diff --git a/test/helper.py b/test/helper.py

index 9e255878fdde106c9691821fc0c1997f92b3611e..8739f816c148729158bf9859a8569e9167846d7f 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -110,3 +110,21 @@ def expect_info_dict(self, expected_dict, got_dict):
              self.assertEqual(expected, got,
                  u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
  
+    # Check for the presence of mandatory fields
+    for key in ('id', 'url', 'title', 'ext'):
+        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # Check for mandatory fields that are automatically set by YoutubeDL
+    for key in ['webpage_url', 'extractor', 'extractor_key']:
+        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
+
+    # Are checkable fields missing from the test case definition?
+    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
+        for key, value in got_dict.items()
+        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+    if missing_keys:
+        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
+        self.assertFalse(
+            missing_keys,
+            'Missing keys in test definition: %s' % (
+                ', '.join(sorted(missing_keys))))
diff --git a/test/test_download.py b/test/test_download.py

index f4e5d120efdf129acff7cdbb04e36755b4bace12..f171c10bad84a876a9fe4caba2b71a984c3169ec 100644 (file)
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -137,25 +137,6 @@ def generator(test_case):
                      info_dict = json.load(infof)
  
                  expect_info_dict(self, tc.get('info_dict', {}), info_dict)
-
-                # Check for the presence of mandatory fields
-                for key in ('id', 'url', 'title', 'ext'):
-                    self.assertTrue(key in info_dict.keys() and info_dict[key])
-                # Check for mandatory fields that are automatically set by YoutubeDL
-                for key in ['webpage_url', 'extractor', 'extractor_key']:
-                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
-
-                # Are checkable fields missing from the test case definition?
-                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
-                    for key, value in info_dict.items()
-                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
-                missing_keys = set(test_info_dict.keys()) - set(tc.get('info_dict', {}).keys())
-                if missing_keys:
-                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
-                    self.assertFalse(
-                        missing_keys,
-                        'Missing keys in test definition: %s' % (
-                            ','.join(sorted(missing_keys))))
          finally:
              try_rm_tcs_files()
  
diff --git a/test/test_playlists.py b/test/test_playlists.py

index 2b1a7e849af597eeea461183d1f0942a058e671d..b1e38e7e9ef29d4fa5bbabcecabec26bfce4bad6 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -9,8 +9,10 @@ import sys
  import unittest
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  
-from test.helper import FakeYDL
-
+from test.helper import (
+    expect_info_dict,
+    FakeYDL,
+)
  
  from youtube_dl.extractor import (
      AcademicEarthCourseIE,
@@ -39,6 +41,7 @@ from youtube_dl.extractor import (
      TEDIE,
      ToypicsUserIE,
      XTubeUserIE,
+    InstagramUserIE,
  )
  
  
@@ -287,5 +290,34 @@ class TestPlaylists(unittest.TestCase):
          self.assertEqual(result['id'], 'greenshowers')
          self.assertTrue(len(result['entries']) >= 155)
  
+    def test_InstagramUser(self):
+        # Extract the profile as a playlist, then validate one known entry
+        dl = FakeYDL()
+        ie = InstagramUserIE(dl)
+        result = ie.extract('http://instagram.com/porsche')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'porsche')
+        self.assertTrue(len(result['entries']) >= 2)
+        # Default to None so that a missing entry is reported as a test
+        # failure with a message instead of a bare StopIteration error
+        test_video = next(
+            (e for e in result['entries']
+             if e['id'] == '614605558512799803_462752227'),
+            None)
+        self.assertTrue(test_video, 'Expected video not found in playlist')
+        # Add the default extractor/webpage fields and let YoutubeDL process
+        # the entry (download disabled) before comparing it
+        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
+        dl.process_video_result(test_video, download=False)
+        EXPECTED = {
+            'id': '614605558512799803_462752227',
+            'ext': 'mp4',
+            'title': '#Porsche Intelligent Performance.',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'uploader': 'Porsche',
+            'uploader_id': 'porsche',
+        }
+        expect_info_dict(self, EXPECTED, test_video)
+
+
  if __name__ == '__main__':
      unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index c5d08b0bbabb572c3711d1ae8119e7eeb7e40e71..d18d6dd00e57e6eb7d8c5213a4b6d46ffb65df13 100644 (file)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -512,13 +512,7 @@ class YoutubeDL(object):
                          '_type': 'compat_list',
                          'entries': ie_result,
                      }
-                self.add_extra_info(ie_result,
-                    {
-                        'extractor': ie.IE_NAME,
-                        'webpage_url': url,
-                        'webpage_url_basename': url_basename(url),
-                        'extractor_key': ie.ie_key(),
-                    })
+                self.add_default_extra_info(ie_result, ie, url)
                  if process:
                      return self.process_ie_result(ie_result, download, extra_info)
                  else:
@@ -537,6 +531,14 @@ class YoutubeDL(object):
          else:
              self.report_error('no suitable InfoExtractor for URL %s' % url)
  
+    def add_default_extra_info(self, ie_result, ie, url):
+        """Pass the extractor name/key and page URL to add_extra_info."""
+        defaults = {'extractor': ie.IE_NAME}
+        defaults['webpage_url'] = url
+        defaults['webpage_url_basename'] = url_basename(url)
+        defaults['extractor_key'] = ie.ie_key()
+        self.add_extra_info(ie_result, defaults)
+
      def process_ie_result(self, ie_result, download=True, extra_info={}):
          """
          Take the result of the ie(may be modified) and resolve all unresolved
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py

index b5c8ef6820689f9988065d18730aef0776c5827f..3e728e87606f5e08bb94a1a0fa36735b35cc3796 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,7 +112,7 @@ from .imdb import (
  )
  from .ina import InaIE
  from .infoq import InfoQIE
-from .instagram import InstagramIE
+from .instagram import InstagramIE, InstagramUserIE
  from .internetvideoarchive import InternetVideoArchiveIE
  from .iprima import IPrimaIE
  from .ivi import (
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py

index 63141af272ac077ed97dcd5baf4c5a0dcb7d3b47..994f0e4aefa7f52bf8ba2273ab1426958a2f830e 100644 (file)
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
  
  
  class InstagramIE(InfoExtractor):
@@ -37,3 +40,80 @@ class InstagramIE(InfoExtractor):
              'uploader_id': uploader_id,
              'description': desc,
          }
+
+
+class InstagramUserIE(InfoExtractor):
+    """Playlist extractor for the videos of an Instagram user profile.
+
+    Walks the profile's /media JSON listing page by page, passing the id
+    of the last item seen as max_id, until a page contains no items.
+    Only items whose type is 'video' become playlist entries.
+    """
+    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uploader_id = mobj.group('username')
+
+        entries = []
+        page_count = 0
+        media_url = 'http://instagram.com/%s/media' % uploader_id
+        while True:
+            page = self._download_json(
+                media_url, uploader_id,
+                note='Downloading page %d ' % (page_count + 1),
+            )
+            page_count += 1
+
+            for it in page['items']:
+                if it.get('type') != 'video':
+                    continue
+                # A key that is present with a JSON null value makes
+                # .get(key, {}) return None, hence the "or {}" fallbacks
+                like_count = int_or_none((it.get('likes') or {}).get('count'))
+                user = it.get('user') or {}
+
+                formats = [{
+                    'format_id': k,
+                    'height': v.get('height'),
+                    'width': v.get('width'),
+                    'url': v['url'],
+                } for k, v in it['videos'].items()]
+                self._sort_formats(formats)
+
+                thumbnails_el = it.get('images') or {}
+                thumbnail = (thumbnails_el.get('thumbnail') or {}).get('url')
+
+                # Fall back to the media id when there is no caption text
+                title = (it.get('caption') or {}).get('text') or it['id']
+
+                entries.append({
+                    'id': it['id'],
+                    'title': title,
+                    'formats': formats,
+                    'thumbnail': thumbnail,
+                    'webpage_url': it.get('link'),
+                    'uploader': user.get('full_name'),
+                    'uploader_id': user.get('username'),
+                    'like_count': like_count,
+                    # NOTE(review): the test helper checks a 'timestamp'
+                    # field, not 'upload_timestamp' - confirm intended key
+                    'upload_timestamp': int_or_none(it.get('created_time')),
+                })
+
+            # An empty page marks the end of the listing
+            if not page['items']:
+                break
+            max_id = page['items'][-1]['id']
+            media_url = (
+                'http://instagram.com/%s/media?max_id=%s' % (
+                    uploader_id, max_id))
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': uploader_id,
+            'title': uploader_id,
+        }
author	Philipp Hagemeister <phihag@phihag.de>
	Sun, 23 Mar 2014 15:06:03 +0000 (16:06 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sun, 23 Mar 2014 15:06:07 +0000 (16:06 +0100)
test/helper.py		patch \| blob \| history
test/test_download.py		patch \| blob \| history
test/test_playlists.py		patch \| blob \| history
youtube_dl/YoutubeDL.py		patch \| blob \| history
youtube_dl/extractor/__init__.py		patch \| blob \| history
youtube_dl/extractor/instagram.py		patch \| blob \| history