[extractor/common] Extract interaction statistic
authorSergey M․ <dstftw@gmail.com>
Fri, 27 Apr 2018 19:48:03 +0000 (02:48 +0700)
committerSergey M․ <dstftw@gmail.com>
Fri, 27 Apr 2018 19:48:03 +0000 (02:48 +0700)
youtube_dl/extractor/common.py

index e0c3c8eb0c81fab1ed7d6567d923ec4b2f809d95..a9939b0fdc817c42e4c09999560dfd8c76873a5c 100644 (file)
@@ -1020,6 +1020,40 @@ class InfoExtractor(object):
         if isinstance(json_ld, dict):
             json_ld = [json_ld]
 
+        INTERACTION_TYPE_MAP = {
+            'CommentAction': 'comment',
+            'AgreeAction': 'like',
+            'DisagreeAction': 'dislike',
+            'LikeAction': 'like',
+            'DislikeAction': 'dislike',
+            'ListenAction': 'view',
+            'WatchAction': 'view',
+            'ViewAction': 'view',
+        }
+
+        def extract_interaction_statistic(e):
+            interaction_statistic = e.get('interactionStatistic')
+            if not isinstance(interaction_statistic, list):
+                return
+            for is_e in interaction_statistic:
+                if not isinstance(is_e, dict):
+                    continue
+                if is_e.get('@type') != 'InteractionCounter':
+                    continue
+                interaction_type = is_e.get('interactionType')
+                if not isinstance(interaction_type, compat_str):
+                    continue
+                interaction_count = int_or_none(is_e.get('userInteractionCount'))
+                if interaction_count is None:
+                    continue
+                count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+                if not count_kind:
+                    continue
+                count_key = '%s_count' % count_kind
+                if info.get(count_key) is not None:
+                    continue
+                info[count_key] = interaction_count
+
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
             info.update({
@@ -1035,6 +1069,7 @@ class InfoExtractor(object):
                 'height': int_or_none(e.get('height')),
                 'view_count': int_or_none(e.get('interactionCount')),
             })
+            extract_interaction_statistic(e)
 
         for e in json_ld:
             if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):