dlc2action.feature_extraction
Feature extraction
Feature extractors generate feature dictionaries that are then passed to SSL transformations (see `dlc2action.ssl`) and finally to transformers that perform augmentations and merge all features into a tensor (see `dlc2action.transformer`).
The keys of the dictionaries are the feature names (`'coords'`, `'speeds'` and so on) and the values are the feature tensors. It is generally assumed that the tensors have shape `(F, ..., L)`, where `F` is the variable number of features (per frame, keypoint, pixel...) and `L` is the length of the segment in frames. The `F` value can be different for every tensor in the dictionary, while the rest of the shape should be constant.
#
# Copyright 2020-2022 by A. Mathis Group and contributors. All rights reserved.
#
# This project and all its files are licensed under GNU AGPLv3 or later version. A copy is included in dlc2action/LICENSE.AGPL.
#
"""
## Feature extraction

Feature extractors generate feature dictionaries that are then passed to SSL transformations
(see `dlc2action.ssl`) and finally to
transformers that perform augmentations and merge all features into a tensor (see `dlc2action.transformer`).
The keys of the dictionaries are the feature names (`'coords'`, `'speeds'` and so on) and the values are the
feature tensors. It is generally assumed that the tensors have shape `(F, ..., L)` where `F` is the variable
number of features (per frame, keypoint, pixel...) and `L` is the length of the segment in frames. The `F`
value can be different for every tensor in the dictionary and the rest of the shape should be constant.
"""

import copy
import math
from abc import ABC, abstractmethod
from itertools import combinations
from typing import Dict, List, Optional, Set, Tuple

import numpy as np
from matplotlib.cm import get_cmap
from scipy.ndimage import gaussian_filter
from scipy.spatial.distance import cdist

from dlc2action.data.base_store import PoseInputStore


class FeatureExtractor(ABC):
    """
    The base class for feature extractors

    The `extract_features` method receives a data dictionary as input.
    We do not assume a specific
    structure in the values and all necessary information (coordinates of a bodypart, number
    of frames, list of bodyparts) is inferred using input store methods. Therefore, each child class
    of `FeatureExtractor` is written for a specific subclass of `dlc2action.data.base_store.InputStore`
    with the data inference
    functions defined (e.g. `dlc2action.data.base_store.PoseInputStore`).
    """

    input_store_class = None
    """
    The `dlc2action.data.base_store.InputStore` child class paired with this feature extractor
    """

    @abstractmethod
    def __init__(self, ignored_clips: Optional[List] = None, **kwargs):
        """
        Parameters
        ----------
        ignored_clips : list
            a list of string names of clip ids to ignore
        """

    @abstractmethod
    def extract_features(
        self, data_dict: Dict, video_id: str, one_clip: bool = False
    ) -> Dict:
        """
        Extract features from a data dictionary

        An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
        video id and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about video
        and clip ids. We do not assume a specific
        structure in the values, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
            values are numpy arrays of shape `(#features, ..., #frames)`
        """


class PoseFeatureExtractor(FeatureExtractor):
    """
    The base class for pose feature extractors

    Pose feature extractors work with `dlc2action.data.base_store.InputStore` instances
    that inherit from `dlc2action.data.base_store.PoseInputStore`.
    """

    input_store_class = PoseInputStore

    def __init__(self, input_store: PoseInputStore, *args, **kwargs):
        """
        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        """

        # Only the accessors are kept, so the extractor never depends on the store's internal data layout.
        self.get_bodyparts = input_store.get_bodyparts
        self.get_coords = input_store.get_coords
        self.get_n_frames = input_store.get_n_frames
        self.get_likelihood = input_store.get_likelihood


class KinematicExtractor(PoseFeatureExtractor):
    """
    A feature extractor for basic kinematic features: speeds, accelerations, distances.

    The available keys are:
        - coords: the allocentric bodypart coordinates,
        - coord_diff: the egocentric bodypart coordinates,
        - center: the body center (mean of bodyparts) coordinates,
        - intra_distance: distances between bodyparts (pairs set in `distance_pairs` or all combinations by default),
        - inter_distance: computed in interactive mode (for pairs of animals); distances from each bodypart of one
            animal to the centroid of the other animal,
        - speed_direction: unit vector of speed approximation for each bodypart,
        - speed_value: l2 norm of the speed approximation vector for each bodypart,
        - acc_joints: l2 norm of the acceleration approximation vector for each bodypart,
        - angle_speeds: direction angle (in radians) of the speed approximation for each bodypart,
        - angles: cosines of angles set in `angle_pairs`,
        - areas: areas of polygons set in `area_vertices`,
        - zone_bools: binary identifier of zone visitation, defined in `zone_bools`,
        - zone_distances: distance to zone boundary, defined in `zone_distances`,
        - likelihood: pose estimation likelihood (if known).

    The default set is `{coord_diff, center, intra_distance, inter_distance, speed_direction, speed_value, acc_joints, angle_speeds}`
    """

    def __init__(
        self,
        input_store: PoseInputStore,
        keys: Optional[List] = None,
        ignored_clips: Optional[List] = None,
        interactive: bool = False,
        averaging_window: int = 1,
        distance_pairs: Optional[List] = None,
        angle_pairs: Optional[List] = None,
        neighboring_frames: int = 0,
        area_vertices: Optional[List] = None,
        zone_vertices: Optional[Dict] = None,
        zone_bools: Optional[List] = None,
        zone_distances: Optional[List] = None,
        *args,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        keys : list, optional
            a list of names of the features to extract
        ignored_clips : list, optional
            a list of clip ids to ignore
        interactive : bool, default False
            if `True`, features for pairs of clips will be computed
        averaging_window : int, default 1
            if >1, the coordinates are averaged over frames with a centered moving window of this size
            before the features are computed
        distance_pairs : list, optional
            a list of bodypart name tuples (e.g. `[("tail", "nose")]`) to compute distances for when `"intra_distance"`
            is in `keys` (by default all distances are computed)
        angle_pairs : list, optional
            a list of bodypart name tuples to compute angle cosines for when `"angles"` is in `keys` (by default no
            angles are computed); a tuple of three names (e.g. `[("ear1", "nose", "ear2")]`) defines the angle at the
            middle bodypart between the `"nose"--"ear1"` and `"nose"--"ear2"` lines, a tuple of four names
            `(a, b, c, d)` defines the angle between the `b -> a` and `d -> c` vectors
        neighboring_frames : int, default 0
            if >0, every frame `i` is replaced with the flattened features of frames `[i - n, i + n)`; the first
            `n + 1` and the last `n` frames are dropped (generally not recommended)
        area_vertices : list, optional
            a list of bodypart name tuples of any length >= 3 (e.g. `[("ear1", "nose", "ear2", "spine1")]`) that define polygons
            to compute areas for when `"areas"` is in `keys` (by default no areas are computed)
        zone_vertices : dict, optional
            a dictionary of bodypart name tuples of any length >= 3 that define zones for `"zone_bools"` and `"zone_distances"`
            features; keys should be zone names and values should be tuples that define the polygons (e.g.
            `{"main_area": ("x_min", "x_max", "y_max", "y_min")}`)
        zone_bools : list, optional
            a list of zone and bodypart name tuples to compute binary identifiers for (1 if an animal is within the polygon or
            0 if it's outside) (e.g. `[("main_area", "nose")]`); the zones should be defined in the `zone_vertices` parameter;
            this is only computed if `"zone_bools"` is in `keys`
        zone_distances : list, optional
            a list of zone and bodypart name tuples to compute distances for (distance from the bodypart to the closest of the
            boundaries) (e.g. `[("main_area", "nose")]`); the zones should be defined in the `zone_vertices` parameter;
            this is only computed if `"zone_distances"` is in `keys`
        """

        if keys is None:
            keys = [
                "coord_diff",
                "center",
                "intra_distance",
                "speed_direction",
                "speed_value",
                "angle_speeds",
                "acc_joints",
                "inter_distance",
            ]
        self.keys = keys
        self.ignored_clips = [] if ignored_clips is None else ignored_clips
        self.interactive = interactive
        self.w = averaging_window
        self.distance_pairs = distance_pairs
        self.angle_pairs = angle_pairs
        self.area_vertices = area_vertices
        self.neighboring_frames = int(neighboring_frames)
        self.zone_vertices = {} if zone_vertices is None else zone_vertices
        self.zone_bools = [] if zone_bools is None else zone_bools
        self.zone_distances = [] if zone_distances is None else zone_distances
        super().__init__(input_store)

    def _angle_speed(self, xy_coord_joint: np.ndarray, n_frames: int) -> np.ndarray:
        """
        Compute the direction angle of the frame-to-frame displacement of one bodypart

        Parameters
        ----------
        xy_coord_joint : np.ndarray
            the coordinates of one bodypart, shape `(#frames, #coordinates)`
        n_frames : int
            the number of frames in the clip

        Returns
        -------
        angles : np.ndarray
            for 2D data an array of shape `(#frames,)`; otherwise an array of shape `(#frames, 3)` with one
            angle per coordinate plane; the first frame repeats the value of the second one
        """

        n_dims = 2 if xy_coord_joint.shape[1] == 2 else 3
        diffs = []
        for dim in range(n_dims):
            diff = np.diff(xy_coord_joint[:, dim])
            # a zero coordinate marks a missing detection, so the displacement from it is ignored
            diff[xy_coord_joint[:-1, dim] == 0] = 0
            diffs.append(diff[: n_frames - 1])

        def pad_first(radians: np.ndarray) -> np.ndarray:
            # np.diff loses one frame; repeat the first value to restore the length
            return np.insert(radians, 0, radians[0], axis=0)

        if n_dims == 2:
            x_diff, y_diff = diffs
            return pad_first(np.arctan2(y_diff, x_diff))
        # one angle per pair of axes, in the order (x, y), (x, z), (y, z)
        planes = [pad_first(np.arctan2(a, b)) for a, b in combinations(diffs, 2)]
        return np.stack(planes, axis=-1)

    def _poly_area(self, x, y):
        """
        Compute the area of a polygon from its vertex coordinates with the shoelace formula
        """

        return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

    def _cdist_keep_zeros(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """
        Compute all distance combinations while setting the distance to zero if at least one of the elements is at zero

        Parameters
        ----------
        a, b : np.ndarray
            point arrays of shape `(#points, #coordinates)`

        Returns
        -------
        dist : np.ndarray
            the distance matrix where `dist[i, j]` is zero if `a[i]` or `b[j]` has a zero coordinate
        """

        dist = cdist(a, b, "euclidean")
        a_zero = np.sum(a == 0, axis=1) > 0
        b_zero = np.sum(b == 0, axis=1) > 0
        # rows are indexed by the points of a and columns by the points of b
        dist[a_zero, :] = 0
        dist[:, b_zero] = 0
        return dist

    def _smooth(self, coords: np.ndarray) -> np.ndarray:
        """
        Average the coordinates over frames with a centered moving window of `self.w` frames

        The input has shape `(#frames, #bodyparts, #coordinates)`; frames outside of the clip count as zeros.
        """

        if self.w <= 1:
            return coords
        n_frames = coords.shape[0]
        kernel = np.ones(self.w) / self.w
        # offset into the 'full' convolution that keeps the window centered on each frame
        start = (self.w - 1) // 2
        smoothed = np.empty(coords.shape, dtype=float)
        for bp in range(coords.shape[1]):
            for dim in range(coords.shape[2]):
                full = np.convolve(coords[:, bp, dim], kernel, mode="full")
                smoothed[:, bp, dim] = full[start : start + n_frames]
        return smoothed

    def _load_coords(self, data_dict: Dict, clip: str, body_parts: List) -> np.ndarray:
        """
        Stack the (smoothed) coordinates of the bodyparts into a `(#frames, #bodyparts, #coordinates)` array
        """

        coords = np.stack(
            [self.get_coords(data_dict, clip, bp) for bp in body_parts], axis=1
        )
        return self._smooth(coords)

    def _angle_cosines(self, coords: np.ndarray, body_parts: List) -> Optional[np.ndarray]:
        """
        Compute the cosines of the angles defined in `self.angle_pairs` (`None` if there are none)
        """

        if not self.angle_pairs:
            return None
        cosines = []
        for names in self.angle_pairs:
            if len(names) == 3:
                # (a, b, c) is the angle at b between the rays to a and to c
                x0, x1, y0, y1 = names[0], names[1], names[2], names[1]
            else:
                x0, x1, y0, y1 = names
            diff_x = coords[:, body_parts.index(x0), :] - coords[:, body_parts.index(x1), :]
            diff_y = coords[:, body_parts.index(y0), :] - coords[:, body_parts.index(y1), :]
            # the epsilon keeps the ratio finite when one of the vectors has zero length
            denom = np.linalg.norm(diff_x, axis=-1) * np.linalg.norm(diff_y, axis=-1) + 1e-7
            cosines.append(np.einsum("ij,ij->i", diff_x, diff_y) / denom)
        return np.stack(cosines, axis=1)

    def _polygon_areas(self, coords: np.ndarray, body_parts: List) -> Optional[np.ndarray]:
        """
        Compute the per-frame areas of the polygons defined in `self.area_vertices` (`None` if there are none)
        """

        if not self.area_vertices:
            return None
        areas = []
        for points in self.area_vertices:
            inds = [body_parts.index(x) for x in points]
            areas.append(
                np.array(
                    [
                        self._poly_area(coords[f_i, inds, 0], coords[f_i, inds, 1])
                        for f_i in range(coords.shape[0])
                    ]
                )
            )
        return np.stack(areas, axis=-1)

    def _check_zone(self, zone: str, vertex: str, body_parts: List) -> None:
        """
        Make sure that a zone and a bodypart requested for a zone feature are known
        """

        if zone not in self.zone_vertices:
            raise ValueError(f"The {zone} zone is not in zone_vertices!")
        if vertex not in body_parts:
            raise ValueError(f"The {vertex} bodypart not in bodyparts!")

    def _zone_bools(
        self, data_dict: Dict, clip: str, body_parts: List, n_rows: int
    ) -> Optional[np.ndarray]:
        """
        Compute the zone visitation indicators defined in `self.zone_bools` (`None` if there are none)

        A bodypart is considered to be inside a zone if, for every edge of the polygon, it lies on the same
        side of the edge as the polygon vertex that follows the edge; this test is exact only for convex polygons.
        """

        indicators = []
        for zone, vertex in self.zone_bools:
            self._check_zone(zone, vertex, body_parts)
            polygon = self.zone_vertices[zone]
            n_vertices = len(polygon)
            zone_bool = np.ones((n_rows, 1))
            vertex_coords = self.get_coords(data_dict, clip, vertex)
            for i, x in enumerate(polygon):
                v1 = self.get_coords(data_dict, clip, x)
                v2 = self.get_coords(data_dict, clip, polygon[(i + 1) % n_vertices])
                v3 = self.get_coords(data_dict, clip, polygon[(i + 2) % n_vertices])

                def edge_above(point: np.ndarray) -> np.ndarray:
                    # True where the v1--v2 line passes above the point at the point's x coordinate
                    return (
                        v1[:, 1]
                        + ((point[:, 0] - v1[:, 0]) / (v2[:, 0] - v1[:, 0] + 1e-7))
                        * (v2[:, 1] - v1[:, 1])
                        > point[:, 1]
                    )

                edge_bool = edge_above(v3) == edge_above(vertex_coords)
                # vertical edges have no slope, so the sides are compared along x instead
                vertical = v2[:, 0] == v1[:, 0]
                edge_bool[vertical] = (
                    (vertex_coords[:, 0] > v2[:, 0]) == (v3[:, 0] > v2[:, 0])
                )[vertical]
                zone_bool *= np.expand_dims(edge_bool, 1)
            indicators.append(zone_bool)
        if len(indicators) == 0:
            return None
        return np.concatenate(indicators, axis=1)

    def _zone_distances(
        self, data_dict: Dict, clip: str, body_parts: List
    ) -> Optional[np.ndarray]:
        """
        Compute the distances to zone boundaries defined in `self.zone_distances` (`None` if there are none)

        For every edge the distance to the infinite line through its two vertices is computed and
        the minimum over the edges is returned.
        """

        distances = []
        for zone, vertex in self.zone_distances:
            self._check_zone(zone, vertex, body_parts)
            polygon = self.zone_vertices[zone]
            v0 = self.get_coords(data_dict, clip, vertex)
            dd = []
            for i, x in enumerate(polygon):
                v1 = self.get_coords(data_dict, clip, x)
                v2 = self.get_coords(data_dict, clip, polygon[(i + 1) % len(polygon)])
                # point-to-line distance: |(x2 - x1)(y1 - y0) - (x1 - x0)(y2 - y1)| / |v2 - v1|
                d = np.abs(
                    (v2[:, 0] - v1[:, 0]) * (v1[:, 1] - v0[:, 1])
                    - (v1[:, 0] - v0[:, 0]) * (v2[:, 1] - v1[:, 1])
                ) / np.sqrt(
                    (v2[:, 0] - v1[:, 0]) ** 2 + (v2[:, 1] - v1[:, 1]) ** 2 + 1e-7
                )
                # a degenerate edge (both vertices at the same point) does not define a line
                d[(v2[:, 0] == v1[:, 0]) * (v2[:, 1] == v1[:, 1])] = 0
                dd.append(d)
            distances.append(np.min(np.stack(dd, axis=0), 0))
        if len(distances) == 0:
            return None
        return np.stack(distances, axis=1)

    def _distance(
        self, data_dict: Dict, clip1: str, clip2: str, name: str, centroid: bool = False
    ) -> Tuple:
        """
        Compute the distances between all keypoints

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        clip1, clip2 : str
            the clip ids; equal ids request single-animal features
        name : str
            the name reported in the error message (the video id)
        centroid : bool, default False
            if `True` and the clips are different, distances from the bodyparts of each clip to the
            centroid of the other clip are computed

        Returns
        -------
        result : tuple
            `(distances, coords_1, coords_2, n_frames)` if the clips are different, otherwise
            `(distances, coords, n_frames, angle_speeds, areas, angles, zone_bools, zone_distances)`

        Raises
        ------
        RuntimeError
            if the clips have different numbers of frames
        ValueError
            if a zone feature refers to an unknown zone or bodypart
        """

        if not isinstance(clip1, list):
            body_parts_1 = list(self.get_bodyparts())
        else:
            body_parts_1 = clip1
        n_body_parts = len(body_parts_1)
        body_parts_2 = self.get_bodyparts()
        n_frames = self.get_n_frames(data_dict, clip1)
        if n_frames != self.get_n_frames(data_dict, clip2):
            raise RuntimeError(
                f"The numbers of frames for {clip1} and {clip2} are not equal at {name}!"
            )
        same_clip = clip1 == clip2

        xy_coord_joints_1 = self._load_coords(data_dict, clip1, body_parts_1)
        if same_clip:
            xy_coord_joints_2 = copy.copy(xy_coord_joints_1)
        else:
            xy_coord_joints_2 = self._load_coords(data_dict, clip2, body_parts_2)

        if not same_clip and centroid:
            centroid_1 = np.expand_dims(np.mean(xy_coord_joints_1, axis=1), 1)
            distance_1 = np.linalg.norm(xy_coord_joints_2 - centroid_1, axis=-1)
            centroid_2 = np.expand_dims(np.mean(xy_coord_joints_2, axis=1), 1)
            distance_2 = np.linalg.norm(xy_coord_joints_1 - centroid_2, axis=-1)
            intra_distance = np.concatenate([distance_1, distance_2], axis=-1)
        elif self.distance_pairs is None:
            # only the upper triangle is kept because the distance matrix is symmetric
            upper_indices = np.triu_indices(n_body_parts, 1)
            n_distances = n_body_parts * (n_body_parts - 1) // 2
            intra_distance = np.asarray(
                [
                    self._cdist_keep_zeros(
                        xy_coord_joints_1[i], xy_coord_joints_2[i]
                    )[upper_indices]
                    for i in range(n_frames)
                ]
            ).reshape(n_frames, n_distances)
        else:
            intra_distance = np.stack(
                [
                    np.sqrt(
                        np.sum(
                            (
                                xy_coord_joints_1[:, body_parts_1.index(x), :]
                                - xy_coord_joints_1[:, body_parts_1.index(y), :]
                            )
                            ** 2,
                            axis=1,
                        )
                    )
                    for x, y in self.distance_pairs
                ],
                axis=1,
            )

        if not same_clip:
            return intra_distance, xy_coord_joints_1, xy_coord_joints_2, n_frames

        angle_joints_radian = np.stack(
            [
                self._angle_speed(xy_coord_joints_1[:, i, :], n_frames)
                for i in range(xy_coord_joints_1.shape[1])
            ],
            axis=1,
        )
        # 3D data yields several angles per bodypart; keep one row per frame
        angle_joints_radian = angle_joints_radian.reshape(
            angle_joints_radian.shape[0], -1
        )
        return (
            intra_distance,
            xy_coord_joints_1,
            n_frames,
            angle_joints_radian,
            self._polygon_areas(xy_coord_joints_1, body_parts_1),
            self._angle_cosines(xy_coord_joints_1, body_parts_1),
            self._zone_bools(
                data_dict, clip1, body_parts_1, xy_coord_joints_1.shape[0]
            ),
            self._zone_distances(data_dict, clip1, body_parts_1),
        )

    def _kinematic_features_pair(
        self, data_dict: Dict, clip1: str, clip2: str, name: str
    ) -> Dict:
        """
        Compute features for a pair of clips

        If the clip ids are equal, single-animal features are computed; otherwise the features are
        computed on the coordinate differences between the two clips. All values in the
        returned dictionary have frames on the first axis.
        """

        same_clip = clip1 == clip2
        angle_joints_radian = areas = angles = zone_bools = zone_distances = None
        if same_clip:
            (
                intra_distance,
                xy_coord_joints,
                n_frames,
                angle_joints_radian,
                areas,
                angles,
                zone_bools,
                zone_distances,
            ) = self._distance(data_dict, clip1, clip2, name)
        else:
            (
                intra_distance,
                xy_coord_joints_1,
                xy_coord_joints_2,
                n_frames,
            ) = self._distance(data_dict, clip1, clip2, name)
            xy_coord_joints = xy_coord_joints_2 - xy_coord_joints_1

        # (#frames, #bodyparts, #coordinates) -> (#bodyparts, #coordinates, #frames)
        xy_coord_joints = xy_coord_joints.transpose((1, 2, 0))

        speed_joints = np.diff(xy_coord_joints, axis=-1)
        # a zero coordinate marks a missing detection, so the displacement from it is ignored
        speed_joints[xy_coord_joints[..., :-1] == 0] = 0
        speed_joints = np.insert(speed_joints, 0, speed_joints[:, :, 0], axis=-1)

        # acceleration
        acc_joints = np.diff(speed_joints, axis=-1)
        acc_joints = np.insert(acc_joints, 0, acc_joints[:, :, 0], axis=-1)
        acc_joints = np.linalg.norm(acc_joints, axis=1)

        features = {}
        if "coords" in self.keys:
            features["coords"] = copy.copy(xy_coord_joints).reshape((-1, n_frames)).T
        if "center" in self.keys:
            features["center"] = xy_coord_joints.mean(0).T
        if "coord_diff" in self.keys:
            features["coord_diff"] = (
                (xy_coord_joints - np.expand_dims(xy_coord_joints.mean(0), 0))
                .reshape((-1, n_frames))
                .T
            )
        if "intra_distance" in self.keys:
            features["intra_distance"] = intra_distance
        if "speed_joints" in self.keys:
            features["speed_joints"] = speed_joints.reshape((-1, n_frames)).T
        if "speed_direction" in self.keys or "speed_value" in self.keys:
            # the epsilon keeps the direction finite for motionless bodyparts
            values = np.expand_dims(np.linalg.norm(speed_joints, axis=1), 1) + 1e-7
            directions = speed_joints / values
            if "speed_direction" in self.keys:
                features["speed_direction"] = directions.reshape((-1, n_frames)).T
            if "speed_value" in self.keys:
                features["speed_value"] = values.reshape((-1, n_frames)).T
        if (
            "angle_speeds" in self.keys or "angle_joints_radian" in self.keys
        ) and same_clip:
            features["angle_speeds"] = angle_joints_radian
        if "angles" in self.keys and same_clip and angles is not None:
            features["angles"] = angles
        if "acc_joints" in self.keys:
            features["acc_joints"] = acc_joints.T
        if "areas" in self.keys and same_clip and areas is not None:
            # NOTE(review): the factor of 10 is kept from the original code; its purpose is not documented
            features["areas"] = areas * 10
        if "zone_bools" in self.keys and same_clip and zone_bools is not None:
            features["zone_bools"] = zone_bools
        if "zone_distances" in self.keys and same_clip and zone_distances is not None:
            features["zone_distances"] = zone_distances
        if same_clip and "likelihood" in self.keys:
            likelihood = [
                self.get_likelihood(data_dict, clip1, bp) for bp in self.get_bodyparts()
            ]
            if likelihood and likelihood[0] is not None:
                features["likelihood"] = np.stack(likelihood, 1)
        return features

    def extract_features(
        self,
        data_dict: Dict,
        video_id: str,
        prefix: Optional[str] = None,
        one_clip: bool = False,
    ) -> Dict:
        """
        Extract features from a data dictionary

        An input store will call this method while pre-computing a dataset. We do not assume a specific
        structure in the data dictionary, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        prefix : str, optional
            a prefix for the feature names
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a nested dictionary where the first-level keys are `"{video_id}---{clip ids}"`, the second-level
            keys are the feature names (e.g. 'coords', 'intra_distance') and the
            values are numpy arrays of shape `(#frames, #features)`
        """

        features = {}
        keys = [x for x in data_dict if x not in self.ignored_clips]
        if self.interactive:
            agents = [keys] if one_clip else combinations(keys, 2)
        else:
            agents = [[x] for x in keys]
        for clip_ids in agents:
            multiple = len(clip_ids) > 1
            clip_features = {}
            for clip in clip_ids:
                single_features = self._kinematic_features_pair(
                    data_dict, clip, clip, video_id
                )
                for key, value in single_features.items():
                    name = key
                    if prefix is not None or multiple:
                        name += "---"
                        if prefix is not None:
                            name += prefix
                        if multiple:
                            name += clip
                    clip_features[name] = value
            if multiple and "inter_distance" in self.keys:
                for clip1, clip2 in combinations(clip_ids, 2):
                    distance, *_ = self._distance(
                        data_dict, clip1, clip2, video_id, centroid=True
                    )
                    name = "inter_distance---"
                    if prefix is not None:
                        name += prefix
                    name += f"{clip1}+{clip2}"
                    clip_features[name] = distance
            combo_name = "all" if one_clip else "+".join(map(str, clip_ids))
            features[video_id + "---" + combo_name] = clip_features
        if self.neighboring_frames != 0:
            n = self.neighboring_frames
            # NOTE(review): the window [i - n, i + n) is not centered and the first n + 1 and last n
            # frames are dropped; kept as in the original because changing it alters the output shapes
            for clip_features in features.values():
                for clip_key in list(clip_features):
                    feature = clip_features[clip_key]
                    clip_features[clip_key] = np.stack(
                        [
                            feature[i - n : i + n, :].flatten()
                            for i in range(n + 1, feature.shape[0] - n)
                        ],
                        axis=0,
                    )
        return features
class FeatureExtractor(ABC):
    """
    The base class for feature extractors

    The `extract_features` method receives a data dictionary as input.
    We do not assume a specific
    structure in the values and all necessary information (coordinates of a bodypart, number
    of frames, list of bodyparts) is inferred using input store methods. Therefore, each child class
    of `FeatureExtractor` is written for a specific subclass of `dlc2action.data.base_store.InputStore`
    with the data inference
    functions defined (e.g. `dlc2action.data.base_store.PoseInputStore`).
    """

    # Set to `None` here; the visible subclass `PoseFeatureExtractor` overrides it with its store class.
    input_store_class = None
    """
    The `dlc2action.data.base_Store.InputStore` child class paired with this feature extractor
    """

    @abstractmethod
    def __init__(self, ignored_clips: List = None, **kwargs):
        """
        Parameters
        ----------
        ignored_clips : list
            a list of string names of clip ids to ignore
        """

    @abstractmethod
    def extract_features(
        self, data_dict: Dict, video_id: str, one_clip: bool = False
    ) -> Dict:
        """
        Extract features from a data dictionary

        An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
        video id and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about video
        and clip ids. We do not assume a specific
        structure in the values, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
            values are numpy arrays of shape `(#features, ..., #frames)`
        """
The base class for feature extractors
The `extract_features` method receives a data dictionary as input. We do not assume a specific structure in the values and all necessary information (coordinates of a bodypart, number of frames, list of bodyparts) is inferred using input store methods. Therefore, each child class of `FeatureExtractor` is written for a specific subclass of `dlc2action.data.base_store.InputStore` with the data inference functions defined (e.g. `dlc2action.data.base_store.PoseInputStore`).
@abstractmethod
def __init__(self, ignored_clips: List = None, **kwargs):
    """
    Initialize the feature extractor

    The method is abstract and has no body of its own.

    Parameters
    ----------
    ignored_clips : list
        a list of string names of clip ids to ignore
    """
Parameters
ignored_clips : list a list of string names of clip ids to ignore
The `dlc2action.data.base_store.InputStore` child class paired with this feature extractor
@abstractmethod
def extract_features(
    self, data_dict: Dict, video_id: str, one_clip: bool = False
) -> Dict:
    """
    Extract features from a data dictionary

    An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
    video id and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about video
    and clip ids. We do not assume a specific
    structure in the values, so all necessary information (coordinates of a bodypart, number
    of frames, list of bodyparts) is inferred using input store methods.

    The method is abstract and has no body of its own.

    Parameters
    ----------
    data_dict : dict
        the data dictionary
    video_id : str
        the id of the video associated with the data dictionary
    one_clip : bool, default False
        if `True`, all features will be concatenated and assigned to one clip named `'all'`

    Returns
    -------
    features : dict
        a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
        values are numpy arrays of shape `(#features, ..., #frames)`
    """
Extract features from a data dictionary
An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
video id and have clip ids as keys. Read the documentation at dlc2action.data
to find out more about video
and clip ids. We do not assume a specific
structure in the values, so all necessary information (coordinates of a bodypart, number
of frames, list of bodyparts) is inferred using input store methods.
Parameters
- data_dict : dict — the data dictionary
- video_id : str — the id of the video associated with the data dictionary
- one_clip : bool, default False — if `True`, all features will be concatenated and assigned to one clip named `'all'`
Returns
- features : dict — a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the values are numpy arrays of shape `(#features, ..., #frames)`
class PoseFeatureExtractor(FeatureExtractor):
    """
    The base class for pose feature extractors

    Instances are built from a `dlc2action.data.base_store.PoseInputStore` object and
    re-expose its data access methods as their own attributes.
    """

    input_store_class = PoseInputStore

    def __init__(self, input_store: PoseInputStore, *args, **kwargs):
        """
        Parameters
        ----------
        input_store : PoseInputStore
            the input store object whose accessors are bound to this extractor
        """

        # Bind the store accessors one by one, in the same order as they are documented.
        accessor_names = (
            "get_bodyparts",
            "get_coords",
            "get_n_frames",
            "get_likelihood",
        )
        for accessor_name in accessor_names:
            setattr(self, accessor_name, getattr(input_store, accessor_name))
The base class for pose feature extractors
Pose feature extractors work with `dlc2action.data.base_store.InputStore` instances that inherit from `dlc2action.data.base_store.PoseInputStore`.
def __init__(self, input_store: PoseInputStore, *args, **kwargs):
    """
    Bind the data access methods of the input store to this extractor

    Parameters
    ----------
    input_store : PoseInputStore
        the input store object
    """

    # The accessors are copied in a fixed order; extra positional and keyword arguments are ignored.
    for accessor_name in (
        "get_bodyparts",
        "get_coords",
        "get_n_frames",
        "get_likelihood",
    ):
        setattr(self, accessor_name, getattr(input_store, accessor_name))
Parameters
input_store : PoseInputStore the input store object
Inherited Members
class PoseInputStore(InputStore):
    """
    A subclass of InputStore for pose estimation data

    Contains methods used by pose estimation feature extractors.
    All methods receive a data dictionary as input. This dictionary is the same as what is passed to the
    feature extractor and the only limitations for the structure are that it has to relate to one video id
    and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about videos
    and clips.
    """

    def get_likelihood(
        self, data_dict: Dict, clip_id: str, bodypart: str
    ) -> Union[np.ndarray, None]:
        """
        Get the likelihood values

        This default implementation always returns `None`.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        clip_id : str
            the clip id
        bodypart : str
            the name of the body part

        Returns
        -------
        likelihoods: np.ndarray | None
            `None` if the dataset doesn't have likelihoods or an array of shape (#timesteps)
        """

        return None

    @abstractmethod
    def get_coords(self, data_dict: Dict, clip_id: str, bodypart: str) -> np.ndarray:
        """
        Get the coordinates array of a specific body part in a specific clip

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        clip_id : str
            the clip id
        bodypart : str
            the name of the body part

        Returns
        -------
        coords : np.ndarray
            the coordinates array of shape (#timesteps, #coordinates)
        """

    @abstractmethod
    def get_bodyparts(self) -> List:
        """
        Get a list of bodypart names

        Returns
        -------
        bodyparts : list
            a list of string or integer body part names
        """

    @abstractmethod
    def get_n_frames(self, data_dict: Dict, clip_id: str) -> int:
        """
        Get the length of the clip

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        clip_id : str
            the clip id

        Returns
        -------
        n_frames : int
            the length of the clip
        """
The dlc2action.data.base_store.InputStore
child class paired with this feature extractor
Inherited Members
class KinematicExtractor(PoseFeatureExtractor):
    """
    A feature extractor for basic kinematic features: speeds, accelerations, distances.

    The available keys are:
        - coords: the allocentric bodypart coordinates,
        - coord_diff: the egocentric bodypart coordinates,
        - center: the body center (mean of bodyparts) coordinates,
        - intra_distance: distances between bodyparts (pairs set in `distance_pairs` or all combinations by default),
        - inter_distance: computed in interactive mode (for pairs of animals); distances from each bodypart of each
            animal to the centroid of the other animal,
        - speed_direction: unit vector of speed approximation for each bodypart,
        - speed_value: l2 norm of the speed approximation vector for each bodypart,
        - acc_joints: l2 norm of the acceleration approximation vector for each bodypart,
        - angle_speeds: vector of angle speed approximation for each bodypart,
        - angles: cosines of angles set in `angle_pairs`,
        - areas: areas of polygons set in `area_vertices`,
        - zone_bools: binary identifier of zone visitation, defined in `zone_bools`,
        - zone_distances: distance to zone boundary, defined in `zone_distances`,
        - likelihood: pose estimation likelihood (if known).

    The default set is `{coord_diff, center, intra_distance, inter_distance, speed_direction, speed_value, acc_joints, angle_speeds}`
    """

    def __init__(
        self,
        input_store: PoseInputStore,
        keys: List = None,
        ignored_clips: List = None,
        interactive: bool = False,
        averaging_window: int = 1,
        distance_pairs: List = None,
        angle_pairs: List = None,
        neighboring_frames: int = 0,
        area_vertices: List = None,
        zone_vertices: Dict = None,
        zone_bools: List = None,
        zone_distances: List = None,
        *args,
        **kwargs,
    ) -> None:
        """
        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        keys : list, optional
            a list of names of the features to extract
        ignored_clips : list, optional
            a list of clip ids to ignore
        interactive : bool, default False
            if `True`, features for pairs of clips will be computed
        averaging_window : int, default 1
            if >1, the coordinates are averaged over frames with a moving window of this size before the
            distance, speed and angle features are computed
        distance_pairs : list, optional
            a list of bodypart name tuples (e.g. `[("tail", "nose")]`) to compute distances for when `"intra_distance"`
            is in `keys` (by default all distances are computed)
        angle_pairs : list, optional
            a list of bodypart name tuples to compute angle cosines for when `"angles"` is in `keys` (by default
            no angles are computed); a 4-tuple `(a, b, c, d)` gives the cosine between the `a - b` and `c - d`
            vectors and a 3-tuple `(a, b, c)` (e.g. `[("ear1", "nose", "ear2")]`) is a shorthand for `(a, b, b, c)`
        neighboring_frames : int, default 0
            if >0, this number of neighboring frames is aggregated in the center frame features (generally not recommended)
        area_vertices : list, optional
            a list of bodypart name tuples of any length >= 3 (e.g. `[("ear1", "nose", "ear2", "spine1")]`) that define polygons
            to compute areas for when `"areas"` is in `keys` (by default no areas are computed)
        zone_vertices : dict, optional
            a dictionary of bodypart name tuples of any length >= 3 that define zones for `"zone_bools"` and `"zone_distances"`
            features; keys should be zone names and values should be tuples that define the polygons (e.g.
            `{"main_area": ("x_min", "x_max", "y_max", "y_min")}`)
        zone_bools : list, optional
            a list of zone and bodypart name tuples to compute binary identifiers for (1 if an animal is within the polygon or
            0 if it's outside) (e.g. `[("main_area", "nose")]`); the zones should be defined in the `zone_vertices` parameter;
            this is only computed if `"zone_bools"` is in `keys`
        zone_distances : list, optional
            a list of zone and bodypart name tuples to compute distances for (distance from the bodypart to the closest of the
            boundaries) (e.g. `[("main_area", "nose")]`); the zones should be defined in the `zone_vertices` parameter;
            this is only computed if `"zone_distances"` is in `keys`
        """

        if keys is None:
            keys = [
                "coord_diff",
                "center",
                "intra_distance",
                "speed_direction",
                "speed_value",
                "angle_speeds",
                "acc_joints",
                "inter_distance",
            ]
        if ignored_clips is None:
            ignored_clips = []
        if zone_vertices is None:
            zone_vertices = {}
        if zone_bools is None:
            zone_bools = []
        if zone_distances is None:
            zone_distances = []
        self.keys = keys
        self.ignored_clips = ignored_clips
        self.interactive = interactive
        self.w = averaging_window
        self.distance_pairs = distance_pairs
        self.angle_pairs = angle_pairs
        self.area_vertices = area_vertices
        self.neighboring_frames = int(neighboring_frames)
        self.zone_vertices = zone_vertices
        self.zone_bools = zone_bools
        self.zone_distances = zone_distances
        super().__init__(input_store)

    def _angle_speed(self, xy_coord_joint: np.array, n_frames: int) -> np.array:
        """
        Compute the direction angles of the frame-to-frame displacement of one bodypart

        Displacements that start at a zero coordinate (used as a "missing" marker) are
        set to zero. The value of the first frame is a copy of the second one.

        For 2D input the result has `n_frames` values; for any other number of
        coordinates the first three are used and the angles of the three coordinate
        pairs are concatenated into one vector of `3 * n_frames` values.
        NOTE(review): the 3D output is therefore not frame-aligned when stacked by the caller - confirm intent.
        """

        def masked_diff(column: int) -> np.ndarray:
            # displacement along one coordinate, zeroed where the starting value is missing
            diff = np.diff(xy_coord_joint[:, column])
            diff[xy_coord_joint[:-1, column] == 0] = 0
            return diff

        def direction(first: np.ndarray, second: np.ndarray) -> np.ndarray:
            radians = [math.atan2(first[i], second[i]) for i in range(n_frames - 1)]
            # pad the first frame so that the output is as long as the clip
            return np.insert(radians, 0, radians[0], axis=0)

        if xy_coord_joint.shape[1] == 2:
            x_diff = masked_diff(0)
            y_diff = masked_diff(1)
            return direction(y_diff, x_diff)

        # each coordinate is masked with its own zeros (z used to be masked into y_diff)
        x_diff = masked_diff(0)
        y_diff = masked_diff(1)
        z_diff = masked_diff(2)
        return np.concatenate(
            [
                direction(first, second)
                for first, second in combinations([x_diff, y_diff, z_diff], 2)
            ]
        )

    def _poly_area(self, x, y):
        """
        Compute the area of a polygon from its vertex coordinates (shoelace formula)
        """

        return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

    def _cdist_keep_zeros(self, a: np.array, b: np.array) -> np.array:
        """
        Compute all distance combinations while setting the distance to zero if at least one of the elements is at zero

        `dist[i, j]` is the euclidean distance between `a[i]` and `b[j]`; it is zeroed
        when `a[i]` or `b[j]` has at least one zero coordinate.
        """

        dist = cdist(a, b, "euclidean")
        a_zero = np.sum(a == 0, axis=1) > 0
        b_zero = np.sum(b == 0, axis=1) > 0
        # rows are indexed by the points of a and columns by the points of b
        dist[a_zero, :] = 0
        dist[:, b_zero] = 0
        return dist

    def _moving_average(self, coords: np.ndarray) -> np.ndarray:
        """
        Average the coordinates over frames (axis 0) with a window of `self.w` frames

        The output has the same shape as the input. The window is centered (for even
        sizes it reaches one frame further into the past) and the clip is implicitly
        zero-padded, so the values at the clip edges are attenuated.
        """

        n_frames = coords.shape[0]
        if self.w <= 1 or n_frames == 0:
            return coords
        kernel = np.ones(self.w) / self.w
        start = (self.w - 1) // 2
        averaged = np.empty(coords.shape, dtype=float)
        for index in np.ndindex(*coords.shape[1:]):
            series = (slice(None),) + index
            # the full convolution has n_frames + w - 1 values, keep the centered n_frames
            averaged[series] = np.convolve(coords[series], kernel)[
                start : start + n_frames
            ]
        return averaged

    def _angle_cosines(self, coords: np.ndarray, body_parts: List):
        """
        Compute the cosines of the angles set in `self.angle_pairs` (`None` if there are none)
        """

        if self.angle_pairs is None:
            return None
        cosines = []
        for pair in self.angle_pairs:
            if len(pair) == 3:
                # documented shorthand: (a, b, c) is the angle between the a - b and b - c vectors
                start, middle, end = pair
                pair = (start, middle, middle, end)
            x0, x1, y0, y1 = pair
            x0_ind = body_parts.index(x0)
            x1_ind = body_parts.index(x1)
            y0_ind = body_parts.index(y0)
            y1_ind = body_parts.index(y1)
            diff_x = coords[:, x0_ind, :] - coords[:, x1_ind, :]
            diff_y = coords[:, y0_ind, :] - coords[:, y1_ind, :]
            # the small constant avoids a division by zero for degenerate vectors
            denom = (
                np.linalg.norm(diff_x, axis=-1) * np.linalg.norm(diff_y, axis=-1)
                + 1e-7
            )
            cosines.append(np.einsum("ij,ij->i", diff_x, diff_y) / denom)
        return np.stack(cosines, axis=1)

    def _polygon_areas(self, coords: np.ndarray, body_parts: List):
        """
        Compute the per-frame areas of the polygons set in `self.area_vertices` (`None` if there are none)
        """

        if self.area_vertices is None:
            return None
        areas = []
        for points in self.area_vertices:
            inds = [body_parts.index(x) for x in points]
            areas.append(
                np.array(
                    [
                        self._poly_area(coords[f_i, inds, 0], coords[f_i, inds, 1])
                        for f_i in range(coords.shape[0])
                    ]
                )
            )
        return np.stack(areas, axis=-1)

    def _check_zone(self, zone: str, vertex: str, body_parts: List) -> None:
        """
        Make sure that a (zone, bodypart) pair refers to known names
        """

        if zone not in self.zone_vertices:
            raise ValueError(f"The {zone} zone is not in zone_vertices!")
        if vertex not in body_parts:
            raise ValueError(f"The {vertex} bodypart not in bodyparts!")

    def _zone_bools_feature(
        self, data_dict: Dict, clip: str, body_parts: List, n_rows: int
    ):
        """
        Compute the zone visitation identifiers set in `self.zone_bools` (`None` if there are none)

        A bodypart counts as inside when, for every edge of the zone, it lies on the
        same side of the edge line as the vertex that follows the edge.
        NOTE(review): this test presumably assumes convex zones - confirm.
        """

        zone_bools = []
        for zone, vertex in self.zone_bools:
            self._check_zone(zone, vertex, body_parts)
            zone_names = self.zone_vertices[zone]
            zone_bool = np.ones((n_rows, 1))
            vertex_coords = self.get_coords(data_dict, clip, vertex)
            for i, x in enumerate(zone_names):
                v1 = self.get_coords(data_dict, clip, x)
                v2 = self.get_coords(
                    data_dict, clip, zone_names[(i + 1) % len(zone_names)]
                )
                v3 = self.get_coords(
                    data_dict, clip, zone_names[(i + 2) % len(zone_names)]
                )
                dx = v2[:, 0] - v1[:, 0] + 1e-7
                dy = v2[:, 1] - v1[:, 1]
                # is the edge line above the point at the x position of the point?
                v3_above = v1[:, 1] + ((v3[:, 0] - v1[:, 0]) / dx) * dy > v3[:, 1]
                vertex_above = (
                    v1[:, 1] + ((vertex_coords[:, 0] - v1[:, 0]) / dx) * dy
                    > vertex_coords[:, 1]
                )
                edge_bool = v3_above == vertex_above
                # vertical edges: compare the x positions instead
                vertical = v2[:, 0] == v1[:, 0]
                edge_bool[vertical] = (
                    (vertex_coords[:, 0] > v2[:, 0]) == (v3[:, 0] > v2[:, 0])
                )[vertical]
                zone_bool *= np.expand_dims(edge_bool, 1)
            zone_bools.append(zone_bool)
        if len(zone_bools) == 0:
            return None
        return np.concatenate(zone_bools, axis=1)

    def _zone_distances_feature(self, data_dict: Dict, clip: str, body_parts: List):
        """
        Compute the distances to zone boundaries set in `self.zone_distances` (`None` if there are none)

        For every edge the distance from the bodypart to the (infinite) line through
        the edge vertices is computed and the minimum over the edges is returned.
        Edges with coinciding vertices get a distance of zero.
        """

        distances = []
        for zone, vertex in self.zone_distances:
            self._check_zone(zone, vertex, body_parts)
            zone_names = self.zone_vertices[zone]
            v0 = self.get_coords(data_dict, clip, vertex)
            dd = []
            for i, x in enumerate(zone_names):
                v1 = self.get_coords(data_dict, clip, x)
                v2 = self.get_coords(
                    data_dict, clip, zone_names[(i + 1) % len(zone_names)]
                )
                # point-to-line distance; the first factor is x2 - x1 (it used to be x2 - x2)
                d = np.abs(
                    (v2[:, 0] - v1[:, 0]) * (v1[:, 1] - v0[:, 1])
                    - (v1[:, 0] - v0[:, 0]) * (v2[:, 1] - v1[:, 1])
                ) / np.sqrt(
                    (v2[:, 0] - v1[:, 0]) ** 2 + (v2[:, 1] - v1[:, 1]) ** 2 + 1e-7
                )
                d[(v2[:, 0] == v1[:, 0]) * (v2[:, 1] == v1[:, 1])] = 0
                dd.append(d)
            distances.append(np.min(np.stack(dd, axis=0), 0))
        if len(distances) == 0:
            return None
        return np.stack(distances, axis=1)

    def _distance(
        self, data_dict: Dict, clip1: str, clip2: str, name: str, centroid: bool = False
    ) -> Tuple:
        """
        Compute the distances between all keypoints

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        clip1 : str
            the first clip id
        clip2 : str
            the second clip id (equal to `clip1` for single animal features)
        name : str
            a name used in the error message (the callers in this class pass the video id)
        centroid : bool, default False
            if `True` and the clips are different, the distances from the bodyparts of each clip
            to the centroid of the other clip are computed

        Returns
        -------
        result : tuple
            `(distance, coords_1, coords_2, n_frames)` if the clips are different and
            `(distance, coords, n_frames, angle_speeds, areas, angles, zone_bools, zone_distances)`
            otherwise; the coordinates have the frames at axis 0 and the bodyparts at axis 1
        """

        if not isinstance(clip1, list):
            body_parts_1 = self.get_bodyparts()
        else:
            body_parts_1 = clip1
        n_body_parts = len(body_parts_1)
        body_parts_2 = self.get_bodyparts()
        n_frames = self.get_n_frames(data_dict, clip1)
        if n_frames != self.get_n_frames(data_dict, clip2):
            raise RuntimeError(
                f"The numbers of frames for {clip1} and {clip2} are not equal at {name}!"
            )

        # joint distances for single agent
        upper_indices = np.triu_indices(n_body_parts, 1)

        xy_coord_joints_1 = self._moving_average(
            np.stack(
                [self.get_coords(data_dict, clip1, bp) for bp in body_parts_1], axis=1
            )
        )
        if clip1 != clip2:
            xy_coord_joints_2 = self._moving_average(
                np.stack(
                    [self.get_coords(data_dict, clip2, bp) for bp in body_parts_2],
                    axis=1,
                )
            )
        else:
            xy_coord_joints_2 = copy.copy(xy_coord_joints_1)

        if clip1 != clip2 and centroid:
            centroid_1 = np.expand_dims(np.mean(xy_coord_joints_1, axis=1), 1)
            distance_1 = np.linalg.norm(xy_coord_joints_2 - centroid_1, axis=-1)
            centroid_2 = np.expand_dims(np.mean(xy_coord_joints_2, axis=1), 1)
            distance_2 = np.linalg.norm(xy_coord_joints_1 - centroid_2, axis=-1)
            intra_distance = np.concatenate([distance_1, distance_2], axis=-1)
        elif self.distance_pairs is None:
            n_distances = n_body_parts * (n_body_parts - 1) // 2
            intra_distance = np.asarray(
                [
                    self._cdist_keep_zeros(xy_coord_joints_1[i], xy_coord_joints_2[i])[
                        upper_indices
                    ].reshape(-1, n_distances)
                    for i in range(n_frames)
                ]
            ).reshape(n_frames, n_distances)
        else:
            intra_distance = []
            for x, y in self.distance_pairs:
                x_ind = body_parts_1.index(x)
                y_ind = body_parts_1.index(y)
                intra_distance.append(
                    np.sqrt(
                        np.sum(
                            (
                                xy_coord_joints_1[:, x_ind, :]
                                - xy_coord_joints_1[:, y_ind, :]
                            )
                            ** 2,
                            axis=1,
                        )
                    )
                )
            intra_distance = np.stack(intra_distance, axis=1)

        if clip1 != clip2:
            return intra_distance, xy_coord_joints_1, xy_coord_joints_2, n_frames

        # the rest of the features is only defined for a single clip
        angle_joints_radian = np.stack(
            [
                self._angle_speed(xy_coord_joints_1[:, i, :], n_frames)
                for i in range(xy_coord_joints_1.shape[1])
            ],
            axis=1,
        )
        angles = self._angle_cosines(xy_coord_joints_1, body_parts_1)
        areas = self._polygon_areas(xy_coord_joints_1, body_parts_1)
        zone_bools = self._zone_bools_feature(
            data_dict, clip1, body_parts_1, xy_coord_joints_1.shape[0]
        )
        distances = self._zone_distances_feature(data_dict, clip1, body_parts_1)
        return (
            intra_distance,
            xy_coord_joints_1,
            n_frames,
            angle_joints_radian,
            areas,
            angles,
            zone_bools,
            distances,
        )

    def _kinematic_features_pair(
        self, data_dict: Dict, clip1: str, clip2: str, name: str
    ) -> Dict:
        """
        Compute features for a pair of clips

        If the clips are equal, the features of a single animal are computed; otherwise
        the coordinate based features are computed from the difference of the
        coordinates of the two clips. Only the features listed in `self.keys` are
        returned; all arrays have the frames at axis 0.
        """

        if clip1 == clip2:
            (
                intra_distance,
                xy_coord_joints,
                n_frames,
                angle_joints_radian,
                areas,
                angles,
                zone_bools,
                zone_distances,
            ) = self._distance(data_dict, clip1, clip2, name)
        else:
            (
                intra_distance,
                xy_coord_joints_1,
                xy_coord_joints_2,
                n_frames,
            ) = self._distance(data_dict, clip1, clip2, name)
            xy_coord_joints = xy_coord_joints_2 - xy_coord_joints_1

        # (frames, bodyparts, coordinates) -> (bodyparts, coordinates, frames)
        xy_coord_joints = xy_coord_joints.transpose((1, 2, 0))

        # speed: frame-to-frame difference, zero where the starting point is missing,
        # the first frame is padded with a copy of the second one
        speed_joints = np.diff(xy_coord_joints, axis=-1)
        speed_joints[xy_coord_joints[..., :-1] == 0] = 0
        speed_joints = np.insert(speed_joints, 0, speed_joints[:, :, 0], axis=-1)

        # acceleration
        acc_joints = np.diff(speed_joints, axis=-1)
        acc_joints = np.insert(acc_joints, 0, acc_joints[:, :, 0], axis=-1)
        acc_joints = np.linalg.norm(acc_joints, axis=1)

        single = clip1 == clip2
        features = {}
        if "coords" in self.keys:
            features["coords"] = copy.copy(xy_coord_joints).reshape((-1, n_frames)).T
        if "center" in self.keys:
            features["center"] = xy_coord_joints.mean(0).T
        if "coord_diff" in self.keys:
            features["coord_diff"] = (
                (xy_coord_joints - np.expand_dims(xy_coord_joints.mean(0), 0))
                .reshape((-1, n_frames))
                .T
            )
        if "intra_distance" in self.keys:
            features["intra_distance"] = intra_distance
        if "speed_joints" in self.keys:
            features["speed_joints"] = speed_joints.reshape((-1, n_frames)).T
        if "speed_direction" in self.keys or "speed_value" in self.keys:
            # the small constant avoids a division by zero for static bodyparts
            values = np.expand_dims(np.linalg.norm(speed_joints, axis=1), 1) + 1e-7
            directions = speed_joints / values
            if "speed_direction" in self.keys:
                features["speed_direction"] = directions.reshape((-1, n_frames)).T
            if "speed_value" in self.keys:
                features["speed_value"] = values.reshape((-1, n_frames)).T
        if (
            "angle_speeds" in self.keys or "angle_joints_radian" in self.keys
        ) and single:
            features["angle_speeds"] = angle_joints_radian
        if "angles" in self.keys and single and self.angle_pairs is not None:
            features["angles"] = angles
        if "acc_joints" in self.keys:
            features["acc_joints"] = acc_joints.T
        if "areas" in self.keys and single and areas is not None:
            # NOTE(review): the factor of 10 is kept from the original code - presumably a rescaling, confirm
            features["areas"] = areas * 10
        if "zone_bools" in self.keys and single and zone_bools is not None:
            features["zone_bools"] = zone_bools
        if "zone_distances" in self.keys and single and zone_distances is not None:
            features["zone_distances"] = zone_distances
        if single and "likelihood" in self.keys:
            likelihood = [
                self.get_likelihood(data_dict, clip1, bp) for bp in self.get_bodyparts()
            ]
            if likelihood[0] is not None:
                likelihood = np.stack(likelihood, 1)
                features["likelihood"] = likelihood
        return features

    def extract_features(
        self,
        data_dict: Dict,
        video_id: str,
        prefix: str = None,
        one_clip: bool = False,
    ) -> Dict:
        """
        Extract features from a data dictionary

        An input store will call this method while pre-computing a dataset. We do not assume a specific
        structure in the data dictionary, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        prefix : str, optional
            a prefix for the feature names
        one_clip : bool, default False
            if `True` (in interactive mode), the features of all clips will be assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a nested dictionary: the keys are `video_id + "---" + clip names` and the values are
            dictionaries where the keys are the feature names (e.g. 'coords', 'intra_distance') and the
            values are numpy arrays of shape `(#frames, #features)`
        """

        features = {}
        keys = [x for x in data_dict.keys() if x not in self.ignored_clips]
        if self.interactive:
            if one_clip:
                agents = [keys]
            else:
                agents = combinations(keys, 2)
        else:
            agents = [[x] for x in keys]
        for clip_ids in agents:
            clip_features = {}
            for clip in clip_ids:
                single_features = self._kinematic_features_pair(
                    data_dict, clip, clip, video_id
                )
                for key, value in single_features.items():
                    name = key
                    if prefix is not None or len(clip_ids) > 1:
                        name += "---"
                        if prefix is not None:
                            name += prefix
                        if len(clip_ids) > 1:
                            name += clip
                    clip_features[name] = value
            if len(clip_ids) > 1 and "inter_distance" in self.keys:
                for clip1, clip2 in combinations(clip_ids, 2):
                    distance, *_ = self._distance(
                        data_dict, clip1, clip2, video_id, centroid=True
                    )
                    name = "inter_distance---"
                    if prefix is not None:
                        name += prefix
                    name += f"{clip1}+{clip2}"
                    clip_features[name] = distance
            if one_clip:
                combo_name = "all"
            else:
                combo_name = "+".join(map(str, clip_ids))
            features[video_id + "---" + combo_name] = clip_features
        if self.neighboring_frames != 0:
            # NOTE(review): the window below covers 2 * neighboring_frames frames and is not
            # centered (i - n ... i + n - 1), and the output is 2 * n + 1 frames shorter than
            # the input; kept as is because the output shape may be relied upon - confirm
            for key in features.keys():
                for clip_key in features[key].keys():
                    new_feature = []
                    for i in range(
                        self.neighboring_frames + 1,
                        features[key][clip_key].shape[0] - self.neighboring_frames,
                    ):
                        new_feature.append(
                            features[key][clip_key][
                                i
                                - self.neighboring_frames : i
                                + self.neighboring_frames,
                                :,
                            ].flatten()
                        )
                    features[key][clip_key] = np.stack(new_feature, axis=0)
        return features
A feature extractor for basic kinematic features: speeds, accelerations, distances.
The available keys are:
- coords: the allocentric bodypart coordinates,
- coord_diff: the egocentric bodypart coordinates,
- center: the body center (mean of bodyparts) coordinates,
- intra_distance: distances between bodyparts (pairs set in distance_pairs
or all combinations by default),
- inter_distance: computed in interactive mode (for pairs of animals); distances from each bodypart of each animal to the centroid of the other animal,
- speed_direction: unit vector of speed approximation for each bodypart,
- speed_value: l2 norm of the speed approximation vector for each bodypart,
- acc_joints: l2 norm of the acceleration approximation vector for each bodypart,
- angle_speeds: vector of angle speed approximation for each bodypart,
- angles: cosines of angles set in angle_pairs
,
- areas: areas of polygons set in area_vertices
,
- zone_bools: binary identifier of zone visitation, defined in zone_bools
,
- zone_distances: distance to zone boundary, defined in zone_distances
,
- likelihood: pose estimation likelihood (if known).
The default set is {coord_diff, center, intra_distance, inter_distance, speed_direction, speed_value, acc_joints, angle_speeds}
def __init__(
    self,
    input_store: PoseInputStore,
    keys: List = None,
    ignored_clips: List = None,
    interactive: bool = False,
    averaging_window: int = 1,
    distance_pairs: List = None,
    angle_pairs: List = None,
    neighboring_frames: int = 0,
    area_vertices: List = None,
    zone_vertices: Dict = None,
    zone_bools: List = None,
    zone_distances: List = None,
    *args,
    **kwargs,
) -> None:
    """
    Store the extraction settings and pass the input store to the parent constructor

    Parameters
    ----------
    input_store : PoseInputStore
        the input store object
    keys : list, optional
        the names of the features to extract (by default `"coord_diff"`, `"center"`, `"intra_distance"`,
        `"speed_direction"`, `"speed_value"`, `"angle_speeds"`, `"acc_joints"` and `"inter_distance"`)
    ignored_clips : list, optional
        the clip ids to ignore (by default none)
    interactive : bool, default False
        if `True`, features for pairs of clips will be computed
    averaging_window : int, default 1
        if >1, features are averaged with a moving window of this size (in frames)
    distance_pairs : list, optional
        bodypart name tuples (e.g. `[("tail", "nose")]`) to compute distances for when `"intra_distance"`
        is in `keys` (by default all distances are computed)
    angle_pairs : list, optional
        bodypart name tuples that define the angles to compute cosines for when `"angles"` is in `keys`
        (by default no angles are computed)
        NOTE(review): `_distance` unpacks four names per entry (two per line) - confirm the tuple length
    neighboring_frames : int, default 0
        if >0, this number of neighboring frames is aggregated in the center frame features
        (generally not recommended); the value is converted to `int`
    area_vertices : list, optional
        bodypart name tuples of any length >= 3 (e.g. `[("ear1", "nose", "ear2", "spine1")]`) that define
        polygons to compute areas for when `"areas"` is in `keys` (by default no areas are computed)
    zone_vertices : dict, optional
        zone definitions for the `"zone_bools"` and `"zone_distances"` features: the keys are zone names and
        the values are bodypart name tuples of any length >= 3 that define the polygons (e.g.
        `{"main_area": ("x_min", "x_max", "y_max", "y_min")}`)
    zone_bools : list, optional
        zone and bodypart name tuples to compute binary identifiers for (1 if an animal is within the
        polygon or 0 if it's outside) (e.g. `[("main_area", "nose")]`); the zones should be defined in the
        `zone_vertices` parameter; this is only computed if `"zone_bools"` is in `keys`
    zone_distances : list, optional
        zone and bodypart name tuples to compute distances for (distance from the bodypart to the closest
        of the boundaries) (e.g. `[("main_area", "nose")]`); the zones should be defined in the
        `zone_vertices` parameter; this is only computed if `"zone_distances"` is in `keys`
    """

    default_keys = [
        "coord_diff",
        "center",
        "intra_distance",
        "speed_direction",
        "speed_value",
        "angle_speeds",
        "acc_joints",
        "inter_distance",
    ]

    # settings with a fallback value (fresh containers, never shared defaults)
    self.keys = default_keys if keys is None else keys
    self.ignored_clips = [] if ignored_clips is None else ignored_clips
    self.zone_vertices = {} if zone_vertices is None else zone_vertices
    self.zone_bools = [] if zone_bools is None else zone_bools
    self.zone_distances = [] if zone_distances is None else zone_distances

    # settings that are stored as they are
    self.interactive = interactive
    self.w = averaging_window
    self.distance_pairs = distance_pairs
    self.angle_pairs = angle_pairs
    self.area_vertices = area_vertices
    self.neighboring_frames = int(neighboring_frames)

    super().__init__(input_store)
Parameters
input_store : PoseInputStore
the input store object
keys : list, optional
a list of names of the features to extract
ignored_clips : list, optional
a list of clip ids to ignore
interactive : bool, default False
if True
, features for pairs of clips will be computed
averaging_window : int, default 1
if >1, features are averaged with a moving window of this size (in frames)
distance_pairs : list, optional
a list of bodypart name tuples (e.g. [("tail", "nose")]
) to compute distances for when "intra_distance"
is in keys
(by default all distances are computed)
angle_pairs : list, optional
a list of bodypart name 4-tuples (e.g. [("ear1", "nose", "nose", "ear2")]
for the angle between the "ear1"--"nose" and "nose"--"ear2" lines) to compute angle cosines for when "angles"
is in keys
(by default no angles are computed)
neighboring_frames : int, default 0
if >0, this number of neighboring frames is aggregated in the center frame features (generally not recommended)
area_vertices : list, optional
a list of bodypart name tuples of any length >= 3 (e.g. [("ear1", "nose", "ear2", "spine1")]
) that define polygons
to compute areas for when "areas"
is in keys
(by default no areas are computed)
zone_vertices : dict, optional
a dictionary of bodypart name tuples of any length >= 3 that define zones for "zone_bools"
and "zone_distances"
features; keys should be zone names and values should be tuples that define the polygons (e.g.
{"main_area": ("x_min", "x_max", "y_max", "y_min")}
)
zone_bools : list, optional
a list of zone and bodypart name tuples to compute binary identifiers for (1 if an animal is within the polygon or
0 if it's outside) (e.g. [("main_area", "nose")]
); the zones should be defined in the zone_vertices
parameter;
this is only computed if "zone_bools"
is in keys
zone_distances : list, optional
a list of zone and bodypart name tuples to compute distances for (distance from the bodypart to the closest of the
boundaries) (e.g. [("main_area", "nose")]
); the zones should be defined in the zone_vertices
parameter;
this is only computed if "zone_distances"
is in keys
def extract_features(
    self,
    data_dict: Dict,
    video_id: str,
    prefix: str = None,
    one_clip: bool = False,
) -> Dict:
    """
    Extract features from a data dictionary

    This method is meant to be called by an input store while a dataset is pre-computed. No specific
    structure of the data dictionary is assumed here: the clips are taken from its keys and the
    features are computed by `_kinematic_features_pair` and `_distance`.

    Parameters
    ----------
    data_dict : dict
        the data dictionary
    video_id : str
        the id of the video associated with the data dictionary
    prefix : str, optional
        a prefix for the feature names
    one_clip : bool, default False
        if `True` (in interactive mode), the features of all clips are assigned to one clip named `'all'`

    Returns
    -------
    features : dict
        a nested dictionary: the keys are `video_id + "---" + clip names` and the values are
        dictionaries that map feature names to numpy arrays (the neighboring frames aggregation
        treats axis 0 of these arrays as the frame axis)
    """

    clips = [clip for clip in data_dict.keys() if clip not in self.ignored_clips]
    if not self.interactive:
        groups = [[clip] for clip in clips]
    elif one_clip:
        groups = [clips]
    else:
        groups = combinations(clips, 2)

    features = {}
    for clip_ids in groups:
        several = len(clip_ids) > 1
        group_features = {}

        # features of the individual clips
        for clip in clip_ids:
            single_features = self._kinematic_features_pair(
                data_dict, clip, clip, video_id
            )
            for key, value in single_features.items():
                name = key
                if prefix is not None or several:
                    name += "---"
                    if prefix is not None:
                        name += prefix
                    if several:
                        name += clip
                group_features[name] = value

        # distances between the clips
        if several and "inter_distance" in self.keys:
            for clip1, clip2 in combinations(clip_ids, 2):
                distance, *_ = self._distance(
                    data_dict, clip1, clip2, video_id, centroid=True
                )
                name = "inter_distance---"
                if prefix is not None:
                    name += prefix
                name += f"{clip1}+{clip2}"
                group_features[name] = distance

        combo_name = "all" if one_clip else "+".join(map(str, clip_ids))
        features[video_id + "---" + combo_name] = group_features

    n_neighbors = self.neighboring_frames
    if n_neighbors != 0:
        # replace every feature with flattened windows of neighboring frames
        for group_features in features.values():
            for feature_name in group_features.keys():
                array = group_features[feature_name]
                windows = [
                    array[i - n_neighbors : i + n_neighbors, :].flatten()
                    for i in range(n_neighbors + 1, array.shape[0] - n_neighbors)
                ]
                group_features[feature_name] = np.stack(windows, axis=0)
    return features
Extract features from a data dictionary
An input store will call this method while pre-computing a dataset. We do not assume a specific structure in the data dictionary, so all necessary information (coordinates of a bodypart, number of frames, list of bodyparts) is inferred using input store methods.
Parameters
data_dict : dict
the data dictionary
video_id : str
the id of the video associated with the data dictionary
prefix : str, optional
a prefix for the feature names
one_clip : bool, default False
if True
, all features will be concatenated and assigned to one clip named 'all'
Returns
features : dict
a nested features dictionary where the keys are clip identifiers of the form video_id---clip_names and the values are dictionaries
that map the feature names (e.g. 'coords', 'intra_distance') to numpy arrays of shape (#frames, #features)