dlc2action.feature_extraction

Feature extraction.
Feature extractors generate feature dictionaries that are then passed to SSL transformations
(see dlc2action.ssl) and finally to
transformers that perform augmentations and merge all features into a tensor (see dlc2action.transformer).
The keys of the dictionaries are the feature names ('coords', 'speeds' and so on) and the values are the
feature tensors. It is generally assumed that the tensors have shape (F, ..., L) where F is the variable
number of features (per frame, keypoint, pixel...) and L is the length of the segment in frames. The F
value can be different for every tensor in the dictionary and the rest of the shape should be constant.
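
For example, a feature dictionary for a 30-frame segment of 2D pose data with 8 keypoints might look as follows (a sketch of the shape convention only; the actual keys depend on the extractor and its settings):

```python
import numpy as np

n_frames = 30  # L: the length of the segment in frames
features = {
    "coords": np.zeros((16, n_frames)),     # F = 8 keypoints x 2 coordinates
    "speeds": np.zeros((16, n_frames)),     # same trailing shape, ...
    "likelihood": np.zeros((8, n_frames)),  # ... but F may differ per tensor
}
```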
```python
#
# Copyright 2020-present by A. Mathis Group and contributors. All rights reserved.
#
# This project and all its files are licensed under GNU AGPLv3 or later version.
# A copy is included in dlc2action/LICENSE.AGPL.
#
"""Feature extraction.

Feature extractors generate feature dictionaries that are then passed to SSL transformations
(see `dlc2action.ssl`) and finally to
transformers that perform augmentations and merge all features into a tensor (see `dlc2action.transformer`).
The keys of the dictionaries are the feature names (`'coords'`, `'speeds'` and so on) and the values are the
feature tensors. It is generally assumed that the tensors have shape `(F, ..., L)` where `F` is the variable
number of features (per frame, keypoint, pixel...) and `L` is the length of the segment in frames. The `F`
value can be different for every tensor in the dictionary and the rest of the shape should be constant.
"""

import copy
from typing import Dict, Tuple, List, Set
import numpy as np
from abc import ABC, abstractmethod
from scipy.spatial.distance import cdist
import math
from itertools import combinations
from matplotlib.cm import get_cmap
from dlc2action.data.base_store import PoseInputStore
from scipy.ndimage import gaussian_filter


class FeatureExtractor(ABC):
    """The base class for feature extractors.

    The `extract_features` method receives a data dictionary as input.
    We do not assume a specific
    structure in the values and all necessary information (coordinates of a bodypart, number
    of frames, list of bodyparts) is inferred using input store methods. Therefore, each child class
    of `FeatureExtractor` is written for a specific subclass of `dlc2action.data.base_store.InputStore`
    with the data inference
    functions defined (i.e. `dlc2action.data.base_store.PoseInputStore`).
    """

    input_store_class = None
    """The `dlc2action.data.base_store.InputStore` child class paired with this feature extractor."""

    @abstractmethod
    def __init__(self, ignored_clips: List = None, **kwargs):
        """Initialize the feature extractor.

        Parameters
        ----------
        ignored_clips : list
            a list of string names of clip ids to ignore

        """

    @abstractmethod
    def extract_features(
        self, data_dict: Dict, video_id: str, one_clip: bool = False
    ) -> Dict:
        """Extract features from a data dictionary.

        An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
        video id and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about video
        and clip ids. We do not assume a specific
        structure in the values, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
            values are numpy arrays of shape `(#features, ..., #frames)`

        """


class PoseFeatureExtractor(FeatureExtractor):
    """The base class for pose feature extractors.

    Pose feature extractors work with `dlc2action.data.base_store.InputStore` instances
    that inherit from `dlc2action.data.base_store.PoseInputStore`.
    """

    input_store_class = PoseInputStore

    def __init__(self, input_store: PoseInputStore, *args, **kwargs):
        """Initialize the extractor.

        Parameters
        ----------
        input_store : PoseInputStore
            the input store object

        """
        self.get_bodyparts = input_store.get_bodyparts
        self.get_coords = input_store.get_coords
        self.get_n_frames = input_store.get_n_frames
        self.get_likelihood = input_store.get_likelihood


# class KinematicBones(PoseFeatures):
#
#     def __init__(self, dataset, bone_pairs, *args, **kwargs):
#         self.bone_starts, self.bone_ends = zip(*bone_pairs)
#         self.keys = ["bones", "speed_bones", "acc_bones"]
#         super().__init__(dataset)
#
#     def extract_features(self, data_dict: Dict, clip_id: str, name: str) -> Dict:
#         if isinstance(clip_id, list):
#             clip_id = clip_id[0]
#         bodyparts = np.array(self.get_bodyparts(data_dict, clip_id))
#         bone_starts = np.where(
#             np.array(self.bone_starts)[:, None] == bodyparts[None, :]
#         )[1]
#         bone_ends = np.where(np.array(self.bone_ends)[:, None] == bodyparts[None, :])[1]
#         coords = np.stack(
#             [self.get_coords(data_dict, clip_id, bp) for bp in bodyparts], axis=1
#         )
#         bones = coords[:, bone_ends, :] - coords[:, bone_starts, :]
#         speeds = bones[1:] - bones[:-1]
#         speeds = np.concatenate([speeds[:1], speeds], axis=0)
#         acc = speeds[1:] - speeds[:-1]
#         acc = np.concatenate([acc[:1], acc], axis=0)
#         n_frames = bones.shape[0]
#         features = {
#             "bones": bones.reshape((n_frames, -1)),
#             "speed_bones": speeds.reshape((n_frames, -1)),
#             "acc_bones": acc.reshape((n_frames, -1)),
#         }
#         return features


class KinematicExtractor(PoseFeatureExtractor):
    """A feature extractor for basic kinematic features: speeds, accelerations, distances.

    The available keys are:

    - coords: the allocentric bodypart coordinates,
    - coord_diff: the egocentric bodypart coordinates,
    - center: the body center (mean of bodyparts) coordinates,
    - intra_distance: distances between bodyparts (pairs set in `distance_pairs` or all combinations by default),
    - inter_distance: computed in interactive mode (for pairs of animals); distances from each bodypart of each animal to the centroid between them,
    - speed_direction: unit vector of speed approximation for each bodypart,
    - speed_value: l2 norm of the speed approximation vector for each bodypart,
    - acc_joints: l2 norm of the acceleration approximation vector for each bodypart,
    - angle_speeds: vector of angle speed approximation for each bodypart,
    - angles: cosines of angles set in `angle_pairs`,
    - areas: areas of polygons set in `area_vertices`,
    - zone_bools: binary identifier of zone visitation, defined in `zone_bools`,
    - zone_distances: distance to zone boundary, defined in `zone_distances`,
    - likelihood: pose estimation likelihood (if known).

    The default set is `{coord_diff, center, intra_distance, inter_distance, speed_direction, speed_value, acc_joints, angle_speeds}`.
    """

    def __init__(
        self,
        input_store: PoseInputStore,
        keys: List = None,
        ignored_clips: List = None,
        interactive: bool = False,
        averaging_window: int = 1,
        distance_pairs: List = None,
        angle_pairs: List = None,
        neighboring_frames: int = 0,
        area_vertices: List = None,
        zone_vertices: Dict = None,
        zone_bools: List = None,
        zone_distances: List = None,
        *args,
        **kwargs,
    ) -> None:
        """Initialize the extractor.

        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        keys : list, optional
            a list of names of the features to extract
        ignored_clips : list, optional
            a list of clip ids to ignore
        interactive : bool, default False
            if `True`, features for pairs of clips will be computed
        averaging_window : int, default 1
            if >1, features are averaged with a moving window of this size (in frames)
        distance_pairs : list, optional
            a list of bodypart name tuples (e.g. `[("tail", "nose")]`) to compute distances for when `"intra_distance"`
            is in `keys` (by default all distances are computed)
        angle_pairs : list, optional
            a list of bodypart name quadruples (e.g. `[("ear1", "nose", "nose", "ear2")]` for the angle between the
            `"ear1"--"nose"` and `"nose"--"ear2"` lines) to compute angle cosines for when `"angles"` is in `keys`
            (by default no angles are computed)
        neighboring_frames : int, default 0
            if >0, this number of neighboring frames is aggregated in the center frame features (generally not recommended)
        area_vertices : list, optional
            a list of bodypart name tuples of any length >= 3 (e.g. `[("ear1", "nose", "ear2", "spine1")]`) that define polygons
            to compute areas for when `"areas"` is in `keys` (by default no areas are computed)
        zone_vertices : dict, optional
            a dictionary of bodypart name tuples of any length >= 3 that define zones for `"zone_bools"` and `"zone_distances"`
            features; keys should be zone names and values should be tuples that define the polygons (e.g.
            `{"main_area": ("x_min", "x_max", "y_max", "y_min")}`)
        zone_bools : list, optional
            a list of zone and bodypart name tuples to compute binary identifiers for (1 if an animal is within the polygon or
            0 if it's outside) (e.g. `[("main_area", "nose")]`); the zones should be defined in the `zone_vertices` parameter;
            this is only computed if `"zone_bools"` is in `keys`
        zone_distances : list, optional
            a list of zone and bodypart name tuples to compute distances for (distance from the bodypart to the closest of the
            boundaries) (e.g.
            `[("main_area", "nose")]`); the zones should be defined in the `zone_vertices` parameter;
            this is only computed if `"zone_distances"` is in `keys`

        """
        if keys is None:
            keys = [
                "coord_diff",
                "center",
                "intra_distance",
                "speed_direction",
                "speed_value",
                "angle_speeds",
                "acc_joints",
                "inter_distance",
            ]
        if ignored_clips is None:
            ignored_clips = []
        if zone_vertices is None:
            zone_vertices = {}
        if zone_bools is None:
            zone_bools = []
        if zone_distances is None:
            zone_distances = []
        self.keys = keys
        self.ignored_clips = ignored_clips
        self.interactive = interactive
        self.averaging_window = int(averaging_window)
        self.distance_pairs = distance_pairs
        self.angle_pairs = angle_pairs
        self.area_vertices = area_vertices
        self.neighboring_frames = int(neighboring_frames)
        self.zone_vertices = zone_vertices
        self.zone_bools = zone_bools
        self.zone_distances = zone_distances
        super().__init__(input_store)

    def _angle_speed(self, xy_coord_joint: np.array, n_frames: int) -> np.array:
        """Compute the angle speed."""
        # 2D coords
        if xy_coord_joint.shape[1] == 2:
            x_diff = np.diff(xy_coord_joint[:, 0])
            y_diff = np.diff(xy_coord_joint[:, 1])
            x_diff[xy_coord_joint[:-1, 0] == 0] = 0
            y_diff[xy_coord_joint[:-1, 1] == 0] = 0
            angle_dir_radians = [
                math.atan2(y_diff[i], x_diff[i]) for i in range(n_frames - 1)
            ]
            angle_dir_radians = np.insert(
                angle_dir_radians, 0, angle_dir_radians[0], axis=0
            )
        # 3D coords
        else:
            x_diff = np.diff(xy_coord_joint[:, 0])
            y_diff = np.diff(xy_coord_joint[:, 1])
            z_diff = np.diff(xy_coord_joint[:, 2])
            x_diff[xy_coord_joint[:-1, 0] == 0] = 0
            y_diff[xy_coord_joint[:-1, 1] == 0] = 0
            z_diff[xy_coord_joint[:-1, 2] == 0] = 0
            angle_dir_radians = []
            for x, y in combinations([x_diff, y_diff, z_diff], 2):
                radians = [math.atan2(x[i], y[i]) for i in range(n_frames - 1)]
                radians = np.insert(radians, 0, radians[0], axis=0)
                angle_dir_radians.append(radians)
            angle_dir_radians = np.concatenate(angle_dir_radians)

        return angle_dir_radians

    def _poly_area(self, x, y):
        """Get polygon area (shoelace formula)."""
        return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

    def _cdist_keep_zeros(self, a: np.array, b: np.array) -> np.array:
        """Compute all distance combinations while setting the distance to zero if at least one of the elements is at zero."""
        dist = cdist(a, b, "euclidean")
        a_zero = np.sum(a == 0, axis=1) > 0
        b_zero = np.sum(b == 0, axis=1) > 0
        dist[a_zero, :] = 0
        dist[:, a_zero] = 0
        dist[b_zero, :] = 0
        dist[:, b_zero] = 0
        return dist

    def _distance(
        self, data_dict: Dict, clip1: str, clip2: str, name: str, centroid: bool = False
    ) -> Tuple:
        """Compute the distances between all keypoints."""
        if not isinstance(clip1, list):
            body_parts_1 = self.get_bodyparts()
        else:
            body_parts_1 = clip1
        n_body_parts = len(body_parts_1)
        body_parts_2 = self.get_bodyparts()
        n_frames = self.get_n_frames(data_dict, clip1)
        if n_frames != self.get_n_frames(data_dict, clip2):
            raise RuntimeError(
                f"The numbers of frames for {clip1} and {clip2} are not equal at {name}!"
            )
        # joint distances for single agent
        upper_indices = np.triu_indices(n_body_parts, 1)

        xy_coord_joints_1 = np.stack(
            [self.get_coords(data_dict, clip1, bp) for bp in body_parts_1], axis=1
        )
        if self.averaging_window > 1:
            # smooth each bodypart coordinate over time with a centered moving average
            for j in range(xy_coord_joints_1.shape[1]):
                for k in range(xy_coord_joints_1.shape[2]):
                    xy_coord_joints_1[:, j, k] = np.convolve(
                        xy_coord_joints_1[:, j, k],
                        (1 / self.averaging_window) * np.ones(self.averaging_window),
                        mode="same",
                    )
        if clip1 != clip2:
            xy_coord_joints_2 = np.stack(
                [self.get_coords(data_dict, clip2, bp) for bp in body_parts_2], axis=1
            )
            if self.averaging_window > 1:
                for j in range(xy_coord_joints_2.shape[1]):
                    for k in range(xy_coord_joints_2.shape[2]):
                        xy_coord_joints_2[:, j, k] = np.convolve(
                            xy_coord_joints_2[:, j, k],
                            (1 / self.averaging_window) * np.ones(self.averaging_window),
                            mode="same",
                        )
        else:
            xy_coord_joints_2 = copy.copy(xy_coord_joints_1)

        if clip1 != clip2 and centroid:
            centroid_1 = np.expand_dims(np.mean(xy_coord_joints_1, axis=1), 1)
            distance_1 = np.linalg.norm(xy_coord_joints_2 - centroid_1, axis=-1)
            centroid_2 = np.expand_dims(np.mean(xy_coord_joints_2, axis=1), 1)
            distance_2 = np.linalg.norm(xy_coord_joints_1 - centroid_2, axis=-1)
            intra_distance = np.concatenate([distance_1, distance_2], axis=-1)
        else:
            if self.distance_pairs is None or len(self.distance_pairs) == 0:
                n_distances = n_body_parts * (n_body_parts - 1) // 2
                if n_distances:
                    intra_distance = np.asarray(
                        [
                            self._cdist_keep_zeros(
                                xy_coord_joints_1[i], xy_coord_joints_2[i]
                            )[upper_indices].reshape(-1, n_distances)
                            for i in range(n_frames)
                        ]
                    ).reshape(n_frames, n_distances)
                else:
                    intra_distance = []
            else:
                intra_distance = []
                for x, y in self.distance_pairs:
                    x_ind = body_parts_1.index(x)
                    y_ind = body_parts_1.index(y)
                    intra_distance.append(
                        np.sqrt(
                            np.sum(
                                (
                                    xy_coord_joints_1[:, x_ind, :]
                                    - xy_coord_joints_1[:, y_ind, :]
                                )
                                ** 2,
                                axis=1,
                            )
                        )
                    )
                intra_distance = np.stack(intra_distance, axis=1)

        if clip1 == clip2:
            angle_joints_radian = np.stack(
                [
                    self._angle_speed(xy_coord_joints_1[:, i, :], n_frames)
                    for i in range(xy_coord_joints_1.shape[1])
                ],
                axis=1,
            )
            if self.angle_pairs is None:
                angles = None
            else:
                angles = []
                for x0, x1, y0, y1 in self.angle_pairs:
                    x0_ind = body_parts_1.index(x0)
                    x1_ind = body_parts_1.index(x1)
                    y0_ind = body_parts_1.index(y0)
                    y1_ind = body_parts_1.index(y1)
                    diff_x = (
                        xy_coord_joints_1[:, x0_ind, :]
                        - xy_coord_joints_1[:, x1_ind, :]
                    )
                    diff_y = (
                        xy_coord_joints_1[:, y0_ind, :]
                        - xy_coord_joints_1[:, y1_ind, :]
                    )
                    dist_x = np.linalg.norm(diff_x, axis=-1)
                    dist_y = np.linalg.norm(diff_y, axis=-1)
                    denom = dist_x * dist_y + 1e-7
                    mult = np.einsum("ij,ij->i", diff_x, diff_y)
                    angles.append(mult / denom)
                angles = np.stack(angles, axis=1)
            if self.area_vertices is not None:
                areas = []
                for points in self.area_vertices:
                    point_areas = []
                    inds = [body_parts_1.index(x) for x in points]
                    for f_i in range(xy_coord_joints_1.shape[0]):
                        x = xy_coord_joints_1[f_i, inds, 0]
                        y = xy_coord_joints_1[f_i, inds, 1]
                        point_areas.append(self._poly_area(x, y))
                    areas.append(np.array(point_areas))
                areas = np.stack(areas, axis=-1)
            else:
                areas = None

            zone_bools = []
            for zone, vertex in self.zone_bools:
                if zone not in self.zone_vertices:
                    raise ValueError(f"The {zone} zone is not in zone_vertices!")
                if vertex not in body_parts_1:
                    raise ValueError(f"The {vertex} bodypart is not in bodyparts!")
                zone_bool = np.ones((xy_coord_joints_1.shape[0], 1))
                vertex_coords = self.get_coords(data_dict, clip1, vertex)
                # side-of-edge test: the bodypart must lie on the same side of each
                # polygon edge as the next polygon vertex
                for i, x in enumerate(self.zone_vertices[zone]):
                    v1 = self.get_coords(data_dict, clip1, x)
                    next_i = (i + 1) % len(self.zone_vertices[zone])
                    next_next_i = (i + 2) % len(self.zone_vertices[zone])
                    v2 = self.get_coords(
                        data_dict, clip1, self.zone_vertices[zone][next_i]
                    )
                    v3 = self.get_coords(
                        data_dict, clip1, self.zone_vertices[zone][next_next_i]
                    )
                    v3_above = (
                        v1[:, 1]
                        + ((v3[:, 0] - v1[:, 0]) / (v2[:, 0] - v1[:, 0] + 1e-7))
                        * (v2[:, 1] - v1[:, 1])
                        > v3[:, 1]
                    )
                    vertex_above = (
                        v1[:, 1]
                        + (
                            (vertex_coords[:, 0] - v1[:, 0])
                            / (v2[:, 0] - v1[:, 0] + 1e-7)
                        )
                        * (v2[:, 1] - v1[:, 1])
                        > vertex_coords[:, 1]
                    )
                    edge_bool = v3_above == vertex_above
                    edge_bool[v2[:, 0] == v1[:, 0]] = (
                        (vertex_coords[:, 0] > v2[:, 0]) == (v3[:, 0] > v2[:, 0])
                    )[v2[:, 0] == v1[:, 0]]
                    zone_bool *= np.expand_dims(edge_bool, 1)
                zone_bools.append(zone_bool)
            if len(zone_bools) == 0:
                zone_bools = None
            else:
                zone_bools = np.concatenate(zone_bools, axis=1)

            distances = []
            for zone, vertex in self.zone_distances:
                if zone not in self.zone_vertices:
                    raise ValueError(f"The {zone} zone is not in zone_vertices!")
                if vertex not in body_parts_1:
                    raise ValueError(f"The {vertex} bodypart is not in bodyparts!")
                v0 = self.get_coords(data_dict, clip1, vertex)
                dd = []
                # distance from the bodypart to the line through each polygon edge
                for i, x in enumerate(self.zone_vertices[zone]):
                    v1 = self.get_coords(data_dict, clip1, x)
                    next_i = (i + 1) % len(self.zone_vertices[zone])
                    v2 = self.get_coords(
                        data_dict, clip1, self.zone_vertices[zone][next_i]
                    )
                    d = np.abs(
                        (v2[:, 0] - v1[:, 0]) * (v1[:, 1] - v0[:, 1])
                        - (v1[:, 0] - v0[:, 0]) * (v2[:, 1] - v1[:, 1])
                    ) / np.sqrt(
                        (v2[:, 0] - v1[:, 0]) ** 2 + (v2[:, 1] - v1[:, 1]) ** 2 + 1e-7
                    )
                    d[(v2[:, 0] == v1[:, 0]) * (v2[:, 1] == v1[:, 1])] = 0
                    dd.append(d)
                dd = np.stack(dd, axis=0)
                dd = np.min(dd, 0)
                distances.append(dd)
            if len(distances) == 0:
                distances = None
            else:
                distances = np.stack(distances, axis=1)

        if clip1 != clip2:
            return intra_distance, xy_coord_joints_1, xy_coord_joints_2, n_frames
        else:
            return (
                intra_distance,
                xy_coord_joints_1,
                n_frames,
                angle_joints_radian,
                areas,
                angles,
                zone_bools,
                distances,
            )

    def _kinematic_features_pair(
        self, data_dict: Dict, clip1: str, clip2: str, name: str
    ) -> Dict:
        """Compute features for a pair of clips."""
        if clip1 == clip2:
            (
                intra_distance,
                xy_coord_joints,
                n_frames,
                angle_joints_radian,
                areas,
                angles,
                zone_bools,
                zone_distances,
            ) = self._distance(data_dict, clip1, clip2, name)
        else:
            (
                intra_distance,
                xy_coord_joints_1,
                xy_coord_joints_2,
                n_frames,
            ) = self._distance(data_dict, clip1, clip2, name)
            xy_coord_joints = xy_coord_joints_2 - xy_coord_joints_1

        xy_coord_joints = xy_coord_joints.transpose((1, 2, 0))

        speed_joints = np.diff(xy_coord_joints, axis=-1)
        speed_joints[xy_coord_joints[..., :-1] == 0] = 0
        speed_joints = np.insert(speed_joints, 0, speed_joints[:, :, 0], axis=-1)

        # acceleration
        acc_joints = np.asarray([np.diff(speed_joint) for speed_joint in speed_joints])
        acc_joints = np.insert(acc_joints, 0, acc_joints[:, :, 0], axis=-1)
        acc_joints = np.linalg.norm(acc_joints, axis=1)

        # from matplotlib import pyplot as plt
        # print(f'{xy_coord_joints.shape=}')
        # plt.scatter(xy_coord_joints[:, 0, 0],
        #             xy_coord_joints[:, 1, 0])
        # plt.xlim(-0.5, 0.5)
        # plt.ylim(-0.5, 0.5)
        # plt.show()

        features = {}
        if "coords" in self.keys:
            features["coords"] = copy.copy(xy_coord_joints).reshape((-1, n_frames)).T
        if "center" in self.keys:
            features["center"] = xy_coord_joints.mean(0).T
        if "coord_diff" in self.keys:
            features["coord_diff"] = (
                (xy_coord_joints - np.expand_dims(xy_coord_joints.mean(0), 0))
                .reshape((-1, n_frames))
                .T
            )
        if "intra_distance" in self.keys:
            features["intra_distance"] = intra_distance
        if "speed_joints" in self.keys:
            features["speed_joints"] = speed_joints.reshape((-1, n_frames)).T
        if "speed_direction" in self.keys or "speed_value" in self.keys:
            values = np.expand_dims(np.linalg.norm(speed_joints, axis=1), 1) + 1e-7
            directions = speed_joints / values
            if "speed_direction" in self.keys:
                features["speed_direction"] = directions.reshape((-1, n_frames)).T
            if "speed_value" in self.keys:
                features["speed_value"] = values.reshape((-1, n_frames)).T
        if (
            "angle_speeds" in self.keys or "angle_joints_radian" in self.keys
        ) and clip1 == clip2:
            features["angle_speeds"] = angle_joints_radian
        if "angles" in self.keys and clip1 == clip2 and self.angle_pairs is not None:
            features["angles"] = angles
        if "acc_joints" in self.keys:
            features["acc_joints"] = acc_joints.T
        if "areas" in self.keys and clip1 == clip2 and areas is not None:
            features["areas"] = areas * 10
        if "zone_bools" in self.keys and clip1 == clip2 and zone_bools is not None:
            features["zone_bools"] = zone_bools
        if (
            "zone_distances" in self.keys
            and clip1 == clip2
            and zone_distances is not None
        ):
            features["zone_distances"] = zone_distances
        if clip1 == clip2 and "likelihood" in self.keys:
            likelihood = [
                self.get_likelihood(data_dict, clip1, bp) for bp in self.get_bodyparts()
            ]
            if likelihood[0] is not None:
                likelihood = np.stack(likelihood, 1)
                features["likelihood"] = likelihood
        return features

    def extract_features(
        self,
        data_dict: Dict,
        video_id: str,
        prefix: str = None,
        one_clip: bool = False,
    ) -> Dict:
        """Extract features from a data dictionary.

        An input store will call this method while pre-computing a dataset. We do not assume a specific
        structure in the data dictionary, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        prefix : str, optional
            a prefix for the feature names
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
            values are numpy arrays of shape `(#features, #frames)`

        """
        features = {}
        keys = [x for x in data_dict.keys() if x not in self.ignored_clips]
        if self.interactive:
            if one_clip:
                agents = [keys]
            else:
                agents = combinations(keys, 2)
        else:
            agents = [[x] for x in keys]
        for clip_ids in agents:
            clip_features = {}
            for clip in clip_ids:
                single_features = self._kinematic_features_pair(
                    data_dict, clip, clip, video_id
                )
                for key, value in single_features.items():
                    name = key
                    if prefix is not None or len(clip_ids) > 1:
                        name += "---"
                    if prefix is not None:
                        name += prefix
                    if len(clip_ids) > 1:
                        name += clip
                    clip_features[name] = single_features[key]
            if len(clip_ids) > 1 and "inter_distance" in self.keys:
                for clip1, clip2 in combinations(clip_ids, 2):
                    distance, *_ = self._distance(
                        data_dict, clip1, clip2, video_id, centroid=True
                    )
                    name = "inter_distance---"
                    if prefix is not None:
                        name += prefix
                    name += f"{clip1}+{clip2}"
                    clip_features[name] = distance
            if one_clip:
                combo_name = "all"
            else:
                combo_name = "+".join(map(str, clip_ids))
            features[video_id + "---" + combo_name] = clip_features
        if self.neighboring_frames != 0:
            for key in features.keys():
                for clip_key in features[key].keys():
                    new_feature = []
                    for i in range(
                        self.neighboring_frames + 1,
                        features[key][clip_key].shape[0] - self.neighboring_frames,
                    ):
                        new_feature.append(
                            features[key][clip_key][
                                i - self.neighboring_frames : i + self.neighboring_frames,
                                :,
                            ].flatten()
                        )
                    features[key][clip_key] = np.stack(new_feature, axis=0)
        return features


class HeatmapExtractor(PoseFeatureExtractor):
    """A heatmap feature extractor.

    Creates an image for every frame with keypoints as blurred points on the image.
    """

    def __init__(
        self,
        input_store: PoseInputStore,
        canvas_shape: List,
        heatmap_width: int = 128,
        keys: Set = None,
        ignored_clips: List = None,
        interactive: bool = False,
        sigma: float = 0.1,
        channel_policy: str = "color",
        *args,
        **kwargs,
    ) -> None:
        """Initialize the extractor.

        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        canvas_shape : list
            the shape of the input data canvas
        heatmap_width : int, default 128
            the width of the resulting images (in pixels)
        keys : set, optional
            a set of string keys to use (choose from `['coords_heatmap', 'motion_heatmap']`, by default all are used)
        ignored_clips : list, optional
            a list of string clip ids to ignore
        interactive : bool, default False
            if `True`, features are extracted for pairs of clips
        sigma : float, default 0.1
            the standard deviation of the gaussian kernel (0 for no smoothing)
        channel_policy : {"color", "black&white", "bp"}
            if "black&white" the heatmaps have one channel and all keypoints are
            equivalent; if "color" each keypoint is assigned a unique color; if "bp" each keypoint has a separate channel

        """
        if ignored_clips is None:
            ignored_clips = []
        if keys is None:
            keys = ["coords_heatmap", "motion_heatmap"]
        self.keys = keys
        self.ignored_clips = ignored_clips
        self.interactive = interactive
        self.sigma = sigma
        self.canvas_shape = canvas_shape
        self.heatmap_width = heatmap_width - 1
        x, y = canvas_shape
        self.image_shape = (heatmap_width, int(y * heatmap_width / x) + 1)
        self.channel_policy = channel_policy
        self.cmap = get_cmap("gist_rainbow")
        super().__init__(input_store)

    def _get_image(
        self, data_dict: Dict, clip_id: str, n_frames: int, bodyparts: List
    ) -> np.ndarray:
        """Generate an array of images from coordinates for one clip."""
        policy_dict = {"color": 3, "bp": len(bodyparts), "black&white": 1}
        image = np.zeros(
            (n_frames, policy_dict[self.channel_policy], *self.image_shape)
        )
        values = (
            np.round(
                np.stack(
                    [self.get_coords(data_dict, clip_id, bp) for bp in bodyparts],
                    axis=1,
                )
                * self.heatmap_width
            )
        ).astype(int)
        sample = np.zeros((5, 5))
        sample[3, 3] = 1
        m = np.max(gaussian_filter(sample, sigma=self.sigma))
        for i, frame_values in enumerate(values):
            if self.channel_policy == "black&white":
                image[i, 0, frame_values[:, 0], frame_values[:, 1]] = 1
            elif self.channel_policy == "bp":
                image[
                    i,
                    list(range(len(frame_values))),
                    frame_values[:, 0],
                    frame_values[:, 1],
                ] = 1
            elif self.channel_policy == "color":
                arr = np.linspace(0, 1, frame_values.shape[0])
                for j in range(frame_values.shape[0]):
                    image[i, :, frame_values[j, 0], frame_values[j, 1]] = self.cmap(
                        arr[j]
                    )[:-1]
            if self.sigma > 0:
                for channel in range(image.shape[1]):
                    image[i, channel] = gaussian_filter(
                        image[i, channel], sigma=self.sigma
                    )
        image /= m
        return image

    def extract_features(
        self,
        data_dict: Dict,
        video_id: str,
        prefix: str = None,
        one_clip: bool = False,
    ) -> Dict:
        """Extract features from a data dictionary.

        An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
        video id and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about video
        and clip ids. We do not assume a specific
        structure in the values, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        prefix : str, optional
            a prefix to add to the feature names
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
            values are numpy arrays of shape `(#features, ..., #frames)`

        """
        features = {}
        keys = [x for x in data_dict.keys() if x not in self.ignored_clips]
        if self.interactive:
            if one_clip:
                agents = [keys]
            else:
                agents = combinations(keys, 2)
        else:
            agents = [[x] for x in keys]
        bodyparts = self.get_bodyparts()
        for clip_ids in agents:
            clip_features = {}
            n_frames = self.get_n_frames(data_dict, clip_ids[0])
            # n_frames = self.get_n_frames(data_dict, "+".join(sorted(clip_ids)))
            policy_dict = {"color": 3, "bp": len(bodyparts), "black&white": 1}
            image = np.zeros(
                (n_frames, policy_dict[self.channel_policy], *self.image_shape)
            )
            for clip in clip_ids:
                image += self._get_image(data_dict, clip, n_frames, bodyparts)
            image = np.clip(image, 0, 1)
            if "coords_heatmap" in self.keys:
                name = "coords_heatmap"
                if prefix is not None:
                    name += "---"
                    name += prefix
                clip_features[name] = image
            if "motion_heatmap" in self.keys:
                name = "motion_heatmap"
                if prefix is not None:
                    name += "---"
                    name += prefix
                image = np.diff(image, axis=0)
                clip_features[name] = np.pad(image, ((1, 0), (0, 0), (0, 0), (0, 0)))
            if one_clip:
                combo_name = "all"
            else:
                combo_name = "+".join(map(str, clip_ids))
            features[video_id + "---" + combo_name] = clip_features
        return features
```
class FeatureExtractor(ABC):
The base class for feature extractors.
The extract_features method receives a data dictionary as input.
We do not assume a specific
structure in the values and all necessary information (coordinates of a bodypart, number
of frames, list of bodyparts) is inferred using input store methods. Therefore, each child class
of FeatureExtractor is written for a specific subclass of dlc2action.data.base_store.InputStore
with the data inference
functions defined (i.e. dlc2action.data.base_store.PoseInputStore).
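
For orientation, a minimal subclass might look like this (a sketch only; it assumes the clip values are already numeric arrays, which a real input store does not guarantee, and a real extractor would concatenate clips when `one_clip=True` rather than overwrite them):

```python
import numpy as np
from typing import Dict, List

from dlc2action.feature_extraction import FeatureExtractor


class CoordsOnlyExtractor(FeatureExtractor):
    """A toy extractor that passes per-clip arrays through as a 'coords' feature."""

    def __init__(self, ignored_clips: List = None, **kwargs):
        self.ignored_clips = ignored_clips or []

    def extract_features(
        self, data_dict: Dict, video_id: str, one_clip: bool = False
    ) -> Dict:
        features = {}
        for clip_id, values in data_dict.items():
            if clip_id in self.ignored_clips:
                continue
            # mirror the '{video_id}---{clip}' key layout used by KinematicExtractor
            key = video_id + "---" + ("all" if one_clip else str(clip_id))
            # one (F, ..., L) tensor per feature name
            features.setdefault(key, {})["coords"] = np.asarray(values)
        return features
```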
@abstractmethod
FeatureExtractor.__init__(ignored_clips: List = None, **kwargs)
Initialize the feature extractor.
Parameters
ignored_clips : list
    a list of string names of clip ids to ignore
input_store_class = None
    The `dlc2action.data.base_store.InputStore` child class paired with this feature extractor.
@abstractmethod
FeatureExtractor.extract_features(data_dict: Dict, video_id: str, one_clip: bool = False) -> Dict
Extract features from a data dictionary.
An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
video id and have clip ids as keys. Read the documentation at dlc2action.data to find out more about video
and clip ids. We do not assume a specific
structure in the values, so all necessary information (coordinates of a bodypart, number
of frames, list of bodyparts) is inferred using input store methods.
Parameters
data_dict : dict
    the data dictionary
video_id : str
    the id of the video associated with the data dictionary
one_clip : bool, default False
    if True, all features will be concatenated and assigned to one clip named 'all'
Returns
features : dict
    a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
    values are numpy arrays of shape (#features, ..., #frames)
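
To make the contract concrete, here is a sketch with hypothetical video and clip ids (the key layout shown is the one used by the concrete extractors in this module):

```python
# A data dictionary relates to one video and is keyed by clip ids;
# the clip values are whatever the paired input store produces.
data_dict = {"mouse1": ..., "mouse2": ...}

# extractor.extract_features(data_dict, video_id="video1") returns
# {"video1---mouse1": {...}, "video1---mouse2": {...}}, and with
# one_clip=True everything is assigned to one clip: {"video1---all": {...}}.
```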
class PoseFeatureExtractor(FeatureExtractor):
The base class for pose feature extractors.
Pose feature extractors work with dlc2action.data.base_store.InputStore instances
that inherit from dlc2action.data.base_store.PoseInputStore.
PoseFeatureExtractor.__init__(input_store: PoseInputStore, *args, **kwargs)
Initialize the extractor.
Parameters
input_store : PoseInputStore
    the input store object
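
The constructor binds the store's data inference methods (`get_bodyparts`, `get_coords`, `get_n_frames`, `get_likelihood`) to the extractor, so child classes can query the data directly. For instance, `KinematicExtractor._distance` assembles a coordinate array with the pattern sketched below:

```python
# inside a PoseFeatureExtractor child class method:
coords = np.stack(
    [self.get_coords(data_dict, clip_id, bp) for bp in self.get_bodyparts()],
    axis=1,
)  # shape (#frames, #bodyparts, #dimensions)
```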
input_store_class = PoseInputStore
    The `dlc2action.data.base_store.InputStore` child class paired with this feature extractor.
class KinematicExtractor(PoseFeatureExtractor):
A feature extractor for basic kinematic features: speeds, accelerations, distances.
The available keys are:
- coords: the allocentric bodypart coordinates,
- coord_diff: the egocentric bodypart coordinates,
- center: the body center (mean of bodyparts) coordinates,
- intra_distance: distances between bodyparts (pairs set in `distance_pairs` or all combinations by default),
- inter_distance: computed in interactive mode (for pairs of animals); distances from each bodypart of each animal to the centroid between them,
- speed_direction: unit vector of speed approximation for each bodypart,
- speed_value: l2 norm of the speed approximation vector for each bodypart,
- acc_joints: l2 norm of the acceleration approximation vector for each bodypart,
- angle_speeds: vector of angle speed approximation for each bodypart,
- angles: cosines of angles set in `angle_pairs`,
- areas: areas of polygons set in `area_vertices`,
- zone_bools: binary identifier of zone visitation, defined in `zone_bools`,
- zone_distances: distance to zone boundary, defined in `zone_distances`,
- likelihood: pose estimation likelihood (if known).
The default set is {coord_diff, center, intra_distance, inter_distance, speed_direction, speed_value, acc_joints, angle_speeds}.
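As an illustration, assuming this class is exposed as `KinematicExtractor` and that `store` is an instance of a compatible `dlc2action.data.base_store.PoseInputStore` subclass (neither is shown in this excerpt), construction with a custom key subset could look like:

    from dlc2action.feature_extraction import KinematicExtractor

    extractor = KinematicExtractor(
        input_store=store,  # hypothetical PoseInputStore instance
        keys=["coord_diff", "speed_value", "acc_joints"],  # subset of the keys listed above
    )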
    def __init__(
        self,
        input_store: PoseInputStore,
        keys: List = None,
        ignored_clips: List = None,
        interactive: bool = False,
        averaging_window: int = 1,
        distance_pairs: List = None,
        angle_pairs: List = None,
        neighboring_frames: int = 0,
        area_vertices: List = None,
        zone_vertices: Dict = None,
        zone_bools: List = None,
        zone_distances: List = None,
        *args,
        **kwargs,
    ) -> None:
        """Initialize the extractor.

        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        keys : list, optional
            a list of names of the features to extract
        ignored_clips : list, optional
            a list of clip ids to ignore
        interactive : bool, default False
            if `True`, features for pairs of clips will be computed
        averaging_window : int, default 1
            if >1, features are averaged with a moving window of this size (in frames)
        distance_pairs : list, optional
            a list of bodypart name tuples (e.g. `[("tail", "nose")]`) to compute distances for when
            `"intra_distance"` is in `keys` (by default all distances are computed)
        angle_pairs : list, optional
            a list of bodypart name triplets (e.g. `[("ear1", "nose", "ear2")]`, for the angle between the
            `"ear1"--"nose"` and `"nose"--"ear2"` lines) to compute angle cosines for when `"angles"` is in
            `keys` (by default no angles are computed)
        neighboring_frames : int, default 0
            if >0, this number of neighboring frames is aggregated in the center frame features
            (generally not recommended)
        area_vertices : list, optional
            a list of bodypart name tuples of any length >= 3 (e.g. `[("ear1", "nose", "ear2", "spine1")]`)
            that define polygons to compute areas for when `"areas"` is in `keys` (by default no areas
            are computed)
        zone_vertices : dict, optional
            a dictionary of bodypart name tuples of any length >= 3 that define zones for the `"zone_bools"`
            and `"zone_distances"` features; keys should be zone names and values should be tuples that
            define the polygons (e.g. `{"main_area": ("x_min", "x_max", "y_max", "y_min")}`)
        zone_bools : list, optional
            a list of zone and bodypart name tuples to compute binary identifiers for (1 if an animal is
            within the polygon or 0 if it's outside) (e.g. `[("main_area", "nose")]`); the zones should be
            defined in the `zone_vertices` parameter; this is only computed if `"zone_bools"` is in `keys`
        zone_distances : list, optional
            a list of zone and bodypart name tuples to compute distances for (distance from the bodypart to
            the closest of the boundaries) (e.g. `[("main_area", "nose")]`); the zones should be defined in
            the `zone_vertices` parameter; this is only computed if `"zone_distances"` is in `keys`

        """
        if keys is None:
            keys = [
                "coord_diff",
                "center",
                "intra_distance",
                "speed_direction",
                "speed_value",
                "angle_speeds",
                "acc_joints",
                "inter_distance",
            ]
        if ignored_clips is None:
            ignored_clips = []
        if zone_vertices is None:
            zone_vertices = {}
        if zone_bools is None:
            zone_bools = []
        if zone_distances is None:
            zone_distances = []
        self.keys = keys
        self.ignored_clips = ignored_clips
        self.interactive = interactive
        self.averaging_window = int(averaging_window)
        self.distance_pairs = distance_pairs
        self.angle_pairs = angle_pairs
        self.area_vertices = area_vertices
        self.neighboring_frames = int(neighboring_frames)
        self.zone_vertices = zone_vertices
        self.zone_bools = zone_bools
        self.zone_distances = zone_distances
        super().__init__(input_store)
Initialize the extractor.
Parameters
input_store : PoseInputStore
    the input store object
keys : list, optional
    a list of names of the features to extract
ignored_clips : list, optional
    a list of clip ids to ignore
interactive : bool, default False
    if True, features for pairs of clips will be computed
averaging_window : int, default 1
    if >1, features are averaged with a moving window of this size (in frames)
distance_pairs : list, optional
    a list of bodypart name tuples (e.g. [("tail", "nose")]) to compute distances for when "intra_distance"
    is in keys (by default all distances are computed)
angle_pairs : list, optional
    a list of bodypart name triplets (e.g. [("ear1", "nose", "ear2")], for the angle between the "ear1"--"nose"
    and "nose"--"ear2" lines) to compute angle cosines for when "angles" is in keys (by default no angles are computed)
neighboring_frames : int, default 0
    if >0, this number of neighboring frames is aggregated in the center frame features (generally not recommended)
area_vertices : list, optional
    a list of bodypart name tuples of any length >= 3 (e.g. [("ear1", "nose", "ear2", "spine1")]) that define polygons
    to compute areas for when "areas" is in keys (by default no areas are computed)
zone_vertices : dict, optional
    a dictionary of bodypart name tuples of any length >= 3 that define zones for the "zone_bools" and "zone_distances"
    features; keys should be zone names and values should be tuples that define the polygons (e.g.
    {"main_area": ("x_min", "x_max", "y_max", "y_min")})
zone_bools : list, optional
    a list of zone and bodypart name tuples to compute binary identifiers for (1 if an animal is within the polygon or
    0 if it's outside) (e.g. [("main_area", "nose")]); the zones should be defined in the zone_vertices parameter;
    this is only computed if "zone_bools" is in keys
zone_distances : list, optional
    a list of zone and bodypart name tuples to compute distances for (distance from the bodypart to the closest of the
    boundaries) (e.g. [("main_area", "nose")]); the zones should be defined in the zone_vertices parameter;
    this is only computed if "zone_distances" is in keys
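For a fuller (hypothetical) configuration exercising the geometry-related parameters, with bodypart and zone names that would have to match your input store:

    extractor = KinematicExtractor(
        input_store=store,  # hypothetical PoseInputStore instance
        keys=["intra_distance", "angles", "areas", "zone_bools"],
        distance_pairs=[("tail", "nose")],
        angle_pairs=[("ear1", "nose", "ear2")],
        area_vertices=[("ear1", "nose", "ear2", "spine1")],
        zone_vertices={"main_area": ("x_min", "x_max", "y_max", "y_min")},
        zone_bools=[("main_area", "nose")],
    )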
def extract_features(self, data_dict: Dict, video_id: str, prefix: str = None, one_clip: bool = False) -> Dict:
Extract features from a data dictionary.
An input store will call this method while pre-computing a dataset. We do not assume a specific structure in the data dictionary, so all necessary information (coordinates of a bodypart, number of frames, list of bodyparts) is inferred using input store methods.
Parameters
data_dict : dict
    the data dictionary
video_id : str
    the id of the video associated with the data dictionary
prefix : str, optional
    a prefix for the feature names
one_clip : bool, default False
    if True, all features will be concatenated and assigned to one clip named 'all'
Returns
features : dict
    a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
    values are numpy arrays of shape (#features, #frames)
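Per the source shown earlier, the returned dictionary is keyed by '{video_id}---{clip combination}' (clip ids joined with '+' in interactive mode, or 'all' when one_clip=True), and each value maps feature names to arrays. A hedged usage sketch, with data_dict coming from the paired input store:

    features = extractor.extract_features(data_dict, video_id="video1")

    for combo_key, clip_features in features.items():
        # e.g. 'video1---mouse1', or 'video1---mouse1+mouse2' in interactive mode
        for name, array in clip_features.items():
            print(combo_key, name, array.shape)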
class HeatmapExtractor(PoseFeatureExtractor):
    """A heatmap feature extractor.

    Creates an image for every frame with keypoints as blurred points on the image.
    """

    def __init__(
        self,
        input_store: PoseInputStore,
        canvas_shape: List,
        heatmap_width: int = 128,
        keys: Set = None,
        ignored_clips: List = None,
        interactive: bool = False,
        sigma: float = 0.1,
        channel_policy: str = "color",
        *args,
        **kwargs,
    ) -> None:
        """Initialize the extractor.

        Parameters
        ----------
        input_store : PoseInputStore
            the input store object
        canvas_shape : list
            the shape of the input data canvas
        heatmap_width : int, default 128
            the width of the resulting images (in pixels)
        keys : set, optional
            a set of string keys to use (choose from `['coords_heatmap', 'motion_heatmap']`, by default all are used)
        ignored_clips : set, optional
            a set of string clip ids to ignore
        interactive : bool, default False
            if `True`, features are extracted for pairs of clips
        sigma : float, default 0.1
            the standard deviation of the gaussian kernel (0 for no smoothing)
        channel_policy : {"color", "black&white", "bp"}
            if "black&white" the heatmaps have one channel and all keypoints are
            equivalent; if "color" each keypoint is assigned a unique color; if "bp" each keypoint has a separate channel

        """
        if ignored_clips is None:
            ignored_clips = []
        if keys is None:
            keys = ["coords_heatmap", "motion_heatmap"]
        self.keys = keys
        self.ignored_clips = ignored_clips
        self.interactive = interactive
        self.sigma = sigma
        self.canvas_shape = canvas_shape
        self.heatmap_width = heatmap_width - 1
        x, y = canvas_shape
        # scale the second dimension to preserve the canvas aspect ratio
        self.image_shape = (heatmap_width, int(y * heatmap_width / x) + 1)
        self.channel_policy = channel_policy
        self.cmap = get_cmap("gist_rainbow")
        super().__init__(input_store)

    def _get_image(
        self, data_dict: Dict, clip_id: str, n_frames: int, bodyparts: List
    ) -> np.ndarray:
        """Generate an array of images from coordinates for one clip."""
        policy_dict = {"color": 3, "bp": len(bodyparts), "black&white": 1}
        image = np.zeros(
            (n_frames, policy_dict[self.channel_policy], *self.image_shape)
        )
        # map normalized coordinates to integer pixel indices
        values = (
            np.round(
                np.stack(
                    [self.get_coords(data_dict, clip_id, bp) for bp in bodyparts],
                    axis=1,
                )
                * self.heatmap_width
            )
        ).astype(int)
        # normalization constant: the peak of a gaussian-blurred unit impulse
        sample = np.zeros((5, 5))
        sample[3, 3] = 1
        m = np.max(gaussian_filter(sample, sigma=self.sigma))
        for i, frame_values in enumerate(values):
            if self.channel_policy == "black&white":
                image[i, 0, frame_values[:, 0], frame_values[:, 1]] = 1
            elif self.channel_policy == "bp":
                image[
                    i,
                    list(range(len(frame_values))),
                    frame_values[:, 0],
                    frame_values[:, 1],
                ] = 1
            elif self.channel_policy == "color":
                arr = np.linspace(0, 1, frame_values.shape[0])
                for j in range(frame_values.shape[0]):
                    image[i, :, frame_values[j, 0], frame_values[j, 1]] = self.cmap(
                        arr[j]
                    )[:-1]
            if self.sigma > 0:
                for channel in range(image.shape[1]):
                    image[i, channel] = gaussian_filter(
                        image[i, channel], sigma=self.sigma
                    )
        image /= m
        return image

    def extract_features(
        self,
        data_dict: Dict,
        video_id: str,
        prefix: str = None,
        one_clip: bool = False,
    ) -> Dict:
        """Extract features from a data dictionary.

        An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
        video id and have clip ids as keys. Read the documentation at `dlc2action.data` to find out more about video
        and clip ids. We do not assume a specific
        structure in the values, so all necessary information (coordinates of a bodypart, number
        of frames, list of bodyparts) is inferred using input store methods.

        Parameters
        ----------
        data_dict : dict
            the data dictionary
        video_id : str
            the id of the video associated with the data dictionary
        prefix : str, optional
            a prefix to add to the feature names
        one_clip : bool, default False
            if `True`, all features will be concatenated and assigned to one clip named `'all'`

        Returns
        -------
        features : dict
            a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
            values are numpy arrays of shape `(#features, ..., #frames)`

        """
        features = {}
        keys = [x for x in data_dict.keys() if x not in self.ignored_clips]
        if self.interactive:
            if one_clip:
                agents = [keys]
            else:
                agents = combinations(keys, 2)
        else:
            agents = [[x] for x in keys]
        bodyparts = self.get_bodyparts()
        for clip_ids in agents:
            clip_features = {}
            n_frames = self.get_n_frames(data_dict, clip_ids[0])
            policy_dict = {"color": 3, "bp": len(bodyparts), "black&white": 1}
            image = np.zeros(
                (n_frames, policy_dict[self.channel_policy], *self.image_shape)
            )
            # overlay all clips of the combination on one canvas
            for clip in clip_ids:
                image += self._get_image(data_dict, clip, n_frames, bodyparts)
            image = np.clip(image, 0, 1)
            if "coords_heatmap" in self.keys:
                name = "coords_heatmap"
                if prefix is not None:
                    name += "---"
                    name += prefix
                clip_features[name] = image
            if "motion_heatmap" in self.keys:
                name = "motion_heatmap"
                if prefix is not None:
                    name += "---"
                    name += prefix
                image = np.diff(image, axis=0)
                clip_features[name] = np.pad(image, ((1, 0), (0, 0), (0, 0), (0, 0)))
            if one_clip:
                combo_name = "all"
            else:
                combo_name = "+".join(map(str, clip_ids))
            features[video_id + "---" + combo_name] = clip_features
        return features
A heatmap feature extractor.
Creates an image for every frame with keypoints as blurred points on the image.
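The rendering relies on scipy.ndimage.gaussian_filter; a minimal standalone sketch of blurring a single keypoint and normalizing by the peak of a blurred unit impulse, mirroring the _get_image logic shown above:

    import numpy as np
    from scipy.ndimage import gaussian_filter

    sigma = 0.1
    frame = np.zeros((128, 128))
    frame[40, 60] = 1  # one keypoint at pixel (40, 60)
    frame = gaussian_filter(frame, sigma=sigma)

    # peak of a blurred unit impulse, used to rescale the heatmap maximum back to 1
    impulse = np.zeros((5, 5))
    impulse[3, 3] = 1
    frame /= np.max(gaussian_filter(impulse, sigma=sigma))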
def __init__(self, input_store: PoseInputStore, canvas_shape: List, heatmap_width: int = 128, keys: Set = None, ignored_clips: List = None, interactive: bool = False, sigma: float = 0.1, channel_policy: str = "color", *args, **kwargs) -> None:
Initialize the extractor.
Parameters
input_store : PoseInputStore
    the input store object
canvas_shape : list
    the shape of the input data canvas
heatmap_width : int, default 128
    the width of the resulting images (in pixels)
keys : set, optional
    a set of string keys to use (choose from ['coords_heatmap', 'motion_heatmap'], by default all are used)
ignored_clips : set, optional
    a set of string clip ids to ignore
interactive : bool, default False
    if True, features are extracted for pairs of clips
sigma : float, default 0.1
    the standard deviation of the gaussian kernel (0 for no smoothing)
channel_policy : {"color", "black&white", "bp"}
    if "black&white" the heatmaps have one channel and all keypoints are
    equivalent; if "color" each keypoint is assigned a unique color; if "bp" each keypoint has a separate channel
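For example, with canvas_shape=[640, 480] and the default heatmap_width=128, the constructor computes image_shape = (128, int(480 * 128 / 640) + 1) = (128, 97). A construction sketch, again assuming a hypothetical store:

    extractor = HeatmapExtractor(
        input_store=store,    # hypothetical PoseInputStore instance
        canvas_shape=[640, 480],
        heatmap_width=128,
        channel_policy="bp",  # one channel per bodypart
        sigma=0.1,
    )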
def extract_features(self, data_dict: Dict, video_id: str, prefix: str = None, one_clip: bool = False) -> Dict:
Extract features from a data dictionary.
An input store will call this method while pre-computing a dataset. The data dictionary has to relate to one
video id and have clip ids as keys. Read the documentation at dlc2action.data to find out more about video
and clip ids. We do not assume a specific
structure in the values, so all necessary information (coordinates of a bodypart, number
of frames, list of bodyparts) is inferred using input store methods.
Parameters
data_dict : dict
    the data dictionary
video_id : str
    the id of the video associated with the data dictionary
prefix : str, optional
    a prefix to add to the feature names
one_clip : bool, default False
    if True, all features will be concatenated and assigned to one clip named 'all'
Returns
features : dict
    a features dictionary where the keys are the feature names (e.g. 'coords', 'distances') and the
    values are numpy arrays of shape (#features, ..., #frames)
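A sketch of the call under the same assumptions; following the source, each value stacks one image per frame, so with channel_policy="color" a clip's 'coords_heatmap' array would have shape (#frames, 3, height, width):

    features = extractor.extract_features(data_dict, video_id="video1")
    for combo_key, clip_features in features.items():
        # e.g. (n_frames, 3, 128, 97) for the construction example above
        print(combo_key, clip_features["coords_heatmap"].shape)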