From 8c84bbfde4cbe3e88216c804c92a4d89b861decc Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 4 Sep 2023 21:46:57 +0200 Subject: [PATCH] Prune old egui memory data when reaching some limit (#3299) * Add generations to serialized state * Make IdTypeMap into a field struct * Less code duplication * Implement garbage collection during serialization * Add unit-test * Add docstring * Build fix * another fix --- crates/egui/src/util/id_type_map.rs | 378 +++++++++++++++++++++++----- 1 file changed, 312 insertions(+), 66 deletions(-) diff --git a/crates/egui/src/util/id_type_map.rs b/crates/egui/src/util/id_type_map.rs index 1f2960e6..a9867eb1 100644 --- a/crates/egui/src/util/id_type_map.rs +++ b/crates/egui/src/util/id_type_map.rs @@ -3,8 +3,7 @@ // For non-serializable types, these simply return `None`. // This will also allow users to pick their own serialization format per type. -use std::any::Any; -use std::sync::Arc; +use std::{any::Any, sync::Arc}; // ----------------------------------------------------------------------------------------------- @@ -32,6 +31,8 @@ impl From for TypeId { } } +impl nohash_hasher::IsEnabled for TypeId {} + // ----------------------------------------------------------------------------------------------- #[cfg(feature = "persistence")] @@ -54,11 +55,21 @@ impl SerializableAny for T where T: 'static + Any + Clone + for<'a> Send + Sy // ----------------------------------------------------------------------------------------------- -#[cfg(feature = "persistence")] #[cfg_attr(feature = "persistence", derive(serde::Deserialize, serde::Serialize))] +#[derive(Clone, Debug)] struct SerializedElement { + /// The type of value we are storing. type_id: TypeId, + + /// The ron data we can deserialize. ron: Arc, + + /// Increased by one each time we re-serialize an element that was never deserialized. + /// + /// Large value = old value that hasn't been read in a while. + /// + /// Used to garbage collect old values that hasn't been read in a while. + generation: usize, } #[cfg(feature = "persistence")] @@ -80,13 +91,7 @@ enum Element { }, /// A serialized value - Serialized { - /// The type of value we are storing. - type_id: TypeId, - - /// The ron data we can deserialize. - ron: Arc, - }, + Serialized(SerializedElement), } impl Clone for Element { @@ -104,10 +109,7 @@ impl Clone for Element { serialize_fn: *serialize_fn, }, - Self::Serialized { type_id, ron } => Self::Serialized { - type_id: *type_id, - ron: ron.clone(), - }, + Self::Serialized(element) => Self::Serialized(element.clone()), } } } @@ -116,13 +118,18 @@ impl std::fmt::Debug for Element { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self { Self::Value { value, .. } => f - .debug_struct("MaybeSerializable::Value") + .debug_struct("Element::Value") .field("type_id", &value.type_id()) .finish_non_exhaustive(), - Self::Serialized { type_id, ron } => f - .debug_struct("MaybeSerializable::Serialized") - .field("type_id", &type_id) - .field("ron", &ron) + Self::Serialized(SerializedElement { + type_id, + ron, + generation, + }) => f + .debug_struct("Element::Serialized") + .field("type_id", type_id) + .field("ron", ron) + .field("generation", generation) .finish(), } } @@ -165,7 +172,7 @@ impl Element { pub(crate) fn type_id(&self) -> TypeId { match self { Self::Value { value, .. } => (**value).type_id().into(), - Self::Serialized { type_id, .. } => *type_id, + Self::Serialized(SerializedElement { type_id, .. }) => *type_id, } } @@ -173,7 +180,7 @@ impl Element { pub(crate) fn get_temp(&self) -> Option<&T> { match self { Self::Value { value, .. } => value.downcast_ref(), - Self::Serialized { .. } => None, + Self::Serialized(_) => None, } } @@ -181,7 +188,7 @@ impl Element { pub(crate) fn get_mut_temp(&mut self) -> Option<&mut T> { match self { Self::Value { value, .. } => value.downcast_mut(), - Self::Serialized { .. } => None, + Self::Serialized(_) => None, } } @@ -196,14 +203,14 @@ impl Element { *self = Self::new_temp(insert_with()); } } - Self::Serialized { .. } => { + Self::Serialized(_) => { *self = Self::new_temp(insert_with()); } } match self { Self::Value { value, .. } => value.downcast_mut().unwrap(), // This unwrap will never panic because we already converted object to required type - Self::Serialized { .. } => unreachable!(), + Self::Serialized(_) => unreachable!(), } } @@ -220,19 +227,19 @@ impl Element { } #[cfg(feature = "persistence")] - Self::Serialized { ron, .. } => { + Self::Serialized(SerializedElement { ron, .. }) => { *self = Self::new_persisted(from_ron_str::(ron).unwrap_or_else(insert_with)); } #[cfg(not(feature = "persistence"))] - Self::Serialized { .. } => { + Self::Serialized(_) => { *self = Self::new_persisted(insert_with()); } } match self { Self::Value { value, .. } => value.downcast_mut().unwrap(), // This unwrap will never panic because we already converted object to required type - Self::Serialized { .. } => unreachable!(), + Self::Serialized(_) => unreachable!(), } } @@ -241,17 +248,17 @@ impl Element { Self::Value { value, .. } => value.downcast_mut(), #[cfg(feature = "persistence")] - Self::Serialized { ron, .. } => { + Self::Serialized(SerializedElement { ron, .. }) => { *self = Self::new_persisted(from_ron_str::(ron)?); match self { Self::Value { value, .. } => value.downcast_mut(), - Self::Serialized { .. } => unreachable!(), + Self::Serialized(_) => unreachable!(), } } #[cfg(not(feature = "persistence"))] - Self::Serialized { .. } => None, + Self::Serialized(_) => None, } } @@ -268,15 +275,13 @@ impl Element { Some(SerializedElement { type_id: (**value).type_id().into(), ron: ron.into(), + generation: 1, }) } else { None } } - Self::Serialized { type_id, ron } => Some(SerializedElement { - type_id: *type_id, - ron: ron.clone(), - }), + Self::Serialized(element) => Some(element.clone()), } } } @@ -341,23 +346,36 @@ use crate::Id; /// assert_eq!(map.get_persisted::(b), Some(13.37)); /// assert_eq!(map.get_temp::(b), Some("Hello World".to_owned())); /// ``` -#[derive(Clone, Debug, Default)] -// We store use `id XOR typeid` as a key, so we don't need to hash again! -pub struct IdTypeMap(nohash_hasher::IntMap); +#[derive(Clone, Debug)] +// We use `id XOR typeid` as a key, so we don't need to hash again! +pub struct IdTypeMap { + map: nohash_hasher::IntMap, + + max_bytes_per_type: usize, +} + +impl Default for IdTypeMap { + fn default() -> Self { + Self { + map: Default::default(), + max_bytes_per_type: 256 * 1024, + } + } +} impl IdTypeMap { /// Insert a value that will not be persisted. #[inline] pub fn insert_temp(&mut self, id: Id, value: T) { let hash = hash(TypeId::of::(), id); - self.0.insert(hash, Element::new_temp(value)); + self.map.insert(hash, Element::new_temp(value)); } /// Insert a value that will be persisted next time you start the app. #[inline] pub fn insert_persisted(&mut self, id: Id, value: T) { let hash = hash(TypeId::of::(), id); - self.0.insert(hash, Element::new_persisted(value)); + self.map.insert(hash, Element::new_persisted(value)); } /// Read a value without trying to deserialize a persisted value. @@ -366,7 +384,7 @@ impl IdTypeMap { #[inline] pub fn get_temp(&self, id: Id) -> Option { let hash = hash(TypeId::of::(), id); - self.0.get(&hash).and_then(|x| x.get_temp()).cloned() + self.map.get(&hash).and_then(|x| x.get_temp()).cloned() } /// Read a value, optionally deserializing it if available. @@ -378,7 +396,7 @@ impl IdTypeMap { #[inline] pub fn get_persisted(&mut self, id: Id) -> Option { let hash = hash(TypeId::of::(), id); - self.0 + self.map .get_mut(&hash) .and_then(|x| x.get_mut_persisted()) .cloned() @@ -418,7 +436,7 @@ impl IdTypeMap { ) -> &mut T { let hash = hash(TypeId::of::(), id); use std::collections::hash_map::Entry; - match self.0.entry(hash) { + match self.map.entry(hash) { Entry::Vacant(vacant) => vacant .insert(Element::new_temp(insert_with())) .get_mut_temp() @@ -436,7 +454,7 @@ impl IdTypeMap { ) -> &mut T { let hash = hash(TypeId::of::(), id); use std::collections::hash_map::Entry; - match self.0.entry(hash) { + match self.map.entry(hash) { Entry::Vacant(vacant) => vacant .insert(Element::new_persisted(insert_with())) .get_mut_persisted() @@ -447,17 +465,28 @@ impl IdTypeMap { } } + /// For tests + #[cfg(feature = "persistence")] + #[allow(unused)] + fn get_generation(&self, id: Id) -> Option { + let element = self.map.get(&hash(TypeId::of::(), id))?; + match element { + Element::Value { .. } => Some(0), + Element::Serialized(SerializedElement { generation, .. }) => Some(*generation), + } + } + /// Remove the state of this type an id. #[inline] pub fn remove(&mut self, id: Id) { let hash = hash(TypeId::of::(), id); - self.0.remove(&hash); + self.map.remove(&hash); } /// Note all state of the given type. pub fn remove_by_type(&mut self) { let key = TypeId::of::(); - self.0.retain(|_, e| { + self.map.retain(|_, e| { let e: &Element = e; e.type_id() != key }); @@ -465,32 +494,32 @@ impl IdTypeMap { #[inline] pub fn clear(&mut self) { - self.0.clear(); + self.map.clear(); } #[inline] pub fn is_empty(&self) -> bool { - self.0.is_empty() + self.map.is_empty() } #[inline] pub fn len(&self) -> usize { - self.0.len() + self.map.len() } /// Count how many values are stored but not yet deserialized. #[inline] pub fn count_serialized(&self) -> usize { - self.0 + self.map .values() - .filter(|e| matches!(e, Element::Serialized { .. })) + .filter(|e| matches!(e, Element::Serialized(_))) .count() } /// Count the number of values are stored with the given type. pub fn count(&self) -> usize { let key = TypeId::of::(); - self.0 + self.map .iter() .filter(|(_, e)| { let e: &Element = e; @@ -498,6 +527,28 @@ impl IdTypeMap { }) .count() } + + /// The maximum number of bytes that will be used to + /// store the persisted state of a single widget type. + /// + /// Some egui widgets store persisted state that is + /// serialized to disk by some backends (e.g. `eframe`). + /// + /// Example of such widgets is `CollapsingHeader` and `Window`. + /// If you keep creating widgets with unique ids (e.g. `Windows` with many different names), + /// egui will use up more and more space for these widgets, until this limit is reached. + /// + /// Once this limit is reached, the state that was read the longest time ago will be dropped first. + /// + /// This value in itself will not be serialized. + pub fn max_bytes_per_type(&self) -> usize { + self.max_bytes_per_type + } + + /// See [`Self::max_bytes_per_type`]. + pub fn set_max_bytes_per_type(&mut self, max_bytes_per_type: usize) { + self.max_bytes_per_type = max_bytes_per_type; + } } #[inline(always)] @@ -516,25 +567,94 @@ struct PersistedMap(Vec<(u64, SerializedElement)>); impl PersistedMap { fn from_map(map: &IdTypeMap) -> Self { crate::profile_function!(); - // filter out the elements which cannot be serialized: - Self( - map.0 - .iter() - .filter_map(|(&hash, element)| Some((hash, element.to_serialize()?))) - .collect(), - ) + + use std::collections::BTreeMap; + + let mut types_map: nohash_hasher::IntMap = Default::default(); + #[derive(Default)] + struct TypeStats { + num_bytes: usize, + generations: BTreeMap, + } + #[derive(Default)] + struct GenerationStats { + num_bytes: usize, + elements: Vec<(u64, SerializedElement)>, + } + + let max_bytes_per_type = map.max_bytes_per_type; + + { + crate::profile_scope!("gather"); + for (hash, element) in &map.map { + if let Some(element) = element.to_serialize() { + let mut stats = types_map.entry(element.type_id).or_default(); + stats.num_bytes += element.ron.len(); + let mut generation_stats = + stats.generations.entry(element.generation).or_default(); + generation_stats.num_bytes += element.ron.len(); + generation_stats.elements.push((*hash, element)); + } else { + // temporary value that shouldn't be serialized + } + } + } + + let mut persisted = vec![]; + + { + crate::profile_scope!("gc"); + for stats in types_map.values() { + let mut bytes_written = 0; + + // Start with the most recently read values, and then go as far as we are allowed. + // Always include at least one generation. + for generation in stats.generations.values() { + if bytes_written == 0 + || bytes_written + generation.num_bytes <= max_bytes_per_type + { + persisted.append(&mut generation.elements.clone()); + bytes_written += generation.num_bytes; + } else { + // Omit the rest. The user hasn't read the values in a while. + break; + } + } + } + } + + Self(persisted) } fn into_map(self) -> IdTypeMap { crate::profile_function!(); - IdTypeMap( - self.0 - .into_iter() - .map(|(hash, SerializedElement { type_id, ron })| { - (hash, Element::Serialized { type_id, ron }) - }) - .collect(), - ) + let map = self + .0 + .into_iter() + .map( + |( + hash, + SerializedElement { + type_id, + ron, + generation, + }, + )| { + ( + hash, + Element::Serialized(SerializedElement { + type_id, + ron, + generation: generation + 1, // This is where we increment the generation! + }), + ) + }, + ) + .collect(); + IdTypeMap { + map, + ..Default::default() + } } } @@ -731,3 +851,129 @@ fn test_mix_serialize() { ); assert_eq!(map.get_temp::(id), Some(Serializable(555))); } + +#[cfg(feature = "persistence")] +#[test] +fn test_serialize_generations() { + use serde::{Deserialize, Serialize}; + + fn serialize_and_deserialize(map: &IdTypeMap) -> IdTypeMap { + let serialized = ron::to_string(map).unwrap(); + ron::from_str(&serialized).unwrap() + } + + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] + struct A(i32); + + let mut map: IdTypeMap = Default::default(); + for i in 0..3 { + map.insert_persisted(Id::new(i), A(i)); + } + for i in 0..3 { + assert_eq!(map.get_generation::(Id::new(i)), Some(0)); + } + + map = serialize_and_deserialize(&map); + + // We use generation 0 for non-serilized, + // 1 for things that have been serialized but never deserialized, + // and then we increment with 1 on each deserialize. + // So we should have generation 2 now: + for i in 0..3 { + assert_eq!(map.get_generation::(Id::new(i)), Some(2)); + } + + // Reading should reset: + assert_eq!(map.get_persisted::(Id::new(0)), Some(A(0))); + assert_eq!(map.get_generation::(Id::new(0)), Some(0)); + + // Generations should increment: + map = serialize_and_deserialize(&map); + assert_eq!(map.get_generation::(Id::new(0)), Some(2)); + assert_eq!(map.get_generation::(Id::new(1)), Some(3)); +} + +#[cfg(feature = "persistence")] +#[test] +fn test_serialize_gc() { + use serde::{Deserialize, Serialize}; + + fn serialize_and_deserialize(mut map: IdTypeMap, max_bytes_per_type: usize) -> IdTypeMap { + map.set_max_bytes_per_type(max_bytes_per_type); + let serialized = ron::to_string(&map).unwrap(); + ron::from_str(&serialized).unwrap() + } + + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] + struct A(usize); + + #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] + struct B(usize); + + let mut map: IdTypeMap = Default::default(); + + let num_a = 1_000; + let num_b = 10; + + for i in 0..num_a { + map.insert_persisted(Id::new(i), A(i)); + } + for i in 0..num_b { + map.insert_persisted(Id::new(i), B(i)); + } + + map = serialize_and_deserialize(map, 100); + + // We always serialize at least one generation: + assert_eq!(map.count::(), num_a); + assert_eq!(map.count::(), num_b); + + // Create a new small generation: + map.insert_persisted(Id::new(1_000_000), A(1_000_000)); + map.insert_persisted(Id::new(1_000_000), B(1_000_000)); + + assert_eq!(map.count::(), num_a + 1); + assert_eq!(map.count::(), num_b + 1); + + // And read a value: + assert_eq!(map.get_persisted::(Id::new(0)), Some(A(0))); + assert_eq!(map.get_persisted::(Id::new(0)), Some(B(0))); + + map = serialize_and_deserialize(map, 100); + + assert_eq!( + map.count::(), + 2, + "We should have dropped the oldest generation, but kept the new value and the read value" + ); + assert_eq!( + map.count::(), + num_b + 1, + "B should fit under the byte limit" + ); + + // Create another small generation: + map.insert_persisted(Id::new(2_000_000), A(2_000_000)); + map.insert_persisted(Id::new(2_000_000), B(2_000_000)); + + map = serialize_and_deserialize(map, 100); + + assert_eq!(map.count::(), 3); // The read value, plus the two new ones + assert_eq!(map.count::(), num_b + 2); // all the old ones, plus two new ones + + // Lower the limit, and we should only have the latest generation: + + map = serialize_and_deserialize(map, 1); + + assert_eq!(map.count::(), 1); + assert_eq!(map.count::(), 1); + + assert_eq!( + map.get_persisted::(Id::new(2_000_000)), + Some(A(2_000_000)) + ); + assert_eq!( + map.get_persisted::(Id::new(2_000_000)), + Some(B(2_000_000)) + ); +}