//! Message tokenization for pattern matching.

use crate::types::Token;

/// Tokenize a message into comparable units.
///
/// Splits on whitespace and punctuation while preserving structure.
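///
/// # Examples
///
/// A minimal usage sketch (marked `ignore` because the crate path and
/// `Token`'s derives are assumed here rather than confirmed):
///
/// ```ignore
/// let tokens = tokenize("key=value");
/// // Expected: [Word("key"), Punctuation('='), Word("value")]
/// assert_eq!(tokens.len(), 3);
/// ```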
pub fn tokenize(message: &str) -> Vec<Token> {
    let mut tokens = Vec::new();
    let mut current = String::new();

    for ch in message.chars() {
        if ch.is_whitespace() || "(){}[],:=".contains(ch) {
            if !current.is_empty() {
                tokens.push(Token::Word(current.clone()));
                current.clear();
            }
            if !ch.is_whitespace() {
                tokens.push(Token::Punctuation(ch));
            }
        } else {
            current.push(ch);
        }
    }

    if !current.is_empty() {
        tokens.push(Token::Word(current));
    }

    tokens
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenize_simple() {
        let tokens = tokenize("Recorded ACK from peer");
        assert_eq!(tokens.len(), 4);
        assert_eq!(tokens[0], Token::Word("Recorded".to_string()));
        assert_eq!(tokens[1], Token::Word("ACK".to_string()));
        assert_eq!(tokens[2], Token::Word("from".to_string()));
        assert_eq!(tokens[3], Token::Word("peer".to_string()));
    }

    #[test]
    fn test_tokenize_with_punctuation() {
        let tokens = tokenize("peer=1817e146 hlc=HLC(123,1,:device)");

        // Expected tokens: peer = 1817e146 hlc = HLC ( 123 , 1 , : device )
        assert!(tokens.contains(&Token::Word("peer".to_string())));
        assert!(tokens.contains(&Token::Punctuation('=')));
        assert!(tokens.contains(&Token::Punctuation('(')));
        assert!(tokens.contains(&Token::Punctuation(',')));
    }

    #[test]
    fn test_tokenize_empty() {
        let tokens = tokenize("");
        assert_eq!(tokens.len(), 0);
    }
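
    // Illustrative addition, not in the original tests: checks that token
    // order is preserved around punctuation, matching the doc comment's
    // "preserving structure" claim (assumes Token derives PartialEq + Debug,
    // as the asserts above already rely on).
    #[test]
    fn test_tokenize_preserves_order() {
        let tokens = tokenize("HLC(123)");
        assert_eq!(
            tokens,
            vec![
                Token::Word("HLC".to_string()),
                Token::Punctuation('('),
                Token::Word("123".to_string()),
                Token::Punctuation(')'),
            ]
        );
    }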
}