Skip to content

Commit 36a7d23

Browse files
committed
Check pt MCP
1 parent c57b5d4 commit 36a7d23

File tree

7 files changed

+544
-54
lines changed

7 files changed

+544
-54
lines changed

crates/diff-engine/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub mod hungarian_matcher;
1111
pub mod matching;
1212
pub mod refactoring;
1313
pub mod similarity_scorer;
14+
pub mod smart_matcher;
1415
pub mod tree_edit;
1516

1617
pub use changes::{
@@ -34,6 +35,7 @@ pub use cross_file_tracker::{
3435
MoveType, FileTrackingStats, CrossFileTrackingStats
3536
};
3637
pub use matching::{FunctionMatcher, SimilarityScore};
38+
pub use smart_matcher::{SmartMatcher, SmartMatcherConfig};
3739
pub use refactoring::{
3840
RefactoringDetector, RefactoringDetectionConfig, RefactoringPattern,
3941
RefactoringAnalysis, RefactoringCharacteristic, RefactoringCharacteristicType,
Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
//! Smart function matching with practical heuristics
2+
//!
3+
//! This module implements a practical function matching algorithm that prioritizes
4+
//! common real-world scenarios over theoretical optimality. It uses smart rules to
5+
//! handle same-named functions, simple functions, and cross-file moves.
6+
7+
use smart_diff_parser::{Function, MatchResult, Change, ChangeType, CodeElement};
8+
use std::collections::{HashMap, HashSet};
9+
10+
/// Configuration for smart matching.
///
/// Derives `PartialEq` so that configurations can be compared directly
/// (for example in tests or when detecting a settings change).
#[derive(Debug, Clone, PartialEq)]
pub struct SmartMatcherConfig {
    /// Minimum similarity a candidate pair must reach to be matched.
    pub similarity_threshold: f64,
    /// Whether functions located in different files may be matched at all.
    pub enable_cross_file_matching: bool,
    /// Penalty factor for cross-file matches (0.0 to 1.0).
    ///
    /// NOTE(review): nothing in this module reads this field; the matcher
    /// raises the cross-file threshold to a hard-coded floor of 0.85 instead.
    /// Confirm whether this field is meant to drive that floor.
    pub cross_file_penalty: f64,
}

impl Default for SmartMatcherConfig {
    /// Returns the default configuration: threshold `0.7`, cross-file
    /// matching enabled, cross-file penalty `0.5`.
    fn default() -> Self {
        Self {
            similarity_threshold: 0.7,
            enable_cross_file_matching: true,
            cross_file_penalty: 0.5,
        }
    }
}
30+
31+
/// Smart function matcher using practical heuristics
32+
pub struct SmartMatcher {
33+
config: SmartMatcherConfig,
34+
}
35+
36+
impl SmartMatcher {
37+
pub fn new(config: SmartMatcherConfig) -> Self {
38+
Self { config }
39+
}
40+
41+
/// Match functions between two sets using smart heuristics
42+
pub fn match_functions(
43+
&self,
44+
source_functions: &[Function],
45+
target_functions: &[Function],
46+
) -> MatchResult {
47+
let mut result = MatchResult::new();
48+
49+
if source_functions.is_empty() && target_functions.is_empty() {
50+
result.similarity = 1.0;
51+
return result;
52+
}
53+
54+
// Track which target functions have been matched
55+
let mut matched_targets = HashSet::new();
56+
57+
// First pass: Match functions greedily by best similarity
58+
for source_func in source_functions {
59+
let mut best_match: Option<(usize, f64)> = None;
60+
61+
for (target_idx, target_func) in target_functions.iter().enumerate() {
62+
if matched_targets.contains(&target_idx) {
63+
continue;
64+
}
65+
66+
let similarity = self.calculate_function_similarity(source_func, target_func);
67+
68+
// Apply cross-file penalty to matching threshold, not to similarity score
69+
let same_file = source_func.location.file_path == target_func.location.file_path;
70+
let matching_threshold = if !same_file && self.config.enable_cross_file_matching {
71+
// Require higher similarity for cross-file matches
72+
self.config.similarity_threshold.max(0.85)
73+
} else {
74+
self.config.similarity_threshold
75+
};
76+
77+
if similarity >= matching_threshold {
78+
if let Some((_, best_sim)) = best_match {
79+
if similarity > best_sim {
80+
best_match = Some((target_idx, similarity));
81+
}
82+
} else {
83+
best_match = Some((target_idx, similarity));
84+
}
85+
}
86+
}
87+
88+
if let Some((target_idx, similarity)) = best_match {
89+
let target_func = &target_functions[target_idx];
90+
matched_targets.insert(target_idx);
91+
92+
// Add to mapping
93+
result.mapping.insert(source_func.hash.clone(), target_func.hash.clone());
94+
95+
// Create change record if not identical
96+
if similarity < 1.0 {
97+
let source_element = CodeElement::from_function(source_func);
98+
let target_element = CodeElement::from_function(target_func);
99+
100+
let change_type = self.classify_change_type(source_func, target_func, similarity);
101+
let description = self.change_type_description(&change_type);
102+
103+
let mut change = Change::new(
104+
change_type,
105+
format!(
106+
"Function '{}' {} (similarity: {:.2})",
107+
source_func.signature.name,
108+
description,
109+
similarity
110+
),
111+
);
112+
change.source = Some(source_element);
113+
change.target = Some(target_element);
114+
change.confidence = similarity;
115+
change.details.similarity_score = Some(similarity);
116+
117+
result.changes.push(change);
118+
}
119+
} else {
120+
// Function was deleted
121+
result.unmatched_source.push(source_func.hash.clone());
122+
123+
let source_element = CodeElement::from_function(source_func);
124+
let mut change = Change::new(
125+
ChangeType::Delete,
126+
format!("Function '{}' deleted", source_func.signature.name),
127+
);
128+
change.source = Some(source_element);
129+
change.target = None;
130+
change.confidence = 1.0;
131+
132+
result.changes.push(change);
133+
}
134+
}
135+
136+
// Second pass: Find added functions
137+
for (target_idx, target_func) in target_functions.iter().enumerate() {
138+
if !matched_targets.contains(&target_idx) {
139+
result.unmatched_target.push(target_func.hash.clone());
140+
141+
let target_element = CodeElement::from_function(target_func);
142+
let mut change = Change::new(
143+
ChangeType::Add,
144+
format!("Function '{}' added", target_func.signature.name),
145+
);
146+
change.source = None;
147+
change.target = Some(target_element);
148+
change.confidence = 1.0;
149+
150+
result.changes.push(change);
151+
}
152+
}
153+
154+
result.calculate_similarity();
155+
result
156+
}
157+
158+
/// Calculate similarity between two functions using smart rules
159+
fn calculate_function_similarity(&self, func1: &Function, func2: &Function) -> f64 {
160+
let same_file = func1.location.file_path == func2.location.file_path;
161+
let same_name = func1.signature.name == func2.signature.name;
162+
163+
// Rule 1: Same-named functions in same file should always match
164+
if same_name && same_file {
165+
// Base similarity on content/body
166+
let body_similarity = self.calculate_body_similarity(&func1.body, &func2.body);
167+
return 0.7 + (body_similarity * 0.3); // Minimum 70% for same name, up to 100%
168+
}
169+
170+
// Rule 2: Don't match simple functions unless identical
171+
if self.is_simple_function(func1) || self.is_simple_function(func2) {
172+
if same_name && func1.hash == func2.hash {
173+
return 1.0;
174+
} else {
175+
return 0.0;
176+
}
177+
}
178+
179+
// Rule 3: Regular similarity calculation for complex functions
180+
let mut score = 0.0;
181+
let mut weight = 0.0;
182+
183+
// Calculate name similarity first to determine if this is a potential rename
184+
let name_sim = if same_name {
185+
1.0
186+
} else {
187+
self.string_similarity(&func1.signature.name, &func2.signature.name)
188+
};
189+
190+
// Name similarity (30% weight)
191+
let name_weight = 0.3;
192+
if same_name {
193+
score += name_weight;
194+
} else {
195+
score += name_weight * name_sim * 0.5;
196+
}
197+
weight += name_weight;
198+
199+
// Signature similarity (20% weight)
200+
let sig_weight = 0.2;
201+
let sig_sim = func1.signature.similarity(&func2.signature);
202+
score += sig_weight * sig_sim;
203+
weight += sig_weight;
204+
205+
// Body similarity (50% weight) - highest weight
206+
let body_weight = 0.5;
207+
let body_sim = self.calculate_body_similarity(&func1.body, &func2.body);
208+
score += body_weight * body_sim;
209+
weight += body_weight;
210+
211+
let mut final_score = if weight > 0.0 { score / weight } else { 0.0 };
212+
213+
// Rule 3a: Stricter matching for different-named functions
214+
// If names are different, require high body similarity to avoid matching
215+
// unrelated functions with similar structure
216+
if !same_name {
217+
// Same file, different names: likely different functions, not renames
218+
// Require very high body similarity (95%) to match
219+
if same_file && body_sim < 0.95 {
220+
return 0.0;
221+
}
222+
223+
// Cross-file matches with different names
224+
// For highly similar names (0.8-1.0), require 85% body similarity
225+
if name_sim >= 0.8 && body_sim < 0.85 {
226+
return 0.0;
227+
}
228+
// For moderately similar names (0.5-0.8), require 92% body similarity
229+
if name_sim >= 0.5 && name_sim < 0.8 && body_sim < 0.92 {
230+
return 0.0;
231+
}
232+
// For very different names (< 0.5), require 95% body similarity
233+
if name_sim < 0.5 && body_sim < 0.95 {
234+
return 0.0;
235+
}
236+
}
237+
238+
// For cross-file matching, apply penalty only to the matching threshold,
239+
// not to the similarity score itself. The similarity score should reflect
240+
// actual code similarity, not matching confidence.
241+
if !same_file && !self.config.enable_cross_file_matching {
242+
0.0 // Don't match across files if disabled
243+
} else {
244+
final_score
245+
}
246+
}
247+
248+
/// Check if a function is "simple" (small body, likely a getter/setter/wrapper)
249+
fn is_simple_function(&self, func: &Function) -> bool {
250+
// Count non-empty nodes in the body
251+
let node_count = self.count_ast_nodes(&func.body);
252+
// Simple functions: single statement wrappers, getters, setters
253+
// Typically have 10 or fewer AST nodes
254+
node_count <= 10
255+
}
256+
257+
/// Count AST nodes recursively
258+
fn count_ast_nodes(&self, node: &smart_diff_parser::ASTNode) -> usize {
259+
1 + node.children.iter().map(|child| self.count_ast_nodes(child)).sum::<usize>()
260+
}
261+
262+
/// Calculate body similarity using AST structure
263+
fn calculate_body_similarity(
264+
&self,
265+
body1: &smart_diff_parser::ASTNode,
266+
body2: &smart_diff_parser::ASTNode,
267+
) -> f64 {
268+
// Simple structural similarity based on node count and depth
269+
let count1 = self.count_ast_nodes(body1);
270+
let count2 = self.count_ast_nodes(body2);
271+
272+
let depth1 = self.calculate_ast_depth(body1);
273+
let depth2 = self.calculate_ast_depth(body2);
274+
275+
// Node count similarity (60%)
276+
let count_sim = if count1.max(count2) == 0 {
277+
1.0
278+
} else {
279+
count1.min(count2) as f64 / count1.max(count2) as f64
280+
};
281+
282+
// Depth similarity
283+
let depth_sim = if depth1.max(depth2) == 0 {
284+
1.0
285+
} else {
286+
1.0 - ((depth1 as i32 - depth2 as i32).abs() as f64 / depth1.max(depth2) as f64)
287+
};
288+
289+
// Content similarity - compare actual text content
290+
let content_sim = self.string_similarity(
291+
&body1.metadata.original_text,
292+
&body2.metadata.original_text,
293+
);
294+
295+
// Weighted combination: structure (30%) + content (70%)
296+
// Content is more important to avoid matching structurally similar but semantically different code
297+
count_sim * 0.15 + depth_sim * 0.15 + content_sim * 0.7
298+
}
299+
300+
/// Calculate AST depth
301+
fn calculate_ast_depth(&self, node: &smart_diff_parser::ASTNode) -> usize {
302+
if node.children.is_empty() {
303+
1
304+
} else {
305+
1 + node.children.iter().map(|child| self.calculate_ast_depth(child)).max().unwrap_or(0)
306+
}
307+
}
308+
309+
/// Simple string similarity using character overlap
310+
fn string_similarity(&self, s1: &str, s2: &str) -> f64 {
311+
if s1 == s2 {
312+
return 1.0;
313+
}
314+
if s1.is_empty() || s2.is_empty() {
315+
return 0.0;
316+
}
317+
318+
let chars1: HashSet<char> = s1.chars().collect();
319+
let chars2: HashSet<char> = s2.chars().collect();
320+
321+
let intersection = chars1.intersection(&chars2).count();
322+
let union = chars1.union(&chars2).count();
323+
324+
if union == 0 {
325+
0.0
326+
} else {
327+
intersection as f64 / union as f64
328+
}
329+
}
330+
331+
/// Classify the type of change between two matched functions
332+
fn classify_change_type(&self, func1: &Function, func2: &Function, similarity: f64) -> ChangeType {
333+
let same_file = func1.location.file_path == func2.location.file_path;
334+
let same_name = func1.signature.name == func2.signature.name;
335+
336+
if !same_file && same_name {
337+
ChangeType::CrossFileMove
338+
} else if !same_name && similarity > 0.9 {
339+
ChangeType::Rename
340+
} else if !same_file {
341+
ChangeType::Move
342+
} else {
343+
ChangeType::Modify
344+
}
345+
}
346+
347+
/// Get human-readable description of change type
348+
fn change_type_description(&self, change_type: &ChangeType) -> &'static str {
349+
match change_type {
350+
ChangeType::Modify => "modified",
351+
ChangeType::Rename => "renamed",
352+
ChangeType::Move => "moved",
353+
ChangeType::CrossFileMove => "moved to different file",
354+
_ => "changed",
355+
}
356+
}
357+
}
358+

crates/mcp-server/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ async-stream = "0.3"
3838
walkdir = "2.0"
3939
ignore = "0.4"
4040

41+
# Diff generation
42+
similar = "2.0"
43+
4144
# HTTP server for SSE transport
4245
axum = { version = "0.7", features = ["http2"] }
4346
tower = "0.4"

0 commit comments

Comments
 (0)