diff --git a/src/analyzer/code_analyzer.py b/src/analyzer/code_analyzer.py index 38d7b45..42adeef 100644 --- a/src/analyzer/code_analyzer.py +++ b/src/analyzer/code_analyzer.py @@ -24,9 +24,14 @@ def __init__(self): self.hot_paths: List[List[str]] = [] self.dependencies = nx.DiGraph() self.complexity: Dict[str, int] = {} + # Cache for expression type inference to avoid redundant work + self._expr_type_cache: Dict[int, str] = {} def analyze_file(self, file_path: Path) -> AnalysisResult: """Analyze a Python file and return the results.""" + # Clear caches for a fresh analysis run + self._expr_type_cache.clear() + with open(file_path, 'r') as f: content = f.read() @@ -124,8 +129,55 @@ def _infer_variable_type(self, node: ast.Assign) -> None: self.type_info[node.targets[0].id] = f'std::set<{elt_type}>' else: self.type_info[node.targets[0].id] = 'std::set' # Default + elif isinstance(node.targets[0], ast.Tuple): + # Handle tuple unpacking assignments + if isinstance(node.value, ast.Call): + if isinstance(node.value.func, ast.Name): + func_name = node.value.func.id + if func_name in self.type_info: + return_type = self.type_info[func_name].get('return_type', 'std::tuple') + if return_type.startswith('std::tuple<'): + types = return_type[11:-1].split(', ') + for i, target in enumerate(node.targets[0].elts): + if i < len(types): + if isinstance(target, ast.Tuple): + nested_types = types[i][11:-1].split(',') + for j, nested_target in enumerate(target.elts): + if j < len(nested_types): + self.type_info[nested_target.id] = nested_types[j] + else: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = types[i] + else: + self.type_info[target.id] = 'int' + else: + for target in node.targets[0].elts: + if isinstance(target, ast.Tuple): + for nested_target in target.elts: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = 'int' + elif isinstance(node.value, ast.Tuple): + for i, (target, value) in enumerate(zip(node.targets[0].elts, node.value.elts)): + if isinstance(target, ast.Tuple): + if isinstance(value, ast.Tuple): + for j, (nested_target, nested_value) in enumerate(zip(target.elts, value.elts)): + self.type_info[nested_target.id] = self._infer_expression_type(nested_value) + else: + for nested_target in target.elts: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = self._infer_expression_type(value) + else: + for target in node.targets[0].elts: + if isinstance(target, ast.Tuple): + for nested_target in target.elts: + self.type_info[nested_target.id] = 'int' + else: + self.type_info[target.id] = 'int' elif isinstance(node.value, ast.Tuple): - # For tuples, we'll use std::tuple + # For tuples, assign tuple type to variable if node.value.elts: elt_types = [] for elt in node.value.elts: @@ -134,7 +186,6 @@ def _infer_variable_type(self, node: ast.Assign) -> None: elif isinstance(elt, ast.Subscript): elt_types.append(self._get_type_name(elt)) elif isinstance(elt, ast.Tuple): - # Handle nested tuples nested_types = [] for nested_elt in elt.elts: nested_types.append(self._infer_expression_type(nested_elt)) @@ -212,42 +263,56 @@ def _infer_variable_type(self, node: ast.Assign) -> None: def _infer_expression_type(self, node: ast.AST) -> str: """Infer the type of an expression.""" - print(f"Inferring expression type for: {type(node)}") + # Define a set of cacheable node types + cacheable_node_types = {ast.Constant, ast.Name, ast.List, ast.Dict, ast.Set, ast.Tuple} + cache_key = id(node) if type(node) in cacheable_node_types else None + if cache_key is not None and cache_key in self._expr_type_cache: + return self._expr_type_cache[cache_key] + + self.logger.debug(f"Inferring expression type for: {type(node)}") + result: str = 'int' + if isinstance(node, ast.Constant): if isinstance(node.value, int): - return 'int' + result = 'int' elif isinstance(node.value, float): - return 'double' + result = 'double' elif isinstance(node.value, str): - return 'std::string' + result = 'std::string' elif isinstance(node.value, bool): - return 'bool' + result = 'bool' + else: + result = 'int' elif isinstance(node, ast.Name): if node.id == 'int': - return 'int' + result = 'int' elif node.id == 'float': - return 'double' + result = 'double' elif node.id == 'str': - return 'std::string' + result = 'std::string' elif node.id == 'bool': - return 'bool' - return node.id + result = 'bool' + else: + result = node.id elif isinstance(node, ast.List): if node.elts: elt_type = self._infer_expression_type(node.elts[0]) - return f'std::vector<{elt_type}>' - return 'std::vector' + result = f'std::vector<{elt_type}>' + else: + result = 'std::vector' elif isinstance(node, ast.Dict): if node.keys and node.values: key_type = self._infer_expression_type(node.keys[0]) value_type = self._infer_expression_type(node.values[0]) - return f'std::map<{key_type}, {value_type}>' - return 'std::map' + result = f'std::map<{key_type}, {value_type}>' + else: + result = 'std::map' elif isinstance(node, ast.Set): if node.elts: elt_type = self._infer_expression_type(node.elts[0]) - return f'std::set<{elt_type}>' - return 'std::set' + result = f'std::set<{elt_type}>' + else: + result = 'std::set' elif isinstance(node, ast.Tuple): if node.elts: elt_types = [] @@ -258,16 +323,18 @@ def _infer_expression_type(self, node: ast.AST) -> str: elt_types.append(self._get_type_name(elt)) else: elt_types.append(self._infer_expression_type(elt)) - return f'std::tuple<{", ".join(elt_types)}>' - return 'std::tuple<>' + result = f'std::tuple<{", ".join(elt_types)}>' + else: + result = 'std::tuple<>' elif isinstance(node, ast.BinOp): # For binary operations, infer type based on operands left_type = self._infer_expression_type(node.left) right_type = self._infer_expression_type(node.right) # If either operand is double, result is double if 'double' in (left_type, right_type): - return 'double' - return 'int' + result = 'double' + else: + result = 'int' elif isinstance(node, ast.Subscript): if isinstance(node.value, ast.Name): base_type = node.value.id @@ -275,18 +342,18 @@ def _infer_expression_type(self, node: ast.AST) -> str: elt = node.slice.value else: # Python 3.9 and later elt = node.slice - + if base_type == 'list': - return f'std::vector<{self._infer_expression_type(elt)}>' + result = f'std::vector<{self._infer_expression_type(elt)}>' elif base_type == 'dict': if isinstance(elt, ast.Tuple): key_type = self._infer_expression_type(elt.elts[0]) value_type = self._infer_expression_type(elt.elts[1]) - return f'std::map<{key_type}, {value_type}>' + result = f'std::map<{key_type}, {value_type}>' else: - return f'std::map' + result = f'std::map' elif base_type == 'set': - return f'std::set<{self._infer_expression_type(elt)}>' + result = f'std::set<{self._infer_expression_type(elt)}>' elif base_type == 'tuple': if isinstance(elt, ast.Tuple): elt_types = [] @@ -297,13 +364,19 @@ def _infer_expression_type(self, node: ast.AST) -> str: elt_types.append(self._get_type_name(e)) else: elt_types.append(self._infer_expression_type(e)) - return f'std::tuple<{", ".join(elt_types)}>' + result = f'std::tuple<{", ".join(elt_types)}>' else: - return f'std::tuple<{self._infer_expression_type(elt)}>' + result = f'std::tuple<{self._infer_expression_type(elt)}>' else: - return base_type - return 'int' # Default - return 'int' # Default type + result = base_type + else: + result = 'int' + else: + result = 'int' + + if cache_key is not None: + self._expr_type_cache[cache_key] = result + return result def _analyze_control_flow(self, node: ast.AST) -> None: """Analyze control flow structures.""" diff --git a/src/analyzer/code_analyzer_fixed.py b/src/analyzer/code_analyzer_fixed.py index 1a49fdf..33b0469 100644 --- a/src/analyzer/code_analyzer_fixed.py +++ b/src/analyzer/code_analyzer_fixed.py @@ -41,11 +41,15 @@ def __init__(self): self.hot_paths: List[List[str]] = [] self.dependencies = nx.DiGraph() self.complexity: Dict[str, int] = {} + # Cache mapping id(node) -> inferred C++ type + self._expr_type_cache: Dict[int, str] = {} def analyze_file(self, file_path: Path) -> AnalysisResult: """Analyze a Python file and return the results.""" logger.info(f"Analyzing Python code: {file_path}") try: + # Clear expression type cache for a fresh analysis run + self._expr_type_cache.clear() with open(file_path, 'r') as f: content = f.read() @@ -439,52 +443,66 @@ def _assign_default_types_to_tuple(self, target_tuple: ast.Tuple) -> None: def _infer_expression_type(self, node: ast.AST) -> str: """Infer the type of an expression.""" + # Define cacheable node types + cacheable_node_types = (ast.Constant, ast.Name, ast.List, ast.Dict, ast.Tuple, ast.Set) + cache_key = id(node) if isinstance(node, cacheable_node_types) else None + if cache_key is not None and cache_key in self._expr_type_cache: + return self._expr_type_cache[cache_key] + + result: str = 'int' + if isinstance(node, ast.Constant): if isinstance(node.value, bool): # Check bool first (bool is a subclass of int) - return 'bool' + result = 'bool' elif isinstance(node.value, int): - return 'int' + result = 'int' elif isinstance(node.value, float): - return 'double' + result = 'double' elif isinstance(node.value, str): - return 'std::string' + result = 'std::string' elif node.value is None: - return 'std::nullptr_t' + result = 'std::nullptr_t' elif isinstance(node, ast.Name): # Check if we already know the type of this variable if node.id in self.type_info: type_info = self.type_info[node.id] if isinstance(type_info, str): - return type_info - - # Otherwise infer from common names - if node.id == 'int': - return 'int' - elif node.id == 'float': - return 'double' - elif node.id == 'str': - return 'std::string' - elif node.id == 'bool': - return 'bool' - elif node.id == 'None': - return 'std::nullptr_t' - return 'int' # Default to int for unknown variables + result = type_info + else: + result = 'int' + else: + # Otherwise infer from common names + if node.id == 'int': + result = 'int' + elif node.id == 'float': + result = 'double' + elif node.id == 'str': + result = 'std::string' + elif node.id == 'bool': + result = 'bool' + elif node.id == 'None': + result = 'std::nullptr_t' + else: + result = 'int' elif isinstance(node, ast.List): if node.elts: elt_type = self._infer_expression_type(node.elts[0]) - return f'std::vector<{elt_type}>' - return 'std::vector' + result = f'std::vector<{elt_type}>' + else: + result = 'std::vector' elif isinstance(node, ast.Dict): if node.keys and node.values: key_type = self._infer_expression_type(node.keys[0]) value_type = self._infer_expression_type(node.values[0]) - return f'std::map<{key_type}, {value_type}>' - return 'std::map' + result = f'std::map<{key_type}, {value_type}>' + else: + result = 'std::map' elif isinstance(node, ast.Set): if node.elts: elt_type = self._infer_expression_type(node.elts[0]) - return f'std::set<{elt_type}>' - return 'std::set' + result = f'std::set<{elt_type}>' + else: + result = 'std::set' elif isinstance(node, ast.Tuple): if node.elts: elt_types = [] @@ -495,32 +513,35 @@ def _infer_expression_type(self, node: ast.AST) -> str: elt_types.append(self._get_type_name(elt)) else: elt_types.append(self._infer_expression_type(elt)) - return f'std::tuple<{", ".join(elt_types)}>' - return 'std::tuple<>' + result = f'std::tuple<{", ".join(elt_types)}>' + else: + result = 'std::tuple<>' elif isinstance(node, ast.BinOp): # For binary operations, infer type based on operands left_type = self._infer_expression_type(node.left) right_type = self._infer_expression_type(node.right) # If either operand is double, result is double if 'double' in (left_type, right_type): - return 'double' + result = 'double' # If string + string, result is string - if left_type == 'std::string' and right_type == 'std::string': - return 'std::string' - return 'int' + elif left_type == 'std::string' and right_type == 'std::string': + result = 'std::string' + else: + result = 'int' elif isinstance(node, ast.UnaryOp): # Infer type based on operand operand_type = self._infer_expression_type(node.operand) # For not operator, result is bool if isinstance(node.op, ast.Not): - return 'bool' - return operand_type + result = 'bool' + else: + result = operand_type elif isinstance(node, ast.Compare): # Compare always returns bool - return 'bool' + result = 'bool' elif isinstance(node, ast.BoolOp): # Boolean operations always return bool - return 'bool' + result = 'bool' elif isinstance(node, ast.Call): # Try to infer return type from function if isinstance(node.func, ast.Name): @@ -528,34 +549,40 @@ def _infer_expression_type(self, node: ast.AST) -> str: if func_name in self.type_info: func_info = self.type_info[func_name] if isinstance(func_info, dict) and 'return_type' in func_info: - return func_info['return_type'] - - # Common built-in functions - if func_name == 'int': - return 'int' - elif func_name == 'float': - return 'double' - elif func_name == 'str': - return 'std::string' - elif func_name == 'bool': - return 'bool' - elif func_name == 'list': - return 'std::vector' - elif func_name == 'dict': - return 'std::map' - elif func_name == 'set': - return 'std::set' - elif func_name == 'tuple': - return 'std::tuple' - elif func_name == 'sum': - return 'int' - elif func_name == 'len': - return 'int' - elif func_name == 'min' or func_name == 'max': - if node.args: - return self._infer_expression_type(node.args[0]) - return 'int' - return 'int' # Default for unknown functions + result = func_info['return_type'] + else: + result = 'int' + else: + # Common built-in functions + if func_name == 'int': + result = 'int' + elif func_name == 'float': + result = 'double' + elif func_name == 'str': + result = 'std::string' + elif func_name == 'bool': + result = 'bool' + elif func_name == 'list': + result = 'std::vector' + elif func_name == 'dict': + result = 'std::map' + elif func_name == 'set': + result = 'std::set' + elif func_name == 'tuple': + result = 'std::tuple' + elif func_name == 'sum': + result = 'int' + elif func_name == 'len': + result = 'int' + elif func_name == 'min' or func_name == 'max': + if node.args: + result = self._infer_expression_type(node.args[0]) + else: + result = 'int' + else: + result = 'int' + else: + result = 'int' elif isinstance(node, ast.Subscript): # Handle container access if isinstance(node.value, ast.Name): @@ -565,29 +592,39 @@ def _infer_expression_type(self, node: ast.AST) -> str: # Extract inner type from container types if isinstance(type_info, str): if type_info.startswith('std::vector<'): - return type_info[12:-1] # Extract T from std::vector + result = type_info[12:-1] + self._expr_type_cache[id(node)] = result + return result elif type_info.startswith('std::map<'): - # Return value type from std::map parts = type_info[9:-1].split(', ') if len(parts) > 1: - return parts[1] + result = parts[1] + self._expr_type_cache[id(node)] = result + return result elif type_info.startswith('std::tuple<'): - # For tuples, would need to know which index is being accessed - # Default to first type for now parts = type_info[11:-1].split(', ') if parts: - return parts[0] + result = parts[0] + self._expr_type_cache[id(node)] = result + return result # Try to infer from value type value_type = self._infer_expression_type(node.value) if value_type.startswith('std::vector<'): - return value_type[12:-1] # Extract T from std::vector + result = value_type[12:-1] elif value_type.startswith('std::map<'): - # Return value type from std::map parts = value_type[9:-1].split(', ') if len(parts) > 1: - return parts[1] - return 'int' # Default type - return 'int' # Default type for unknown expressions + result = parts[1] + else: + result = 'int' + else: + result = 'int' + else: + result = 'int' + + if cache_key is not None: + self._expr_type_cache[cache_key] = result + return result def _infer_function_types(self, node: ast.FunctionDef) -> None: """Infer function parameter and return types.""" diff --git a/src/converter/code_generator.py b/src/converter/code_generator.py index d59c5f1..660b890 100644 --- a/src/converter/code_generator.py +++ b/src/converter/code_generator.py @@ -88,6 +88,8 @@ def generate_code(self, analysis_result: AnalysisResult, output_dir: Path) -> No def _generate_header(self, analysis_result: Dict) -> str: """Generate C++ header file.""" + if not isinstance(analysis_result, dict): + analysis_result = analysis_result.__dict__ header = """#pragma once #include @@ -120,6 +122,8 @@ def _generate_header(self, analysis_result: Dict) -> str: def _generate_implementation(self, analysis_result: Dict) -> str: """Generate C++ implementation file.""" + if not isinstance(analysis_result, dict): + analysis_result = analysis_result.__dict__ impl = """#include "generated.hpp" #include #include