diff --git a/.github/workflows/analyze.yml b/.github/workflows/analyze.yml new file mode 100644 index 0000000..bda5008 --- /dev/null +++ b/.github/workflows/analyze.yml @@ -0,0 +1,27 @@ +name: Analyze Codebase + +on: + push: + branches: + - main + +jobs: + analyze: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + - name: Run analysis + run: python -m codecritical.cli --path . --detect --output json --report-file codecritical-report.json + - name: Upload report artifact + uses: actions/upload-artifact@v3 + with: + name: codecritical-report + path: codecritical-report.json diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..38a81d0 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,20 @@ +name: Run Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . 
+ pip install pytest + - name: Run tests + run: pytest diff --git a/.gitignore b/.gitignore index 5ff6309..5d2863f 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,59 @@ build/ .vscode/ ### Mac OS ### -.DS_Store \ No newline at end of file +.DS_Store + +### Python ### +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +.env +.venv +env/ +venv/ +ENV/ +venv.bak/ +env.bak/ +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.log +*.pot +*.pyt +local_settings.py +db.sqlite3 +db.sqlite3-journal +instance/ +.webassets-cache +.bundle +celerybeat-schedule +celerybeat.pid +*.so +*.suo +*.ntvs* +*.njsproj +*.sln +*.swp diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e903ab9 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,34 @@ +# Contributing to CodeCritical + +First off, thank you for considering contributing to CodeCritical! We welcome any contributions that improve the project, from bug reports to new features. + +## How to Contribute + +- **Reporting Bugs**: If you find a bug, please open an issue and provide as much detail as possible, including the steps to reproduce the bug. +- **Suggesting Enhancements**: If you have an idea for a new feature or an improvement to an existing one, please open an issue to discuss it. +- **Pull Requests**: If you'd like to contribute code, please follow these steps: + 1. Fork the repository. + 2. Create a new branch for your feature or bug fix. + 3. Make your changes and add tests. + 4. Make sure the tests pass by running `pytest`. + 5. Submit a pull request with a clear description of your changes. + +## Development Setup + +1. Clone the repository: + \`\`\`bash + git clone https://github.com/NemesisGuy/CodeCritical.git + \`\`\` +2. Install the dependencies: + \`\`\`bash + pip install -e . + pip install pytest + \`\`\` +3. 
Run the tests: + \`\`\`bash + pytest + \`\`\` + +## Code Style + +We use the PEP 8 style guide for Python code. Please make sure your code adheres to this style. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..50d53f5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Jules + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 737c453..99062db 100644 --- a/README.md +++ b/README.md @@ -1,99 +1,79 @@ # 📊 CodeCritical -### 🚀 A Comprehensive Java Code Analysis Tool - -Welcome to **CodeCritical**, a powerful and flexible tool designed to give you in-depth insights into your Java codebase. Track important metrics like lines of code, functions, classes, maintainability index, and much more. This tool is designed for developers and teams aiming to measure and improve the quality, maintainability, and readability of their code. - -## 🔥 Key Features of CodeCritical - -### 1. 
**File Line Counting** -Analyze each `.java` file in your project to gather information such as: -- **Total Lines**: Number of total lines in the file. -- **Code Lines**: Count of lines that represent actual code (excluding comments and empty lines). -- **Comment Lines**: Count of comment lines, including single-line (`//`) and multi-line comments (`/* */`). -- **Blank Lines**: Automatically skips over blank lines, focusing on the meaningful parts of your code. - -> 📊 **Benefit**: Get a clear picture of the structure and scale of your codebase. - -### 2. **Function Counting** -Detect and count the number of function or method signatures in each file. -- Identifies all functions by scanning for patterns like `public`, `private`, `protected`, `static`, etc. -- Useful for determining code complexity and coverage. - -> 🔧 **Benefit**: Easily track function count to evaluate complexity or assist with unit testing coverage. - -### 3. **Class and Interface Detection** -Automatically detects and counts the number of `classes` and `interfaces` declared in each file. -- Keeps track of object-oriented components, allowing you to assess the balance of interfaces vs. classes. - -> 🏗️ **Benefit**: Quickly understand the architecture of your project, identifying class-heavy or interface-heavy areas. - -### 4. **Code Duplication Detection** -Leveraging a **Code Duplication Detector**, this feature searches for identical blocks of code within your files to help you spot potential areas of code repetition and redundancy. -- Detects duplicate code blocks within individual files. -- Helps enforce DRY (Don't Repeat Yourself) principles, making the codebase easier to maintain. - -> ✂️ **Benefit**: Improve code maintainability by eliminating redundant code and reducing future technical debt. - -### 5. **Maintainability Index Calculation** -Calculate the **Maintainability Index** (MI) of your code. 
This score provides a quantitative measure of how maintainable your code is based on: -- **Lines of Code** (LOC) -- **Cyclomatic Complexity** (how complex the control flow is) -- **Comment Density** (proportion of comments to code) - -The result is an easy-to-understand score: -- **High** (>80): Very maintainable. -- **Moderate** (50-80): Could use some improvements. -- **Low** (<50): Refactoring recommended! - -> 📈 **Benefit**: Get a clear, easy-to-read score that helps prioritize refactoring and improvements to maintainability. - -### 6. **Markdown Report Generation** -All results are output in a **Markdown-formatted table**, providing an easy-to-read, professional report of the analyzed files. This includes: -- File name -- Total lines of code -- Code, comment, and function count -- Duplicates found -- Maintainability Index - -> 📑 **Benefit**: Perfect for GitHub repositories! Easily track code metrics over time by committing the generated Markdown reports for continuous improvement. - -### 7. **Grand Totals** -At the end of the analysis, the tool prints out **Grand Totals** of all the key metrics for the entire codebase: -- Total number of files analyzed -- Total lines of code, comment lines, and function count -- Maintainability index across the entire project - -> 🏅 **Benefit**: Keep track of project-wide trends in your codebase, allowing you to measure progress as you refactor and grow. - ---- - -## 🛠️ Future Features - -- **Cyclomatic Complexity Breakdown**: Include a per-function breakdown of cyclomatic complexity. -- **Extended Language Support**: Expand analysis to additional languages like Python, JavaScript, and more. -- **Real-time Web Dashboard**: Visualize your code metrics in real-time through a web-based dashboard. - ---- +### 🚀 A Comprehensive Code Analysis Tool + +Welcome to **CodeCritical**, a powerful and flexible tool designed to give you in-depth insights into your codebase. 
Track important metrics like lines of code, functions, classes, and cyclomatic complexity. This tool is designed for developers and teams aiming to measure and improve the quality, maintainability, and readability of their code. + +## 🔥 Key Features + +- **Multi-Language Support**: Analyze Python code out of the box, with a modular design for easily adding more languages. +- **Core Metrics**: Get a clear picture of your codebase with metrics like: + - **Lines of Code**: Total, code, comment, and blank lines. + - **Function & Class Counts**: Understand the structure of your code. + - **Cyclomatic Complexity**: Measure the complexity of your functions to identify areas for refactoring. +- **CLI**: A simple and powerful command-line interface for running analysis. +- **Structured Reports**: Generate machine-readable JSON reports. +- **CI/CD Integration**: Easily integrate with your CI/CD pipelines to track code quality over time. + +## 🛠️ Usage + +### Installation + +```bash +pip install -e . +``` + +### Running Analysis + +Analyze a single file: +```bash +python -m codecritical.cli --path path/to/your/file.py --lang python +``` + +Analyze a whole directory: +```bash +python -m codecritical.cli --path path/to/your/repo --detect +``` + +Generate a JSON report: +```bash +python -m codecritical.cli --path . --detect --output json --report-file report.json +``` + +## JSON Report Schema + +The tool generates a `codecritical-report.json` file with the following schema: + +```json +{ + "repo": "name/or/path", + "date": "2025-12-08T00:00:00Z", + "summary": { + "files_scanned": 123, + "languages": ["python"], + "total_lines": 45678 + }, + "languages": { + "python": { + "files": 12, + "lines": 3000, + "functions": 210, + "avg_complexity": 3.4, + "top_complex_files": [ + {"path":"app/foo.py","complexity":18} + ] + } + } +} +``` ## 👨‍💻 Contributing -Contributions are welcome! 
Feel free to open an issue or submit a pull request if you'd like to improve CodeCritical or add new features. +Contributions are welcome! Please see the [CONTRIBUTING.md](CONTRIBUTING.md) file for details. ## 👥 Authors - **Peter Buckingham** - [NemesisGuy](https://github.com/NemesisGuy) - ---- +- **Jules** - AI Software Engineer ## 📄 License -This project is licensed under the MIT License – see the [LICENSE](LICENSE.md) file for details. - ---- - -## 🙌 Acknowledgments - -This tool is part of a larger suite of software development tools aimed at improving software quality, maintainability, and developer productivity. - ---- - -🎉 **Happy coding, and may your code always be clean, maintainable, and DRY!** +This project is licensed under the MIT License – see the [LICENSE](LICENSE) file for details.
diff --git a/codecritical/__init__.py b/codecritical/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/codecritical/cli.py b/codecritical/cli.py
new file mode 100644
index 0000000..1374e9a
--- /dev/null
+++ b/codecritical/cli.py
@@ -0,0 +1,76 @@
+"""Command-line entry point for CodeCritical."""
+import argparse
+import os
+
+from codecritical.parsers.python_parser import parse_python_file
+from codecritical.reporters.json_reporter import generate_report
+
+# Output formats that actually have a reporter implementation.
+SUPPORTED_OUTPUTS = ('json',)
+
+# Directories holding VCS metadata, caches or third-party code rather than the
+# project's own sources; scanning them would skew every metric.
+SKIPPED_DIRS = frozenset({
+    '.git', '.hg', '.svn', '__pycache__', '.tox', '.nox',
+    '.venv', 'venv', 'node_modules',
+})
+
+
+def _wants_python(args, filename):
+    """Return True when *filename* should go through the Python parser."""
+    return filename.endswith('.py') and (args.lang == 'python' or args.detect)
+
+
+def _collect_results(args):
+    """Parse every selected file under ``args.path`` and return the results."""
+    if os.path.isfile(args.path):
+        # A file named explicitly is analyzed when it has a .py suffix or the
+        # caller declared it to be Python.
+        if args.lang == 'python' or args.path.endswith('.py'):
+            return [parse_python_file(args.path)]
+        return []
+
+    results = []
+    for root, dirs, files in os.walk(args.path):
+        # Prune in place so os.walk never descends into skipped directories;
+        # sorting keeps the scan order (and so the report) deterministic.
+        dirs[:] = sorted(d for d in dirs if d not in SKIPPED_DIRS)
+        for name in sorted(files):
+            if _wants_python(args, name):
+                results.append(parse_python_file(os.path.join(root, name)))
+    return results
+
+
+def main(argv=None):
+    """Parse command-line arguments, analyze the target and emit the report.
+
+    Args:
+        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
+
+    A missing path or an unsupported ``--output`` value ends the run through
+    ``parser.error`` (exit status 2) instead of silently producing nothing.
+    """
+    parser = argparse.ArgumentParser(description="CodeCritical: A code analysis tool.")
+    parser.add_argument('--path', type=str, default='.',
+                        help='Path to the repository or file to analyze.')
+    parser.add_argument('--lang', type=str, help='Language to analyze (e.g., "python").')
+    parser.add_argument('--detect', action='store_true',
+                        help='Automatically detect and analyze all supported languages.')
+    parser.add_argument('--output', type=str, default='json', choices=SUPPORTED_OUTPUTS,
+                        help='Output format (only "json" is implemented so far).')
+    parser.add_argument('--report-file', type=str, help='File to write the report to.')
+
+    args = parser.parse_args(argv)
+    if not os.path.exists(args.path):
+        parser.error(f'path does not exist: {args.path}')
+
+    report = generate_report(_collect_results(args), args.path)
+    if args.report_file:
+        with open(args.report_file, 'w', encoding='utf-8') as f:
+            f.write(report)
+    else:
+        print(report)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/codecritical/metrics/__init__.py b/codecritical/metrics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/codecritical/metrics/complexity.py b/codecritical/metrics/complexity.py
new file mode 100644
index 0000000..6452fd0
--- /dev/null
+++ b/codecritical/metrics/complexity.py
@@ -0,0 +1,58 @@
+"""Cyclomatic-complexity measurement for Python function ASTs."""
+import ast
+
+
+class ComplexityVisitor(ast.NodeVisitor):
+    """Count the decision points found while walking an AST subtree.
+
+    ``complexity`` starts at 1 (the single straight-line path) and grows by
+    one for every construct handled below. Nested function bodies are walked
+    as part of the subtree, so their branches count toward the outer total.
+    """
+
+    def __init__(self):
+        self.complexity = 1
+
+    def _add_branch(self, node):
+        """Record one extra path for *node*, then keep walking its children."""
+        self.complexity += 1
+        self.generic_visit(node)
+
+    # An ``elif`` is parsed as an ``If`` nested in ``orelse``, so it is counted
+    # when the walk reaches it; a plain ``else`` adds nothing.
+    visit_If = _add_branch
+    # Conditional expression: ``a if cond else b``.
+    visit_IfExp = _add_branch
+    visit_For = _add_branch
+    visit_AsyncFor = _add_branch
+    visit_While = _add_branch
+    # Each ``except`` clause is a separate path.
+    visit_ExceptHandler = _add_branch
+    visit_With = _add_branch
+    visit_AsyncWith = _add_branch
+    visit_Assert = _add_branch
+
+    def visit_BoolOp(self, node):
+        """Add one path per short-circuit operator (``a and b and c`` adds 2)."""
+        self.complexity += len(node.values) - 1
+        self.generic_visit(node)
+
+    def visit_comprehension(self, node):
+        """Add one path for the ``for`` clause plus one per ``if`` filter."""
+        self.complexity += 1 + len(node.ifs)
+        self.generic_visit(node)
+
+
+def get_function_complexity(func_node):
+    """Return the cyclomatic complexity of one function AST node.
+
+    Args:
+        func_node: An ``ast.FunctionDef`` / ``ast.AsyncFunctionDef`` (any AST
+            node is accepted; its whole subtree is measured).
+
+    Returns:
+        int: 1 plus the number of decision points found.
+    """
+    visitor = ComplexityVisitor()
+    visitor.visit(func_node)
+    return visitor.complexity
diff --git a/codecritical/parsers/__init__.py b/codecritical/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/codecritical/parsers/python_parser.py b/codecritical/parsers/python_parser.py
new file mode 100644
index 0000000..022ecc2
--- /dev/null
+++ b/codecritical/parsers/python_parser.py
@@ -0,0 +1,91 @@
+"""Metric extraction for Python source files."""
+import ast
+
+from codecritical.metrics.complexity import get_function_complexity
+
+
+def _error_result(filepath, message):
+    """Build the result returned for a file that could not be analyzed."""
+    return {'filepath': filepath, 'error': message}
+
+
+def _count_lines(lines):
+    """Classify physical lines; return ``(code, comment, blank)`` counts.
+
+    A line is a comment only when ``#`` is its first non-space character, so
+    docstrings and trailing comments are counted as code.
+    """
+    code = comment = blank = 0
+    for line in lines:
+        stripped = line.strip()
+        if not stripped:
+            blank += 1
+        elif stripped.startswith('#'):
+            comment += 1
+        else:
+            code += 1
+    return code, comment, blank
+
+
+def parse_python_file(filepath):
+    """Parse a Python file and extract its code metrics.
+
+    Args:
+        filepath: Path of the file to analyze.
+
+    Returns:
+        dict: On success, ``filepath``, ``lines`` (total/code/comment/blank),
+        ``functions`` and ``classes`` (name + lineno each, functions also
+        carry ``complexity``), ``function_count``, ``class_count`` and
+        ``avg_complexity``. When the file cannot be read, decoded or parsed,
+        only ``filepath`` and an ``error`` message, so that one bad file does
+        not abort a whole directory scan.
+    """
+    try:
+        # utf-8-sig also accepts plain UTF-8 and drops a leading byte-order
+        # mark instead of leaving it in the source text.
+        with open(filepath, 'r', encoding='utf-8-sig') as f:
+            content = f.read()
+    except (OSError, UnicodeDecodeError) as exc:
+        return _error_result(filepath, f'{type(exc).__name__}: could not read file')
+
+    lines = content.splitlines()
+    code_lines, comment_lines, blank_lines = _count_lines(lines)
+
+    try:
+        tree = ast.parse(content)
+    except (SyntaxError, ValueError):
+        # ValueError covers sources ast.parse rejects before tokenizing,
+        # e.g. text containing null bytes on some Python versions.
+        return _error_result(filepath, 'SyntaxError: could not parse file')
+
+    functions = []
+    classes = []
+    for node in ast.walk(tree):
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+            functions.append({
+                'name': node.name,
+                'lineno': node.lineno,
+                'complexity': get_function_complexity(node),
+            })
+        elif isinstance(node, ast.ClassDef):
+            classes.append({'name': node.name, 'lineno': node.lineno})
+
+    avg_complexity = 0
+    if functions:
+        avg_complexity = sum(f['complexity'] for f in functions) / len(functions)
+
+    return {
+        'filepath': filepath,
+        'lines': {
+            'total': len(lines),
+            'code': code_lines,
+            'comment': comment_lines,
+            'blank': blank_lines,
+        },
+        'functions': functions,
+        'classes': classes,
+        'function_count': len(functions),
+        'class_count': len(classes),
+        'avg_complexity': avg_complexity,
+    }
diff --git a/codecritical/reporters/__init__.py b/codecritical/reporters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/codecritical/reporters/json_reporter.py b/codecritical/reporters/json_reporter.py
new file mode 100644
index 0000000..6867859
--- /dev/null
+++ b/codecritical/reporters/json_reporter.py
@@ -0,0 +1,88 @@
+"""JSON report generation for CodeCritical analysis results."""
+import datetime
+import json
+
+# Number of entries kept in each language's ``top_complex_files`` list.
+TOP_COMPLEX_FILES = 5
+
+
+def _function_complexity_total(result):
+    """Return the summed complexity of every function in one file result."""
+    if 'functions' in result:
+        return sum(func['complexity'] for func in result['functions'])
+    # Without per-function detail, rebuild the sum from the file average.
+    return result['avg_complexity'] * result['function_count']
+
+
+def generate_report(results, repo_path):
+    """Build the JSON report for a list of per-file analysis results.
+
+    Args:
+        results: Per-file dicts as produced by the parsers. Entries with an
+            ``error`` key are listed under ``errors`` and excluded from all
+            metrics. An optional ``language`` key picks the per-language
+            bucket and defaults to ``python``.
+        repo_path: Path that was analyzed; stored unchanged as ``repo``.
+
+    Returns:
+        str: The report serialized as JSON with two-space indentation.
+    """
+    summary = {
+        'files_scanned': len(results),
+        'languages': [],
+        'total_lines': 0,
+    }
+    languages = {}
+    complexity_totals = {}
+    errors = []
+
+    for result in results:
+        if 'error' in result:
+            errors.append({'path': result.get('filepath'), 'error': result['error']})
+            continue
+
+        lang = result.get('language', 'python')
+        if lang not in languages:
+            languages[lang] = {
+                'files': 0,
+                'lines': 0,
+                'functions': 0,
+                'avg_complexity': 0.0,
+                'top_complex_files': [],
+            }
+            complexity_totals[lang] = 0
+            summary['languages'].append(lang)
+
+        stats = languages[lang]
+        line_total = result['lines']['total']
+        stats['files'] += 1
+        stats['lines'] += line_total
+        stats['functions'] += result['function_count']
+        # Keep the summary in step with the per-language line counts.
+        summary['total_lines'] += line_total
+        complexity_totals[lang] += _function_complexity_total(result)
+
+        if result['avg_complexity'] > 0:
+            stats['top_complex_files'].append({
+                'path': result['filepath'],
+                'complexity': result['avg_complexity'],
+            })
+
+    for lang, stats in languages.items():
+        # Average over functions, not files, so that files without any
+        # function do not drag the figure toward zero.
+        if stats['functions']:
+            stats['avg_complexity'] = complexity_totals[lang] / stats['functions']
+        stats['top_complex_files'].sort(key=lambda entry: entry['complexity'], reverse=True)
+        del stats['top_complex_files'][TOP_COMPLEX_FILES:]
+
+    # Timezone-aware UTC timestamp, formatted like the README schema example.
+    now = datetime.datetime.now(datetime.timezone.utc)
+    report = {
+        'repo': repo_path,
+        'date': now.strftime('%Y-%m-%dT%H:%M:%SZ'),
+        'summary': summary,
+        'languages': languages,
+        'errors': errors,
+    }
+    return json.dumps(report, indent=2)
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8036b5f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "codecritical" +version = "0.1.0" +description = "A code analysis tool for multiple languages." 
+authors = ["Jules"] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.9" + +[tool.poetry.dev-dependencies] +pytest = "^6.2" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api"
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..f5b90ed
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,38 @@
+import json
+import subprocess
+import sys
+
+import pytest
+
+
+@pytest.fixture
+def sample_python_file(tmp_path):
+    """Write a one-function module into a temp directory; return its path."""
+    content = """
+def hello(name):
+    print(f"Hello, {name}!")
+"""
+    p = tmp_path / "sample.py"
+    p.write_text(content)
+    return str(p)
+
+
+def test_cli_with_file(sample_python_file):
+    """Analyzing one file prints a JSON report describing exactly that file."""
+    # sys.executable is the interpreter running the tests (the one that has
+    # codecritical installed); a bare "python" may be absent or another install.
+    result = subprocess.run(
+        [sys.executable, '-m', 'codecritical.cli', '--path', sample_python_file,
+         '--lang', 'python', '--output', 'json'],
+        capture_output=True,
+        text=True
+    )
+
+    assert result.returncode == 0, result.stderr
+    report = json.loads(result.stdout)
+
+    assert report['repo'] == sample_python_file
+    assert report['summary']['files_scanned'] == 1
+    assert report['languages']['python']['files'] == 1
+    assert report['languages']['python']['functions'] == 1
+    assert report['languages']['python']['avg_complexity'] == 1.0
diff --git a/tests/test_python_parser.py b/tests/test_python_parser.py
new file mode 100644
index 0000000..a743f7d
--- /dev/null
+++ b/tests/test_python_parser.py
@@ -0,0 +1,59 @@
+import pytest
+
+from codecritical.parsers.python_parser import parse_python_file
+
+
+@pytest.fixture
+def sample_python_file(tmp_path):
+    """Write a module with one function and one two-method class."""
+    content = """
+def hello(name):
+    print(f"Hello, {name}!")
+
+class Greeter:
+    def __init__(self, greeting):
+        self.greeting = greeting
+
+    def greet(self, name):
+        if self.greeting == "Hello":
+            print(f"Hello, {name}!")
+        else:
+            print(f"{self.greeting}, {name}!")
+"""
+    p = tmp_path / "sample.py"
+    p.write_text(content)
+    return str(p)
+
+
+def test_parse_python_file(sample_python_file):
+    """Line, function, class and complexity metrics match the fixture."""
+    result = parse_python_file(sample_python_file)
+
+    assert result['filepath'] == sample_python_file
+    # The triple-quoted fixture starts with a newline: 3 blank + 10 code lines.
+    assert result['lines'] == {'total': 13, 'code': 10, 'comment': 0, 'blank': 3}
+    assert result['function_count'] == 3
+    assert result['class_count'] == 1
+
+    # hello complexity = 1
+    # __init__ complexity = 1
+    # greet complexity = 2 (1 for if)
+    assert result['avg_complexity'] == (1 + 1 + 2) / 3
+
+    func_complexities = {f['name']: f['complexity'] for f in result['functions']}
+    assert func_complexities['hello'] == 1
+    assert func_complexities['__init__'] == 1
+    assert func_complexities['greet'] == 2
+
+
+def test_parse_python_file_with_syntax_error(tmp_path):
+    """A file that does not parse yields an error result, not an exception."""
+    content = """
+def hello(name)
+    print(f"Hello, {name}!")
+"""
+    p = tmp_path / "sample.py"
+    p.write_text(content)
+    result = parse_python_file(str(p))
+    assert 'error' in result
+    assert result['filepath'] == str(p)