From 8189a50f81916c2c03b711da90297ce517673b5e Mon Sep 17 00:00:00 2001
From: teddy Kernix <93723692+tgamiette@users.noreply.github.com>
Date: Tue, 18 Feb 2025 09:42:58 +0100
Subject: [PATCH 1/3] fix: update performance test

---
 .gitignore                                    |   1 +
 .../chunking/ChunkingPerformanceTest.java     | 312 ++++++++++++++----
 2 files changed, 257 insertions(+), 56 deletions(-)

diff --git a/.gitignore b/.gitignore
index cee1840..0e7b12b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 # windsurf rules
 .windsurfrules
+java/src/test/resources/
\ No newline at end of file
diff --git a/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java b/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java
index 27437c8..935d474 100644
--- a/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java
+++ b/java/src/test/java/com/goofy/GoofyFiles/chunking/ChunkingPerformanceTest.java
@@ -1,14 +1,23 @@
 package com.goofy.GoofyFiles.chunking;
 
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
+import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
 
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -17,78 +26,269 @@ public class ChunkingPerformanceTest {
 
   private ChunkingService chunkingService;
 
-  private static final int[] FILE_SIZES = {
-      1 * 1024 * 1024, // 1 MB
-      10 * 1024 * 1024, // 10 MB
-      50 * 1024 * 1024 // 50 MB
-  };
+  private static final Path TEST_FILES_DIR = Paths.get("src/test/resources/test-files");
+  private static final Path PROJECT_ROOT = Paths.get(".").toAbsolutePath().normalize();
+
+  // File extensions to look for, per file type
+  private static final Map<String, String[]> FILE_PATTERNS = new HashMap<>();
+  static {
+    FILE_PATTERNS.put("Text", new String[]{".txt", ".md", ".json"});
+    FILE_PATTERNS.put("CSV", new String[]{".csv"});
+    FILE_PATTERNS.put("Log", new String[]{".log"});
+    FILE_PATTERNS.put("Binary", new String[]{".bin", ".dat"});
+    FILE_PATTERNS.put("Archive", new String[]{".zip", ".jar"});
+  }
 
   @BeforeEach
-  void setUp() throws NoSuchAlgorithmException {
+  void setUp() throws NoSuchAlgorithmException, IOException {
     chunkingService = new ChunkingService();
+    System.out.println("Searching for files in: " + PROJECT_ROOT);
+
+    // Create the test directory if it does not exist
+    Files.createDirectories(TEST_FILES_DIR);
   }
 
   @Test
-  void testChunkingPerformance() throws IOException {
-    System.out.println("\n=== Chunking Performance Test ===");
-    System.out.println("Format: Size | Time | Speed | Chunk count | Avg size | Duplication");
-    System.out.println("--------------------------------------------------------");
+  void testChunkingPerformanceOnFiles() throws IOException {
+    System.out.println("\n=== Performance Test on Files ===");
+    System.out.println("Format: Type | File | Size | Time | Speed | Chunks (Total/Unique) | Gain | Avg size");
+    System.out.println("------------------------------------------------------------------------");
 
-    for (int fileSize : FILE_SIZES) {
-      File testFile = createTestFile(fileSize);
+    // For each file type
+    for (Map.Entry<String, String[]> entry : FILE_PATTERNS.entrySet()) {
+      String fileType = entry.getKey();
+      String[] extensions = entry.getValue();
+
+      System.out.println("\n=== Testing type: " + fileType + " ===");
 
-      StopWatch stopWatch = new StopWatch();
-      stopWatch.start();
-      List<Chunk> chunks = chunkingService.chunkFile(testFile);
-      stopWatch.stop();
-
-      double timeInSeconds = stopWatch.getTotalTimeSeconds();
-      double speedMBps = (fileSize / (1024.0 * 1024.0)) / timeInSeconds;
-      double avgChunkSize = fileSize / (double) chunks.size();
-      long uniqueChunks = chunks.stream()
-          .map(Chunk::getHash)
-          .distinct()
-          .count();
-      double duplicationRate = 1.0 - ((double) uniqueChunks / chunks.size());
-
-      System.out.printf("%5.1f MB | %6.3f s | %6.1f MB/s | %8d | %7.0f B | %6.2f%%%n",
-          fileSize / (1024.0 * 1024.0),
-          timeInSeconds,
-          speedMBps,
-          chunks.size(),
-          avgChunkSize,
-          duplicationRate * 100);
-
-      testFile.delete();
-
-      assertNotNull(chunks);
-      assertTrue(chunks.size() > 0);
+      // Look for real files in the project
+      List<File> realFiles = findRealFiles(extensions);
+
+      if (!realFiles.isEmpty()) {
+        // Use the real files that were found
+        System.out.println("Using real files found in the project:");
+        for (File file : realFiles) {
+          testFilePerformance(fileType, file);
+        }
+      } else {
+        // Generate test files when no real file is found
+        System.out.println("No real files found, using generated files:");
+        List<File> testFiles = new ArrayList<>();
+        for (int size : new int[]{1, 10, 50, 100}) { // 1, 10, 50 and 100 MB
+          File testFile = createTestFile(fileType, size);
+          testFiles.add(testFile);
+          testFilePerformance(fileType, testFile);
+        }
+
+        // Clean up the generated test files
+        for (File file : testFiles) {
+          Files.deleteIfExists(file.toPath());
+        }
+      }
+    }
+  }
+
+  private List<File> findRealFiles(String[] extensions) throws IOException {
+    List<File> files = new ArrayList<>();
+    try (var walk = Files.walk(PROJECT_ROOT)) {
+      files = walk
+          .filter(Files::isRegularFile)
+          .filter(p -> {
+            String name = p.toString().toLowerCase();
+            return Arrays.stream(extensions)
+                .anyMatch(ext -> name.endsWith(ext.toLowerCase()));
+          })
+          .map(Path::toFile)
+          .filter(f -> f.length() > 0 && f.length() <= 50 * 1024 * 1024) // Files between 0 and 50 MB
+          .sorted((f1, f2) -> Long.compare(f2.length(), f1.length())) // Largest files first
+          .limit(2) // At most 2 files per type
+          .collect(Collectors.toList());
     }
+    return files;
   }
 
-  private File createTestFile(int size) throws IOException {
-    File file = File.createTempFile("perf-test-", ".dat");
-    file.deleteOnExit();
+  private File createTestFile(String fileType, int sizeMB) throws IOException {
+    String fileName = String.format("%s-%dMB%s",
+        fileType.toLowerCase(),
+        sizeMB,
+        FILE_PATTERNS.get(fileType)[0]); // Use the type's first extension
+
+    File file = TEST_FILES_DIR.resolve(fileName).toFile();
+
+    switch (fileType) {
+      case "Text":
+        createTextFile(file, sizeMB);
+        break;
+      case "CSV":
+        createCsvFile(file, sizeMB);
+        break;
+      case "Log":
+        createLogFile(file, sizeMB);
+        break;
+      case "Binary":
+        createBinaryFile(file, sizeMB);
+        break;
+      case "Archive":
+        createZipFile(file, sizeMB);
+        break;
+      default:
+        throw new IllegalArgumentException("Unsupported file type: " + fileType);
+    }
+    return file;
+  }
+
+  private void createTextFile(File file, int sizeMB) throws IOException {
+    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
+      String[] paragraphs = {
+          "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ",
+          "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ",
+          "Ut enim ad minim veniam, quis nostrud exercitation ullamco. ",
+          "Duis aute irure dolor in reprehenderit in voluptate velit esse. "
+      };
+
+      long targetSize = sizeMB * 1024L * 1024L;
+      while (file.length() < targetSize) {
+        for (String para : paragraphs) {
+          writer.write(para);
+          if (Math.random() < 0.3) {
+            writer.write(para);
+          }
+        }
+        writer.write("\n");
+      }
+    }
+  }
+
+  private void createCsvFile(File file, int sizeMB) throws IOException {
+    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
+      writer.write("id,date,user,action,status,amount\n");
+
+      long targetSize = sizeMB * 1024L * 1024L;
+      int id = 1;
+      String[] actions = {"LOGIN", "PURCHASE", "VIEW", "LOGOUT"};
+      String[] statuses = {"SUCCESS", "PENDING", "FAILED"};
+
+      while (file.length() < targetSize) {
+        String date = String.format("2024-02-%02d", (id % 28) + 1);
+        String action = actions[id % actions.length];
+        String status = statuses[id % statuses.length];
+        double amount = Math.round(Math.random() * 1000 * 100.0) / 100.0;
+
+        writer.write(String.format("%d,%s,user%d,%s,%s,%.2f\n",
+            id, date, (id % 100) + 1, action, status, amount));
+        id++;
+      }
+    }
+  }
 
-    // Create data with a certain degree of repetition
-    byte[] repeatingPattern = new byte[1024]; // 1KB pattern
-    new Random().nextBytes(repeatingPattern);
+  private void createLogFile(File file, int sizeMB) throws IOException {
+    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
+      String[] levels = {"INFO", "WARN", "ERROR", "DEBUG"};
+      String[] messages = {
+          "User authentication successful",
+          "Database connection timeout",
+          "Invalid request parameters",
+          "Cache miss for key: %s",
+          "Processing batch job #%d"
+      };
+
+      long targetSize = sizeMB * 1024L * 1024L;
+      int lineCount = 0;
+
+      while (file.length() < targetSize) {
+        String timestamp = String.format("2024-02-%02d %02d:%02d:%02d.%03d",
+            (lineCount % 28) + 1,
+            lineCount % 24,
+            (lineCount * 7) % 60,
+            (lineCount * 13) % 60,
+            lineCount % 1000);
+
+        String level = levels[lineCount % levels.length];
+        String message = messages[lineCount % messages.length];
+        if (message.contains("%s")) {
+          message = String.format(message, "cache_" + lineCount);
+        } else if (message.contains("%d")) {
+          message = String.format(message, lineCount);
+        }
+
+        writer.write(String.format("%s [%s] %s%n", timestamp, level, message));
+        lineCount++;
+      }
+    }
+  }
 
+  private void createBinaryFile(File file, int sizeMB) throws IOException {
     try (FileOutputStream fos = new FileOutputStream(file)) {
-      int written = 0;
-      while (written < size) {
-        // 70% chance to write the repeating pattern, 30% chance of random data
+      byte[] pattern = new byte[4096];
+      new Random().nextBytes(pattern);
+
+      long targetSize = sizeMB * 1024L * 1024L;
+      while (file.length() < targetSize) {
         if (Math.random() < 0.7) {
-          fos.write(repeatingPattern);
+          fos.write(pattern);
         } else {
-          byte[] randomData = new byte[1024];
-          new Random().nextBytes(randomData);
-          fos.write(randomData);
+          byte[] random = new byte[4096];
+          new Random().nextBytes(random);
+          fos.write(random);
        }
-        written += 1024;
       }
     }
+  }
 
-    return file;
+  private void createZipFile(File file, int sizeMB) throws IOException {
+    try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(file))) {
+      byte[] data = "Repetitive content to test ZIP compression".getBytes();
+
+      int fileCount = 1;
+      while (file.length() < sizeMB * 1024L * 1024L) {
+        ZipEntry entry = new ZipEntry(String.format("file%d.txt", fileCount++));
+        zos.putNextEntry(entry);
+        zos.write(data);
+        zos.closeEntry();
+
+        if (Math.random() < 0.3) {
+          entry = new ZipEntry(String.format("file%d_copy.txt", fileCount - 1));
+          zos.putNextEntry(entry);
+          zos.write(data);
+          zos.closeEntry();
+        }
+      }
+    }
+  }
+
+  private void testFilePerformance(String fileType, File testFile) throws IOException {
+    StopWatch stopWatch = new StopWatch();
+    stopWatch.start();
+    List<Chunk> chunks = chunkingService.chunkFile(testFile);
+    stopWatch.stop();
+
+    long fileSize = testFile.length();
+    double timeInSeconds = stopWatch.getTotalTimeSeconds();
+    double speedMBps = (fileSize / (1024.0 * 1024.0)) / timeInSeconds;
+    double avgChunkSize = fileSize / (double) chunks.size();
+
+    Map<String, Chunk> uniqueChunks = chunks.stream()
+        .collect(Collectors.toMap(
+            Chunk::getHash,
+            chunk -> chunk,
+            (existing, replacement) -> existing
+        ));
+
+    long totalUniqueSize = uniqueChunks.values().stream()
+        .mapToInt(chunk -> chunk.getData().length)
+        .sum();
+
+    double storageGain = ((fileSize - totalUniqueSize) / (double) fileSize) * 100;
+
+    System.out.printf("%-8s | %-20s | %5.1f MB | %6.3f s | %6.1f MB/s | %4d/%4d | %13.2f%% | %7.0f B%n",
+        fileType,
+        testFile.getName(),
+        fileSize / (1024.0 * 1024.0),
+        timeInSeconds,
+        speedMBps,
+        chunks.size(),
+        uniqueChunks.size(),
+        storageGain,
+        avgChunkSize);
   }
 }
From f8f426ea4b5b4c5feb9075966f43f08699be249e Mon Sep 17 00:00:00 2001
From: teddy Kernix <93723692+tgamiette@users.noreply.github.com>
Date: Tue, 18 Feb 2025 11:01:34 +0100
Subject: [PATCH 2/3] feat: add duplication test

---
 .vscode/settings.json                         |  2 +-
 .../controller/api/DuplicationController.java | 44 ++++++++++
 .../duplication/DuplicationService.java       | 81 +++++++++++++++++
 .../duplication/HashingAlgorithm.java         | 17 ++++
 .../DuplicationPerformanceTest.java           | 87 +++++++++++++++++++
 5 files changed, 230 insertions(+), 1 deletion(-)
 create mode 100644 java/src/main/java/com/goofy/GoofyFiles/controller/api/DuplicationController.java
 create mode 100644 java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java
 create mode 100644 java/src/main/java/com/goofy/GoofyFiles/duplication/HashingAlgorithm.java
 create mode 100644 java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 0bcf2ea..f54d20c 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -4,5 +4,5 @@
   "editor.formatOnSave": true,
   "editor.defaultFormatter": "esbenp.prettier-vscode",
   "java.compile.nullAnalysis.mode": "automatic",
-  "java.configuration.updateBuildConfiguration": "interactive"
+  "java.configuration.updateBuildConfiguration": "automatic"
 }
diff --git a/java/src/main/java/com/goofy/GoofyFiles/controller/api/DuplicationController.java b/java/src/main/java/com/goofy/GoofyFiles/controller/api/DuplicationController.java
new file mode 100644
index 0000000..898ba6e
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/controller/api/DuplicationController.java
@@ -0,0 +1,44 @@
+package com.goofy.GoofyFiles.controller.api;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+
+import com.goofy.GoofyFiles.duplication.DuplicationService;
+import com.goofy.GoofyFiles.duplication.HashingAlgorithm;
+
+@RestController
+@RequestMapping("api/duplication")
+public class DuplicationController {
+
+  private final DuplicationService duplicationService;
+
+  public DuplicationController(DuplicationService duplicationService) {
+    this.duplicationService = duplicationService;
+  }
+
+  @PostMapping("/analyze")
+  public ResponseEntity<?> analyzeFile(
+      @RequestParam("file") MultipartFile file,
+      @RequestParam(value = "algorithm", defaultValue = "SHA256") HashingAlgorithm algorithm) {
+    try {
+      File tempFile = File.createTempFile("upload-", "-" + file.getOriginalFilename());
+      file.transferTo(tempFile);
+
+      Map<String, Object> result = duplicationService.analyzeFile(tempFile, algorithm);
+
+      tempFile.delete();
+      return ResponseEntity.ok(result);
+    } catch (IOException e) {
+      return ResponseEntity.internalServerError()
+          .body(Map.of("error", "Failed to process the file: " + e.getMessage()));
+    }
+  }
+}
diff --git a/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java b/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java
new file mode 100644
index 0000000..38a65bb
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java
@@ -0,0 +1,81 @@
+package com.goofy.GoofyFiles.duplication;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Service;
+
+import com.goofy.GoofyFiles.chunking.Chunk;
+import com.goofy.GoofyFiles.chunking.ChunkingService;
+import com.google.common.hash.Hashing;
+
+@Service
+public class DuplicationService {
+
+  private static final Logger logger = LoggerFactory.getLogger(DuplicationService.class);
+  private final ChunkingService chunkingService;
+
+  public DuplicationService(ChunkingService chunkingService) {
+    this.chunkingService = chunkingService;
+  }
+
+  public Map<String, Object> analyzeFile(File file, HashingAlgorithm algorithm) throws IOException {
+    List<Chunk> chunks = chunkingService.chunkFile(file);
+    Map<String, Integer> duplicates = new HashMap<>();
+
+    for (Chunk chunk : chunks) {
+      String hash = calculateHash(chunk.getData(), algorithm);
+      duplicates.merge(hash, 1, Integer::sum);
+      logger.debug("Chunk at position {} with size {} bytes has hash: {}",
+          chunk.getPosition(), chunk.getData().length, hash);
+    }
+
+    // Log chunks that appear more than once
+    duplicates.entrySet().stream()
+        .filter(e -> e.getValue() > 1);
+
+    long uniqueChunks = duplicates.size();
+    long totalChunks = chunks.size();
+    long duplicatedChunks = duplicates.entrySet().stream()
+        .filter(e -> e.getValue() > 1)
+        .count();
+
+    return Map.of(
+        "fileName", file.getName(),
+        "totalChunks", totalChunks,
+        "uniqueChunks", uniqueChunks,
+        "duplicatedChunks", duplicatedChunks,
+        "algorithm", algorithm.name(),
+        "duplicateDetails", duplicates.entrySet().stream()
+            .filter(e -> e.getValue() > 1)
+            .collect(Collectors.toMap(
+                Map.Entry::getKey,
+                Map.Entry::getValue
+            ))
+    );
+  }
+
+  private String calculateHash(byte[] data, HashingAlgorithm algorithm) {
+    try {
+      switch (algorithm) {
+        case SHA1:
+          return Hashing.sha1().hashBytes(data).toString();
+        case SHA256:
+          return Hashing.sha256().hashBytes(data).toString();
+        case BLAKE3:
+          // return DigestUtils.sha256Hex(data);
+          throw new UnsupportedOperationException("BLAKE3 not supported yet");
+        default:
+          throw new IllegalArgumentException("Unsupported hashing algorithm: " + algorithm);
+      }
+    } catch (Exception e) {
+      throw new RuntimeException("Error while computing the hash", e);
+    }
+  }
+}
diff --git a/java/src/main/java/com/goofy/GoofyFiles/duplication/HashingAlgorithm.java b/java/src/main/java/com/goofy/GoofyFiles/duplication/HashingAlgorithm.java
new file mode 100644
index 0000000..60e3c46
--- /dev/null
+++ b/java/src/main/java/com/goofy/GoofyFiles/duplication/HashingAlgorithm.java
@@ -0,0 +1,17 @@
+package com.goofy.GoofyFiles.duplication;
+
+public enum HashingAlgorithm {
+  SHA1("SHA-1"),
+  SHA256("SHA-256"),
+  BLAKE3("BLAKE3");
+
+  private final String algorithmName;
+
+  HashingAlgorithm(String algorithmName) {
+    this.algorithmName = algorithmName;
+  }
+
+  public String getAlgorithmName() {
+    return algorithmName;
+  }
+}
diff --git a/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java b/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java
new file mode 100644
index 0000000..5bc5610
--- /dev/null
+++ b/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java
@@ -0,0 +1,87 @@
+package com.goofy.GoofyFiles.duplication;
+
+import com.goofy.GoofyFiles.chunking.ChunkingService;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.security.NoSuchAlgorithmException;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Random;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class DuplicationPerformanceTest {
+
+  private DuplicationService duplicationService;
+  private ChunkingService chunkingService;
+
+  @BeforeEach
+  void setUp() throws NoSuchAlgorithmException {
+    chunkingService = new ChunkingService();
+    duplicationService = new DuplicationService(chunkingService);
+  }
+
+  @Test
+  void testDuplicationDetectionWithDifferentAlgorithms(@TempDir Path tempDir) throws IOException {
+    // Create a test file with repetitive data
+    File testFile = createTestFile(tempDir, 1024 * 1024); // 1MB
+
+    // Test with SHA-1
+    long startTime = System.nanoTime();
+    Map<String, Object> sha1Results = duplicationService.analyzeFile(testFile, HashingAlgorithm.SHA1);
+    long sha1Time = System.nanoTime() - startTime;
+
+    // Test with SHA-256
+    startTime = System.nanoTime();
+    Map<String, Object> sha256Results = duplicationService.analyzeFile(testFile, HashingAlgorithm.SHA256);
+    long sha256Time = System.nanoTime() - startTime;
+
+    // Print detailed results
+    System.out.println("=== Performance test results ===");
+    System.out.println("SHA-1:");
+    System.out.println(" - Execution time: " + sha1Time / 1_000_000.0 + " ms");
+    System.out.println(" - Unique chunks: " + sha1Results.get("uniqueChunks"));
+    System.out.println(" - Duplicated chunks: " + sha1Results.get("duplicatedChunks"));
+    System.out.println(" - Duplicate details: " + sha1Results.get("duplicateDetails"));
+
+    System.out.println("\nSHA-256:");
+    System.out.println(" - Execution time: " + sha256Time / 1_000_000.0 + " ms");
+    System.out.println(" - Unique chunks: " + sha256Results.get("uniqueChunks"));
+    System.out.println(" - Duplicated chunks: " + sha256Results.get("duplicatedChunks"));
+    System.out.println(" - Duplicate details: " + sha256Results.get("duplicateDetails"));
+
+    // Assertions
+    assertTrue((Long) sha1Results.get("duplicatedChunks") > 0, "Duplicates should be detected");
+    assertEquals(sha1Results.get("uniqueChunks"), sha256Results.get("uniqueChunks"),
+        "The number of unique chunks should be the same for both algorithms");
+  }
+
+  private File createTestFile(Path tempDir, int size) throws IOException {
+    File file = tempDir.resolve("test.dat").toFile();
+    try (FileOutputStream fos = new FileOutputStream(file)) {
+      // Create a few fixed patterns to guarantee duplicates
+      byte[][] patterns = new byte[4][];
+      for (int i = 0; i < patterns.length; i++) {
+        patterns[i] = new byte[8192]; // 8KB per pattern
+        Arrays.fill(patterns[i], (byte)i); // Fill with a constant value
+      }
+
+      // Write the patterns repeatedly
+      Random random = new Random(42);
+      int written = 0;
+      while (written < size) {
+        // Pick one of the 4 patterns at random
+        byte[] pattern = patterns[random.nextInt(patterns.length)];
+        fos.write(pattern);
+        written += pattern.length;
+      }
+    }
+    return file;
+  }
+}
From 6e44af1cebc0935a3e8064b87e3791d77a90bbac Mon Sep 17 00:00:00 2001
From: Adrien Albuquerque
Date: Tue, 18 Feb 2025 11:33:26 +0100
Subject: [PATCH 3/3] Blake3 integration

---
 Makefile                                      |  7 ++--
 java/pom.xml                                  |  5 ++-
 .../duplication/DuplicationService.java       |  9 +++--
 .../DuplicationPerformanceTest.java           | 33 ++++++++++++-------
 4 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index 8c05297..2e05463 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: docker-test-perf build up clean compile update
+.PHONY: docker-test-perf build up clean compile update test
 
 build:
 	docker-compose build
@@ -14,10 +14,13 @@ compile:
 	docker-compose exec app mvn clean package -f /source/pom.xml
 
 update:
-	docker-compose exec app mvn clean package -f /source/pom.xml && \
+	docker-compose exec app mvn clean package -DskipTests -f /source/pom.xml && \
 	docker-compose exec app cp /source/target/GoofyFiles-0.0.1-SNAPSHOT.jar /app/app.jar && \
 	docker-compose restart app
 
 # Run the performance tests
 docker-test-perf:
 	docker-compose exec app mvn test -f /source/pom.xml -Dtest=ChunkingPerformanceTest
+
+test:
+	docker-compose exec app mvn test -f /source/pom.xml
\ No newline at end of file
diff --git a/java/pom.xml b/java/pom.xml
index 14f6e4b..cdfa658 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -36,6 +36,10 @@
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.springframework.boot</groupId>
       <artifactId>spring-boot-starter-data-jpa</artifactId>
@@ -44,7 +48,6 @@
     <dependency>
       <groupId>org.springframework.boot</groupId>
       <artifactId>spring-boot-starter-web</artifactId>
     </dependency>
-
     <dependency>
      <groupId>org.postgresql</groupId>
       <artifactId>postgresql</artifactId>
diff --git a/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java b/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java
index 38a65bb..1def8fc 100644
--- a/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java
+++ b/java/src/main/java/com/goofy/GoofyFiles/duplication/DuplicationService.java
@@ -7,6 +7,8 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.Blake3;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.stereotype.Service;
@@ -36,7 +38,7 @@ public Map<String, Object> analyzeFile(File file, HashingAlgorithm algorithm) th
         chunk.getPosition(), chunk.getData().length, hash);
     }
 
-    // Log chunks that appear more than once
+    // Filter chunks that appear more than once (this result could be logged or reused)
     duplicates.entrySet().stream()
         .filter(e -> e.getValue() > 1);
 
@@ -69,8 +71,9 @@ private String calculateHash(byte[] data, HashingAlgorithm algorithm) {
       case SHA256:
         return Hashing.sha256().hashBytes(data).toString();
       case BLAKE3:
-        // return DigestUtils.sha256Hex(data);
-        throw new UnsupportedOperationException("BLAKE3 not supported yet");
+        // Use Apache Commons Codec for BLAKE3
+        byte[] hashBytes = Blake3.hash(data);
+        return Hex.encodeHexString(hashBytes);
       default:
         throw new IllegalArgumentException("Unsupported hashing algorithm: " + algorithm);
diff --git a/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java b/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java
index 5bc5610..4db7bdc 100644
--- a/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java
+++ b/java/src/test/java/com/goofy/GoofyFiles/duplication/DuplicationPerformanceTest.java
@@ -4,7 +4,6 @@
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
-
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
@@ -13,7 +12,6 @@
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Random;
-
 import static org.junit.jupiter.api.Assertions.*;
 
 class DuplicationPerformanceTest {
@@ -29,8 +27,8 @@ void setUp() throws NoSuchAlgorithmException {
 
   @Test
   void testDuplicationDetectionWithDifferentAlgorithms(@TempDir Path tempDir) throws IOException {
-    // Create a test file with repetitive data
-    File testFile = createTestFile(tempDir, 1024 * 1024); // 1MB
+    // Create a test file with repetitive data (1MB)
+    File testFile = createTestFile(tempDir, 1024 * 1024);
 
     // Test with SHA-1
     long startTime = System.nanoTime();
@@ -42,7 +40,12 @@ void testDuplicationDetectionWithDifferentAlgorithms(@TempDir Path tempDir) thro
     Map<String, Object> sha256Results = duplicationService.analyzeFile(testFile, HashingAlgorithm.SHA256);
     long sha256Time = System.nanoTime() - startTime;
 
-    // Print detailed results
+    // Test with BLAKE3
+    startTime = System.nanoTime();
+    Map<String, Object> blake3Results = duplicationService.analyzeFile(testFile, HashingAlgorithm.BLAKE3);
+    long blake3Time = System.nanoTime() - startTime;
+
+    // Print the results
     System.out.println("=== Performance test results ===");
     System.out.println("SHA-1:");
     System.out.println(" - Execution time: " + sha1Time / 1_000_000.0 + " ms");
@@ -56,10 +59,20 @@ void testDuplicationDetectionWithDifferentAlgorithms(@TempDir Path tempDir) thro
     System.out.println(" - Duplicated chunks: " + sha256Results.get("duplicatedChunks"));
     System.out.println(" - Duplicate details: " + sha256Results.get("duplicateDetails"));
 
+    System.out.println("\nBLAKE3:");
+    System.out.println(" - Execution time: " + blake3Time / 1_000_000.0 + " ms");
+    System.out.println(" - Unique chunks: " + blake3Results.get("uniqueChunks"));
+    System.out.println(" - Duplicated chunks: " + blake3Results.get("duplicatedChunks"));
+    System.out.println(" - Duplicate details: " + blake3Results.get("duplicateDetails"));
+
     // Assertions
-    assertTrue((Long) sha1Results.get("duplicatedChunks") > 0, "Duplicates should be detected");
-    assertEquals(sha1Results.get("uniqueChunks"), sha256Results.get("uniqueChunks"),
-        "The number of unique chunks should be the same for both algorithms");
+    assertTrue((Long) sha1Results.get("duplicatedChunks") > 0, "Duplicates should be detected for SHA-1");
+    assertTrue((Long) blake3Results.get("duplicatedChunks") > 0, "Duplicates should be detected for BLAKE3");
+    // The number of unique chunks must be identical across algorithms
+    assertEquals(sha1Results.get("uniqueChunks"), sha256Results.get("uniqueChunks"),
+        "The number of unique chunks must be the same for SHA-1 and SHA-256");
+    assertEquals(sha1Results.get("uniqueChunks"), blake3Results.get("uniqueChunks"),
+        "The number of unique chunks must be the same for SHA-1 and BLAKE3");
   }
 
   private File createTestFile(Path tempDir, int size) throws IOException {
@@ -69,14 +82,12 @@ private File createTestFile(Path tempDir, int size) throws IOException {
     File file = tempDir.resolve("test.dat").toFile();
     try (FileOutputStream fos = new FileOutputStream(file)) {
       // Create a few fixed patterns to guarantee duplicates
       byte[][] patterns = new byte[4][];
       for (int i = 0; i < patterns.length; i++) {
         patterns[i] = new byte[8192]; // 8KB per pattern
-        Arrays.fill(patterns[i], (byte)i); // Fill with a constant value
+        Arrays.fill(patterns[i], (byte)i);
       }
 
-      // Write the patterns repeatedly
       Random random = new Random(42);
       int written = 0;
       while (written < size) {
-        // Pick one of the 4 patterns at random
         byte[] pattern = patterns[random.nextInt(patterns.length)];
         fos.write(pattern);
         written += pattern.length;