This repository was archived by the owner on Apr 20, 2025. It is now read-only.
Merged
5 changes: 4 additions & 1 deletion .vscode/settings.json
@@ -4,5 +4,8 @@
"editor.formatOnSave": true,
"editor.defaultFormatter": "esbenp.prettier-vscode",
"java.compile.nullAnalysis.mode": "automatic",
"java.configuration.updateBuildConfiguration": "automatic"
"java.configuration.updateBuildConfiguration": "automatic",
"[java]": {
"editor.defaultFormatter": "redhat.java"
}
}
DuplicationController.java
@@ -18,27 +18,49 @@
@RequestMapping("api/duplication")
public class DuplicationController {

  private final DuplicationService duplicationService;

  public DuplicationController(DuplicationService duplicationService) {
    this.duplicationService = duplicationService;
  }

  @PostMapping("/analyze")
  public ResponseEntity<?> analyzeFile(
      @RequestParam("file") MultipartFile file,
      @RequestParam(value = "algorithm", defaultValue = "SHA256") HashingAlgorithm algorithm) {
    try {
      File tempFile = File.createTempFile("upload-", "-" + file.getOriginalFilename());
      file.transferTo(tempFile);

      Map<String, Object> result = duplicationService.analyzeFile(tempFile, algorithm);

      tempFile.delete();
      return ResponseEntity.ok(result);
    } catch (IOException e) {
      return ResponseEntity.internalServerError()
          .body(Map.of("error", "Échec du traitement du fichier: " + e.getMessage()));
    }
  }

  @PostMapping("/process")
  public ResponseEntity<?> processFile(
      @RequestParam("file") MultipartFile file,
      @RequestParam(value = "algorithm", defaultValue = "SHA256") HashingAlgorithm algorithm) {
    try {
      File tempFile = File.createTempFile("upload-", "-" + file.getOriginalFilename());
      file.transferTo(tempFile);

      Map<String, Object> result = duplicationService.processAndStoreFile(
          tempFile,
          file.getOriginalFilename(),
          file.getSize(),
          algorithm);

      tempFile.delete();
      return ResponseEntity.ok(result);
    } catch (IOException e) {
      return ResponseEntity.internalServerError()
          .body(Map.of("error", "Échec du traitement et de l'enregistrement du fichier: " + e.getMessage()));
    }
  }
}
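Both endpoints accept a multipart file plus an optional algorithm request parameter and return a JSON map built by DuplicationService. As a minimal sketch (not part of this diff) of how the new /process route could be exercised, the MockMvc test below assumes spring-boot-starter-test is on the classpath and that the repositories behind the service can reach a test database; the test class name and sample payload are invented for illustration.

import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.multipart;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.test.web.servlet.MockMvc;

// Hypothetical integration-test sketch; not part of this PR.
@SpringBootTest
@AutoConfigureMockMvc
class DuplicationControllerProcessSketchTest {

  @Autowired
  private MockMvc mockMvc;

  @Test
  void processStoresUploadAndReportsDuplication() throws Exception {
    // Small sample upload; "algorithm" would default to SHA256 if omitted.
    MockMultipartFile file = new MockMultipartFile(
        "file", "sample.txt", "text/plain", "hello hello hello".getBytes());

    mockMvc.perform(multipart("/api/duplication/process")
            .file(file)
            .param("algorithm", "SHA256"))
        .andExpect(status().isOk())
        .andExpect(jsonPath("$.totalChunks").exists())
        .andExpect(jsonPath("$.duplicateChunks").exists());
  }
}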
DuplicationService.java
@@ -5,80 +5,227 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.Blake3;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import com.goofy.GoofyFiles.chunking.Chunk;
import com.goofy.GoofyFiles.chunking.ChunkingService;
import com.goofy.GoofyFiles.model.ChunkEntity;
import com.goofy.GoofyFiles.model.FileChunkEntity;
import com.goofy.GoofyFiles.model.FileEntity;
import com.goofy.GoofyFiles.repository.ChunkRepository;
import com.goofy.GoofyFiles.repository.FileChunkRepository;
import com.goofy.GoofyFiles.repository.FileRepository;
import com.google.common.hash.Hashing;

@Service
public class DuplicationService {

  private static final Logger logger = LoggerFactory.getLogger(DuplicationService.class);

  private final ChunkingService chunkingService;
  private final FileRepository fileRepository;
  private final ChunkRepository chunkRepository;
  private final FileChunkRepository fileChunkRepository;

  /**
   * Main constructor for production use.
   */
  @Autowired
  public DuplicationService(
      ChunkingService chunkingService,
      FileRepository fileRepository,
      ChunkRepository chunkRepository,
      FileChunkRepository fileChunkRepository) {
    this.chunkingService = chunkingService;
    this.fileRepository = fileRepository;
    this.chunkRepository = chunkRepository;
    this.fileChunkRepository = fileChunkRepository;
  }

  /**
   * Simplified constructor for tests.
   * Takes only the ChunkingService; database-backed operations will not be available.
   */
  public DuplicationService(ChunkingService chunkingService) {
    this.chunkingService = chunkingService;
    this.fileRepository = null;
    this.chunkRepository = null;
    this.fileChunkRepository = null;
  }

  public Map<String, Object> analyzeFile(File file, HashingAlgorithm algorithm) throws IOException {
    List<Chunk> chunks = chunkingService.chunkFile(file);
    Map<String, Integer> duplicates = new HashMap<>();

    for (Chunk chunk : chunks) {
      String hash = calculateHash(chunk.getData(), algorithm);
      duplicates.merge(hash, 1, Integer::sum);
      logger.debug("Chunk at position {} with size {} bytes has hash: {}",
          chunk.getPosition(), chunk.getData().length, hash);
    }

    // Chunks that appear more than once (this result could be logged or used elsewhere)
    duplicates.entrySet().stream()
        .filter(e -> e.getValue() > 1);

    long uniqueChunks = duplicates.size();
    long totalChunks = chunks.size();
    long duplicatedChunks = duplicates.entrySet().stream()
        .filter(e -> e.getValue() > 1)
        .count();

    return Map.of(
        "fileName", file.getName(),
        "totalChunks", totalChunks,
        "uniqueChunks", uniqueChunks,
        "duplicatedChunks", duplicatedChunks,
        "algorithm", algorithm.name(),
        "duplicateDetails", duplicates.entrySet().stream()
            .filter(e -> e.getValue() > 1)
            .collect(Collectors.toMap(
                Map.Entry::getKey,
                Map.Entry::getValue)));
  }

  private String calculateHash(byte[] data, HashingAlgorithm algorithm) {
    try {
      switch (algorithm) {
        case SHA1:
          return Hashing.sha1().hashBytes(data).toString();
        case SHA256:
          return Hashing.sha256().hashBytes(data).toString();
        case BLAKE3:
          // BLAKE3 via Apache Commons Codec
          byte[] hashBytes = Blake3.hash(data);
          return Hex.encodeHexString(hashBytes);
        default:
          throw new IllegalArgumentException("Algorithme de hachage non supporté: " + algorithm);
      }
    } catch (Exception e) {
      throw new RuntimeException("Erreur lors du calcul du hash", e);
    }
  }

  @Transactional
  public Map<String, Object> processAndStoreFile(
      File file,
      String fileName,
      long fileSize,
      HashingAlgorithm algorithm) throws IOException {
    if (fileRepository == null || chunkRepository == null || fileChunkRepository == null) {
      throw new UnsupportedOperationException(
          "Cette méthode nécessite les repositories qui n'ont pas été injectés. " +
              "Utilisez le constructeur avec tous les paramètres pour cette fonctionnalité.");
    }

    // 1. Extract the file name and extension
    String name = fileName;
    String extension = "";
    int lastDotIndex = fileName.lastIndexOf('.');
    if (lastDotIndex > 0) {
      name = fileName.substring(0, lastDotIndex);
      extension = fileName.substring(lastDotIndex + 1);
    }

    // 2. Create and persist the file entity
    FileEntity fileEntity = new FileEntity();
    fileEntity.setName(name);
    fileEntity.setExtension(extension);
    fileEntity.setSize(fileSize);
    fileEntity = fileRepository.save(fileEntity);

    // 3. Split the file into chunks
    List<Chunk> chunks = chunkingService.chunkFile(file);

    // Statistics for the result
    int totalChunks = chunks.size();
    int duplicateChunks = 0;
    int uniqueChunks = 0;
    long savedStorage = 0;

    // 4. Process each chunk
    for (Chunk chunk : chunks) {
      String hash = calculateHash(chunk.getData(), algorithm);

      // Check whether this chunk already exists in the database
      Optional<ChunkEntity> existingChunk;
      switch (algorithm) {
        case SHA1:
          existingChunk = chunkRepository.findByHashSha1(hash);
          break;
        case SHA256:
          existingChunk = chunkRepository.findByHashSha256(hash);
          break;
        case BLAKE3:
          existingChunk = chunkRepository.findByHashBlake3(hash);
          break;
        default:
          existingChunk = Optional.empty();
      }

      // Handle the chunk (new or existing)
      ChunkEntity chunkEntity;
      if (existingChunk.isPresent()) {
        chunkEntity = existingChunk.get();
        duplicateChunks++;
        savedStorage += chunk.getOriginalSize();
        logger.info("Chunk dupliqué trouvé: {}", hash);
      } else {
        chunkEntity = new ChunkEntity();
        chunkEntity.setData(chunk.getData());

        // Store the hash under the selected algorithm
        switch (algorithm) {
          case SHA1:
            chunkEntity.setHashSha1(hash);
            break;
          case SHA256:
            chunkEntity.setHashSha256(hash);
            break;
          case BLAKE3:
            chunkEntity.setHashBlake3(hash);
            break;
        }

        chunkEntity = chunkRepository.save(chunkEntity);
        uniqueChunks++;
      }

      // Create the relation between the file and the chunk
      FileChunkEntity fileChunk = new FileChunkEntity();
      fileChunk.setFile(fileEntity);
      fileChunk.setChunk(chunkEntity);
      fileChunk.setPosition(chunk.getPosition());
      fileChunkRepository.save(fileChunk);
    }

    // 5. Prepare the result
    Map<String, Object> result = new HashMap<>();
    result.put("fileId", fileEntity.getId());
    result.put("fileName", fileEntity.getName());
    result.put("extension", fileEntity.getExtension());
    result.put("fileSize", fileEntity.getSize());
    result.put("algorithm", algorithm.name());
    result.put("totalChunks", totalChunks);
    result.put("uniqueChunks", uniqueChunks);
    result.put("duplicateChunks", duplicateChunks);
    result.put("savedStorage", savedStorage);
    result.put("deduplicationRatio", totalChunks > 0 ? (double) duplicateChunks / totalChunks : 0);

    logger.info("Fichier traité: id={}, nom={}, chunks={}, uniques={}, doublons={}",
        fileEntity.getId(), fileName, totalChunks, uniqueChunks, duplicateChunks);

    return result;
  }
}
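processAndStoreFile resolves existing chunks through findByHashSha1, findByHashSha256 and findByHashBlake3, which are referenced here but defined elsewhere in the repository. A minimal sketch of what ChunkRepository could look like as a Spring Data JPA interface follows; extending JpaRepository, the Long id type and the exact entity field names are assumptions inferred from the setters used above, not part of this diff.

import java.util.Optional;

import org.springframework.data.jpa.repository.JpaRepository;

import com.goofy.GoofyFiles.model.ChunkEntity;

// Hedged sketch only; the real ChunkRepository lives in
// com.goofy.GoofyFiles.repository and may differ.
public interface ChunkRepository extends JpaRepository<ChunkEntity, Long> {

  // Derived queries: Spring Data maps these to ChunkEntity fields named
  // hashSha1, hashSha256 and hashBlake3 (matching the setters in the service).
  Optional<ChunkEntity> findByHashSha1(String hash);

  Optional<ChunkEntity> findByHashSha256(String hash);

  Optional<ChunkEntity> findByHashBlake3(String hash);
}

One consequence of keeping one hash column per algorithm, as the setters above suggest, is that a chunk stored under SHA256 will not be recognised as a duplicate when a later upload is processed with BLAKE3; that trade-off is visible in the two switch blocks of processAndStoreFile.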