Skip to content

One question: how do I do incremental learning when training with Drain3? #97

@CH-nolyn

Description

@CH-nolyn
import json
import logging
import sys
import time

from drain3 import TemplateMiner
from drain3.file_persistence import FilePersistence
from util.config_reader import initialize_template_config
from util.httpserver_operation import training_post_model


def process_log_training(raw_log_path, query_data):
    """Train a Drain3 template miner on a raw log file and upload the result.

    The miner state is persisted to ``{scenario}/drain3_state.bin`` through
    ``FilePersistence``; because Drain3 reloads persisted state on startup,
    re-running with the same scenario continues (incrementally extends) the
    previously trained model rather than starting from scratch.

    :param raw_log_path: path to a UTF-8 text file, one log message per line.
    :param query_data: mapping with at least a ``"scenario"`` key naming the
        directory that holds the persisted miner state.
    """
    logger = logging.getLogger(__name__)
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
    scenario = query_data["scenario"]
    output_file = f"{scenario}/drain3_state.bin"
    persistence = FilePersistence(output_file)

    template_miner = TemplateMiner(persistence, config=initialize_template_config(profiling_enabled=True))

    line_count = 0
    start_time = time.time()
    batch_start_time = start_time
    batch_size = 10000

    # Train line by line. Stream the file instead of readlines() so memory
    # stays bounded regardless of log size; iteration order is identical.
    with open(raw_log_path, encoding='utf-8') as f:
        for line in f:
            line = line.rstrip()
            result = template_miner.add_log_message(line)
            line_count += 1
            if line_count % batch_size == 0:
                # Per-batch throughput report.
                time_took = time.time() - batch_start_time
                rate = batch_size / time_took
                logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                            f"{len(template_miner.drain.clusters)} clusters so far.")
                batch_start_time = time.time()
            if result["change_type"] != "none":
                # A cluster was created or its template changed; log the delta.
                # (Requires `import json`, which was missing in the original.)
                result_json = json.dumps({
                    result["cluster_id"]: {
                        "template_mined": result["template_mined"]
                    }
                })
                logger.info(f"Input ({line_count}): " + line)
                logger.info("Result: " + result_json)

    time_took = time.time() - start_time
    rate = line_count / time_took
    logger.info(
        f"--- Done processing file in {time_took:.2f} sec. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
        f"{len(template_miner.drain.clusters)} clusters")

    # Largest clusters first for the summary dump.
    sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True)
    for cluster in sorted_clusters:
        logger.info(cluster)

    print("Prefix Tree:")
    template_miner.drain.print_tree()
    template_miner.profiler.report(0)

    # Upload the persisted model state to the training service.
    training_post_model(output_file)

This is my training code, so how can I train on new logs using the previously trained model?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions