Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions Backend/api/app/routers/parser_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,19 +135,15 @@ async def sync_single_parser(
service = ParserSyncService(config_api_url=request.config_api_url)

logger.info(f"Syncing single parser for sourcetype: {request.sourcetype}")

# Use the sourcetype as the source name for lookup
results = service.ensure_parsers_for_sources(
sources=[request.sourcetype],

result = service.ensure_parser_for_sourcetype(
sourcetype=request.sourcetype,
config_write_token=request.config_write_token,
github_repo_urls=request.github_repo_urls,
github_token=request.github_token
github_token=request.github_token,
)

# Get result for the sourcetype
result = results.get(request.sourcetype, {})
status = result.get('status', 'no_parser')
message = result.get('message', 'Unknown status')
status = result.get("status", "no_parser")
message = result.get("message", "Unknown status")

logger.info(f"Single parser sync for {request.sourcetype}: {status} - {message}")

Expand Down
128 changes: 102 additions & 26 deletions Backend/api/app/services/parser_sync_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@

logger = logging.getLogger(__name__)


# Some generator sourcetypes correspond to marketplace parser names that don't exist
# as local parser directories in this repo. Provide aliases so we can still upload
# a reasonable local parser when asked to sync those names.
# Keys are requested sourcetype names; values are the local parser folder name to
# use instead (consumed by _local_parser_directories_for_sourcetype).
LOCAL_PARSER_ALIASES: Dict[str, str] = {
    # Palo Alto Networks Firewall marketplace parser name -> local parser folder name
    "marketplace-paloaltonetworksfirewall-latest": "paloalto_firewall-latest",
}

# Mapping from generator/source names to parser sourcetypes
# This maps scenario sources to their corresponding parser directory names
SCENARIO_SOURCE_TO_PARSER = {
Expand Down Expand Up @@ -49,7 +58,7 @@

# Network Security
"darktrace": "darktrace_darktrace_logs-latest",
"paloalto_firewall": "paloalto_logs-latest",
"paloalto_firewall": "paloalto_firewall-latest",
"f5_networks": "f5_networks_logs-latest",
"fortinet_fortigate": "fortinet_fortigate_candidate_logs-latest",
"zscaler": "zscaler_logs-latest",
Expand Down Expand Up @@ -125,9 +134,36 @@ def get_parser_path_in_siem(self, sourcetype: str) -> str:
sourcetype: The parser sourcetype (e.g., 'okta_authentication-latest')

Returns:
The parser path in SIEM (e.g., '/parsers/okta_authentication-latest')
The parser path in SIEM (e.g., '/logParsers/okta_authentication-latest')
"""
return f"/parsers/{sourcetype}"
# In the Scalyr/SentinelOne config tree, log parsers are stored as JSON files
# under /logParsers.
leaf = sourcetype
if not leaf.endswith(".json"):
leaf = f"{leaf}.json"
return f"/logParsers/{leaf}"

def _local_parser_directories_for_sourcetype(self, sourcetype: str) -> List[Path]:
    """Return the candidate local directories that may contain the parser
    for *sourcetype*, in lookup-priority order.

    Marketplace-only names are first translated through LOCAL_PARSER_ALIASES
    so a reasonable local parser folder can still be found.
    """
    local_name = LOCAL_PARSER_ALIASES.get(sourcetype, sourcetype)

    # Generator tooling emits prefixed sourcetypes (community-*, marketplace-*);
    # map each prefix to the repo subdirectories that hold those parsers.
    prefixed_locations = (
        ("community-", ("community", "community_new")),
        ("marketplace-", ("marketplace",)),
    )
    for prefix, subdirs in prefixed_locations:
        if local_name.startswith(prefix):
            leaf = local_name.removeprefix(prefix)
            return [self.parsers_dir / subdir / leaf for subdir in subdirs]

    # Unprefixed names: search the standard parser trees.
    return [
        self.parsers_dir / subdir / local_name
        for subdir in ("community", "community_new", "sentinelone")
    ]

def load_local_parser(self, sourcetype: str) -> Optional[str]:
"""
Expand All @@ -139,12 +175,7 @@ def load_local_parser(self, sourcetype: str) -> Optional[str]:
Returns:
The parser JSON content as string, or None if not found
"""
# Try community directory first
parser_dirs = [
self.parsers_dir / "community" / sourcetype,
self.parsers_dir / "community_new" / sourcetype,
self.parsers_dir / "sentinelone" / sourcetype,
]
parser_dirs = self._local_parser_directories_for_sourcetype(sourcetype)

for parser_dir in parser_dirs:
if parser_dir.exists():
Expand All @@ -168,6 +199,54 @@ def load_local_parser(self, sourcetype: str) -> Optional[str]:

logger.warning(f"Parser not found locally: {sourcetype}")
return None

def ensure_parser_for_sourcetype(
    self,
    sourcetype: str,
    config_write_token: str,
    github_repo_urls: Optional[List[str]] = None,
    github_token: Optional[str] = None,
    selected_parser: Optional[Dict] = None,
) -> Dict[str, str]:
    """
    Ensure a parser for a single sourcetype exists in the destination SIEM.

    Checks whether the parser already exists at its expected config path; if
    not, tries to load it from the local parser directories and then, as a
    fallback, from the given GitHub repositories, uploading whichever copy
    is found first.

    Args:
        sourcetype: Parser sourcetype (e.g. 'okta_authentication-latest')
        config_write_token: Token for the SIEM config read/write API
        github_repo_urls: Optional GitHub repo URLs to search as a fallback
        github_token: Optional token for authenticated GitHub access
        selected_parser: Optional pre-selected GitHub parser descriptor

    Returns:
        Dict with 'status' (one of 'exists', 'no_parser', 'failed',
        'uploaded', 'uploaded_from_github') and a human-readable 'message'.
    """
    parser_path = self.get_parser_path_in_siem(sourcetype)

    exists, _ = self.check_parser_exists(config_write_token, parser_path)
    if exists:
        return {
            "status": "exists",
            "message": f"Parser already exists: {parser_path}",
        }

    parser_content = self.load_local_parser(sourcetype)
    from_github = False

    if not parser_content and github_repo_urls:
        parser_content = self.load_parser_from_github(
            sourcetype=sourcetype,
            repo_urls=github_repo_urls,
            selected_parser=selected_parser,
            github_token=github_token,
        )
        from_github = parser_content is not None

    if not parser_content:
        return {
            "status": "no_parser",
            "message": f"Parser not found locally or in GitHub repos: {sourcetype}",
        }

    # upload_parser returns (success, detail) so failures can surface the reason.
    ok, detail = self.upload_parser(config_write_token, parser_path, parser_content)
    if not ok:
        return {
            "status": "failed",
            "message": f"Failed to upload parser: {parser_path} ({detail})",
        }

    return {
        "status": "uploaded_from_github" if from_github else "uploaded",
        "message": f"Parser uploaded successfully: {parser_path}",
    }

def load_parser_from_github(
self,
Expand Down Expand Up @@ -298,7 +377,7 @@ def upload_parser(
parser_path: str,
content: str,
timeout: int = 30
) -> bool:
) -> Tuple[bool, str]:
"""
Upload a parser to the destination SIEM using putFile API

Expand All @@ -309,7 +388,7 @@ def upload_parser(
timeout: Request timeout in seconds

Returns:
True if upload succeeded, False otherwise
Tuple of (success, message)
"""
try:
url = f"{self.api_base_url}/putFile"
Expand All @@ -332,25 +411,22 @@ def upload_parser(
result = response.json()
if result.get("status") == "success":
logger.info(f"Parser uploaded successfully: {parser_path}")
return True
return True, "success"
else:
logger.error(
f"Failed to upload parser {parser_path}: {result.get('message', 'Unknown error')}"
)
return False
msg = result.get('message', 'Unknown error')
logger.error(f"Failed to upload parser {parser_path}: {msg}")
return False, msg
else:
logger.error(
f"Failed to upload parser {parser_path}: "
f"{response.status_code} - {response.text}"
)
return False
msg = f"{response.status_code} - {response.text}"
logger.error(f"Failed to upload parser {parser_path}: {msg}")
return False, msg

except requests.exceptions.Timeout:
logger.error(f"Timeout uploading parser: {parser_path}")
return False
return False, "timeout"
except Exception as e:
logger.error(f"Error uploading parser {parser_path}: {e}")
return False
return False, str(e)

def ensure_parsers_for_sources(
self,
Expand Down Expand Up @@ -457,9 +533,9 @@ def ensure_parsers_for_sources(
continue

# Upload the parser
success = self.upload_parser(config_write_token, parser_path, parser_content)
ok, detail = self.upload_parser(config_write_token, parser_path, parser_content)

if success:
if ok:
status = "uploaded_from_github" if from_github else "uploaded"
source_label = "GitHub" if from_github else "local"
results[source] = {
Expand All @@ -471,7 +547,7 @@ def ensure_parsers_for_sources(
results[source] = {
"status": "failed",
"sourcetype": actual_sourcetype,
"message": f"Failed to upload parser: {parser_path}"
"message": f"Failed to upload parser: {parser_path} ({detail})"
}

return results
Expand Down
48 changes: 36 additions & 12 deletions Backend/event_generators/network_security/paloalto_firewall.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
from datetime import datetime, timezone, timedelta
import time

import os
import sys

sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'shared'))
from randomization import Randomizer

# Palo Alto log types
LOG_TYPES = ["TRAFFIC", "THREAT", "SYSTEM", "CONFIG", "HIP-MATCH", "GLOBALPROTECT", "USERID", "URL"]

Expand All @@ -19,14 +25,18 @@

def get_random_ip(internal_probability=0.5):
    """Generate a random IP address.

    With probability *internal_probability* the address is internal
    (as decided by the shared Randomizer); otherwise it is external.
    """
    want_internal = random.random() < internal_probability
    return _R.ip(internal=want_internal)


def get_random_username(domain_probability: float = 0.7, empty_probability: float = 0.2) -> str:
    """Return a random source username for log fields.

    Empty with probability *empty_probability*; otherwise a Randomizer-generated
    name, DOMAIN-qualified ('corp\\\\name') with probability *domain_probability*.
    """
    if random.random() < empty_probability:
        return ""

    name = _R.person(domain="corp.local").username
    return f"corp\\{name}" if random.random() < domain_probability else name

def generate_serial_number():
"""Generate a firewall serial number."""
def generate_session_id():
    """Generate a numeric firewall session ID string in [10000, 999999]."""
    # randrange(a, b) == randint(a, b - 1), so the distribution is unchanged.
    return str(random.randrange(10000, 1000000))


_R = Randomizer()

def generate_traffic_log():
"""Generate a TRAFFIC log entry."""
now = datetime.now(timezone.utc)
Expand Down Expand Up @@ -89,7 +102,7 @@ def generate_traffic_log():
src_ip, # natsrc
dst_ip, # natdst
f"allow-{app}" if action == "allow" else f"block-{random.choice(['threats', 'malware', 'default'])}", # rule
random.choice([f"domain\\user{random.randint(1, 100)}", ""]), # srcuser
get_random_username(), # srcuser
"", # dstuser
app, # app
"vsys1", # vsys
Expand Down Expand Up @@ -125,7 +138,14 @@ def generate_traffic_log():
str(int(packets * 0.4)), # pkts_received
random.choice(["aged-out", "tcp-fin", "tcp-rst", "policy-deny", ""]) if action != "allow" else "aged-out", # session_end_reason
]


# The marketplace Palo Alto firewall parser expects a fixed number of CSV columns.
# If we stop emitting fields early, the line will not match even if the earlier
# fields are correct (because required delimiters/columns are missing).
expected_fields = 115
if len(fields) < expected_fields:
fields.extend([""] * (expected_fields - len(fields)))

return ",".join(fields)

def generate_threat_log():
Expand Down Expand Up @@ -155,7 +175,7 @@ def generate_threat_log():
src_ip, # natsrc
dst_ip, # natdst
"block-threats", # rule
"", # srcuser
get_random_username(), # srcuser
"", # dstuser
random.choice(["web-browsing", "ssl", "ftp", "smtp"]), # app
"vsys1", # vsys
Expand Down Expand Up @@ -197,7 +217,11 @@ def generate_threat_log():
"", # recipient
"", # reportid
]


expected_fields = 120
if len(fields) < expected_fields:
fields.extend([""] * (expected_fields - len(fields)))

return ",".join(fields)

def paloalto_firewall_log(overrides: dict | None = None) -> str:
Expand Down
Loading
Loading