JavaMLBugDetective/sample.config.properties at main · ttaymaz/JavaMLBugDetective

84 lines (75 loc) · 2.75 KB
# JavaMLBugDetective Configuration File
# =====================================
# This file contains all configuration settings for the bug detection analysis
# Repository Configuration
# ------------------------
# The Git repository URL to analyze
repository.url=https://github.com/google/gson.git
# Local directory where the repository will be cloned/analyzed
repository.local.path=./repositories/gson
# Name of the project (used for database naming)
project.name=gson
# Database Configuration
# ----------------------
# Note: Database name is automatically generated from project.name (e.g., "gson.db")
# Database connection timeout in seconds
database.timeout=30
# Analysis Configuration
# ----------------------
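# Regular expression pattern to identify bug-fixing commits.
# The leading (?i) makes the match case-insensitive. NOTE(review): the pattern has no
# word boundaries, so if it is applied as a substring search it also matches words
# such as "prefix" or "debug" - confirm this is intended.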
bug.fix.pattern=(?i)(fix|bug|defect|issue|patch)
# File extensions to analyze (comma-separated)
file.extensions=.java
# Maximum number of commits to analyze (0 = all commits)
max.commits=0
# SZZ Algorithm Configuration
# ---------------------------
# Keywords used to identify bug-fixing commits (comma-separated).
# Keep these keywords specific to reduce noise and focus on severe bugs.
szz.bug_fix_keywords=crash,exception,error,fault,fail,npe,nullpointer,incorrect result
# Confidence score for SZZ algorithm labeling (0.0 to 1.0)
szz.confidence_score=0.8
# Whether to ignore comments in blame analysis
szz.ignore_comments=true
# Whether to ignore blank lines in blame analysis
szz.ignore_blank_lines=true
# Machine Learning Configuration
# ------------------------------
# Train/test split ratio (e.g., 0.8 means 80% training, 20% testing)
ml.train.ratio=0.8
# ML algorithm to use:
# - RandomForest: Random Forest ensemble
# - J48: C4.5 decision tree
# - NaiveBayes: Naive Bayes classifier
# - SMO: Support Vector Machine (trained with Sequential Minimal Optimization)
# - all: Run all algorithms for comparison
ml.algorithm=all
# Number of cross-validation folds
ml.cv.folds=10
# Enable class balancing for imbalanced datasets
ml.balance.classes=true
# Cost-Sensitive Classification Settings
# The cost of misclassifying a 'buggy' file as 'clean' (False Negative).
# Higher values make the model more aggressive at finding bugs.
ml.cost.fn=10.0
# The cost of misclassifying a 'clean' file as 'buggy' (False Positive).
ml.cost.fp=1.0
# Report Configuration
# --------------------
# Output directory for reports
report.output.dir=./reports
# Report format. Supported values: markdown, html
report.format=markdown
# Include detailed metrics in reports
report.include.details=true
# GitHub Configuration
# --------------------
# GitHub credentials for private repositories (set github.token to a Personal Access Token).
# Do not commit a real token to version control.
github.username=
github.token=
# Logging and Performance Configuration
# -------------------------------------
# Enable verbose logging
logging.verbose=false
# Maximum memory allocation for JVM operations (in MB)
jvm.memory.max=2048
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

sample.config.properties

Latest commit

History

sample.config.properties

File metadata and controls