chore: Merge sample data scripts.

This commit is contained in:
Elisiário Couto
2025-11-23 23:08:57 +00:00
parent 5eecc72219
commit 31abe68b2a
2 changed files with 503 additions and 576 deletions

View File

@@ -1,10 +1,479 @@
"""Generate sample database command."""
import json
import random
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
import click
class SampleDataGenerator:
"""Generates realistic sample data for testing Leggen."""
def __init__(self, db_path: Path):
self.db_path = db_path
self.institutions = [
{
"id": "REVOLUT_REVOLT21",
"name": "Revolut",
"bic": "REVOLT21",
"country": "LT",
},
{
"id": "BANCOBPI_BBPIPTPL",
"name": "Banco BPI",
"bic": "BBPIPTPL",
"country": "PT",
},
{
"id": "MONZO_MONZGB2L",
"name": "Monzo Bank",
"bic": "MONZGB2L",
"country": "GB",
},
{
"id": "NUBANK_NUPBBR25",
"name": "Nu Pagamentos",
"bic": "NUPBBR25",
"country": "BR",
},
]
self.transaction_types = [
{
"description": "Grocery Store",
"amount_range": (-150, -20),
"frequency": 0.3,
},
{"description": "Coffee Shop", "amount_range": (-15, -3), "frequency": 0.2},
{
"description": "Gas Station",
"amount_range": (-80, -30),
"frequency": 0.1,
},
{
"description": "Online Shopping",
"amount_range": (-200, -25),
"frequency": 0.15,
},
{
"description": "Restaurant",
"amount_range": (-60, -15),
"frequency": 0.15,
},
{"description": "Salary", "amount_range": (2500, 5000), "frequency": 0.02},
{
"description": "ATM Withdrawal",
"amount_range": (-200, -20),
"frequency": 0.05,
},
{
"description": "Transfer to Savings",
"amount_range": (-1000, -100),
"frequency": 0.03,
},
]
def ensure_database_dir(self):
"""Ensure database directory exists."""
self.db_path.parent.mkdir(parents=True, exist_ok=True)
def create_tables(self):
"""Create database tables."""
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
# Create accounts table
cursor.execute("""
CREATE TABLE IF NOT EXISTS accounts (
id TEXT PRIMARY KEY,
institution_id TEXT,
status TEXT,
iban TEXT,
name TEXT,
currency TEXT,
created DATETIME,
last_accessed DATETIME,
last_updated DATETIME,
display_name TEXT
)
""")
# Create transactions table with composite primary key
cursor.execute("""
CREATE TABLE IF NOT EXISTS transactions (
accountId TEXT NOT NULL,
transactionId TEXT NOT NULL,
internalTransactionId TEXT,
institutionId TEXT,
iban TEXT,
transactionDate DATETIME,
description TEXT,
transactionValue REAL,
transactionCurrency TEXT,
transactionStatus TEXT,
rawTransaction JSON,
PRIMARY KEY (accountId, transactionId)
)
""")
# Create balances table
cursor.execute("""
CREATE TABLE IF NOT EXISTS balances (
id INTEGER PRIMARY KEY AUTOINCREMENT,
account_id TEXT,
bank TEXT,
status TEXT,
iban TEXT,
amount REAL,
currency TEXT,
type TEXT,
timestamp DATETIME
)
""")
# Create indexes
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_internal_id ON transactions(internalTransactionId)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_date ON transactions(transactionDate)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_account_date ON transactions(accountId, transactionDate)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_amount ON transactions(transactionValue)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_balances_account_id ON balances(account_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_balances_timestamp ON balances(timestamp)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_balances_account_type_timestamp ON balances(account_id, type, timestamp)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_accounts_institution_id ON accounts(institution_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_accounts_status ON accounts(status)"
)
conn.commit()
conn.close()
def generate_iban(self, country_code: str) -> str:
"""Generate a realistic IBAN for the given country."""
ibans = {
"LT": lambda: f"LT{random.randint(10, 99)}{random.randint(10000, 99999)}{random.randint(10000000, 99999999)}",
"PT": lambda: f"PT{random.randint(10, 99)}{random.randint(1000, 9999)}{random.randint(1000, 9999)}{random.randint(10000000000, 99999999999)}",
"GB": lambda: f"GB{random.randint(10, 99)}MONZ{random.randint(100000, 999999)}{random.randint(100000, 999999)}",
"BR": lambda: f"BR{random.randint(10, 99)}{random.randint(10000000, 99999999)}{random.randint(1000, 9999)}{random.randint(10000000, 99999999)}",
}
return ibans.get(
country_code,
lambda: f"{country_code}{random.randint(1000000000000000, 9999999999999999)}",
)()
def generate_accounts(self, num_accounts: int = 3) -> list[dict[str, Any]]:
"""Generate sample accounts."""
accounts = []
base_date = datetime.now() - timedelta(days=90)
for i in range(num_accounts):
institution = random.choice(self.institutions)
account_id = f"account-{i + 1:03d}-{random.randint(1000, 9999)}"
account = {
"id": account_id,
"institution_id": institution["id"],
"status": "READY",
"iban": self.generate_iban(institution["country"]),
"name": f"Personal Account {i + 1}",
"currency": "EUR",
"created": (
base_date + timedelta(days=random.randint(0, 30))
).isoformat(),
"last_accessed": (
datetime.now() - timedelta(hours=random.randint(1, 48))
).isoformat(),
"last_updated": datetime.now().isoformat(),
}
accounts.append(account)
return accounts
def generate_transactions(
self, accounts: list[dict[str, Any]], num_transactions_per_account: int = 50
) -> list[dict[str, Any]]:
"""Generate sample transactions for accounts."""
transactions = []
base_date = datetime.now() - timedelta(days=60)
for account in accounts:
account_transactions = []
current_balance = random.uniform(500, 3000)
for i in range(num_transactions_per_account):
# Choose transaction type based on frequency weights
transaction_type = random.choices(
self.transaction_types,
weights=[t["frequency"] for t in self.transaction_types],
)[0]
# Generate transaction amount
min_amount, max_amount = transaction_type["amount_range"]
amount = round(random.uniform(min_amount, max_amount), 2)
# Generate transaction date (more recent transactions are more likely)
days_ago = random.choices(
range(60), weights=[1.5 ** (60 - d) for d in range(60)]
)[0]
transaction_date = base_date + timedelta(
days=days_ago,
hours=random.randint(6, 22),
minutes=random.randint(0, 59),
)
# Generate transaction IDs
transaction_id = f"bank-txn-{account['id']}-{i + 1:04d}"
internal_transaction_id = f"int-txn-{random.randint(100000, 999999)}"
# Create realistic descriptions
descriptions = {
"Grocery Store": [
"TESCO",
"SAINSBURY'S",
"LIDL",
"ALDI",
"WALMART",
"CARREFOUR",
],
"Coffee Shop": [
"STARBUCKS",
"COSTA COFFEE",
"PRET A MANGER",
"LOCAL CAFE",
],
"Gas Station": ["BP", "SHELL", "ESSO", "GALP", "PETROBRAS"],
"Online Shopping": ["AMAZON", "EBAY", "ZALANDO", "ASOS", "APPLE"],
"Restaurant": [
"PIZZA HUT",
"MCDONALD'S",
"BURGER KING",
"LOCAL RESTAURANT",
],
"Salary": ["MONTHLY SALARY", "PAYROLL DEPOSIT", "SALARY PAYMENT"],
"ATM Withdrawal": ["ATM WITHDRAWAL", "CASH WITHDRAWAL"],
"Transfer to Savings": ["SAVINGS TRANSFER", "INVESTMENT TRANSFER"],
}
specific_descriptions = descriptions.get(
transaction_type["description"], [transaction_type["description"]]
)
description = random.choice(specific_descriptions)
# Create raw transaction (simplified GoCardless format)
raw_transaction = {
"transactionId": transaction_id,
"bookingDate": transaction_date.strftime("%Y-%m-%d"),
"valueDate": transaction_date.strftime("%Y-%m-%d"),
"transactionAmount": {
"amount": str(amount),
"currency": account["currency"],
},
"remittanceInformationUnstructured": description,
"bankTransactionCode": "PMNT" if amount < 0 else "RCDT",
}
# Determine status (most are booked, some recent ones might be pending)
status = (
"pending" if days_ago < 2 and random.random() < 0.1 else "booked"
)
transaction = {
"accountId": account["id"],
"transactionId": transaction_id,
"internalTransactionId": internal_transaction_id,
"institutionId": account["institution_id"],
"iban": account["iban"],
"transactionDate": transaction_date.isoformat(),
"description": description,
"transactionValue": amount,
"transactionCurrency": account["currency"],
"transactionStatus": status,
"rawTransaction": raw_transaction,
}
account_transactions.append(transaction)
current_balance += amount
# Sort transactions by date for realistic ordering
account_transactions.sort(key=lambda x: x["transactionDate"])
transactions.extend(account_transactions)
return transactions
def generate_balances(self, accounts: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Generate sample balances for accounts."""
balances = []
for account in accounts:
# Calculate balance from transactions (simplified)
base_balance = random.uniform(500, 2000)
balance_types = ["interimAvailable", "closingBooked", "authorised"]
for balance_type in balance_types:
# Add some variation to balance types
variation = (
random.uniform(-50, 50) if balance_type != "interimAvailable" else 0
)
balance_amount = base_balance + variation
balance = {
"account_id": account["id"],
"bank": account["institution_id"],
"status": account["status"],
"iban": account["iban"],
"amount": round(balance_amount, 2),
"currency": account["currency"],
"type": balance_type,
"timestamp": datetime.now().isoformat(),
}
balances.append(balance)
return balances
def insert_data(
self,
accounts: list[dict[str, Any]],
transactions: list[dict[str, Any]],
balances: list[dict[str, Any]],
):
"""Insert generated data into the database."""
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
# Insert accounts
for account in accounts:
cursor.execute(
"""
INSERT OR REPLACE INTO accounts
(id, institution_id, status, iban, name, currency, created, last_accessed, last_updated, display_name)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
account["id"],
account["institution_id"],
account["status"],
account["iban"],
account["name"],
account["currency"],
account["created"],
account["last_accessed"],
account["last_updated"],
None, # display_name is initially None for sample data
),
)
# Insert transactions
for transaction in transactions:
cursor.execute(
"""
INSERT OR REPLACE INTO transactions
(accountId, transactionId, internalTransactionId, institutionId, iban,
transactionDate, description, transactionValue, transactionCurrency,
transactionStatus, rawTransaction)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
transaction["accountId"],
transaction["transactionId"],
transaction["internalTransactionId"],
transaction["institutionId"],
transaction["iban"],
transaction["transactionDate"],
transaction["description"],
transaction["transactionValue"],
transaction["transactionCurrency"],
transaction["transactionStatus"],
json.dumps(transaction["rawTransaction"]),
),
)
# Insert balances
for balance in balances:
cursor.execute(
"""
INSERT INTO balances
(account_id, bank, status, iban, amount, currency, type, timestamp)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
balance["account_id"],
balance["bank"],
balance["status"],
balance["iban"],
balance["amount"],
balance["currency"],
balance["type"],
balance["timestamp"],
),
)
conn.commit()
conn.close()
def generate_sample_database(
self, num_accounts: int = 3, num_transactions_per_account: int = 50
):
"""Generate complete sample database."""
click.echo(f"🗄️ Creating sample database at: {self.db_path}")
self.ensure_database_dir()
self.create_tables()
click.echo(f"👥 Generating {num_accounts} sample accounts...")
accounts = self.generate_accounts(num_accounts)
click.echo(
f"💳 Generating {num_transactions_per_account} transactions per account..."
)
transactions = self.generate_transactions(
accounts, num_transactions_per_account
)
click.echo("💰 Generating account balances...")
balances = self.generate_balances(accounts)
click.echo("💾 Inserting data into database...")
self.insert_data(accounts, transactions, balances)
# Print summary
click.echo("\n✅ Sample database created successfully!")
click.echo("📊 Summary:")
click.echo(f" - Accounts: {len(accounts)}")
click.echo(f" - Transactions: {len(transactions)}")
click.echo(f" - Balances: {len(balances)}")
click.echo(f" - Database: {self.db_path}")
# Show account details
click.echo("\n📋 Sample accounts:")
for account in accounts:
institution_name = next(
inst["name"]
for inst in self.institutions
if inst["id"] == account["institution_id"]
)
click.echo(f" - {account['id']} ({institution_name}) - {account['iban']}")
@click.command()
@click.option(
"--database",
@@ -30,39 +499,45 @@ import click
)
@click.pass_context
def generate_sample_db(
ctx: click.Context, database: Path, accounts: int, transactions: int, force: bool
ctx: click.Context,
database: Path | None,
accounts: int,
transactions: int,
force: bool,
):
"""Generate a sample database with realistic financial data for testing."""
import os
# Import here to avoid circular imports
import subprocess
import sys
from pathlib import Path as PathlibPath
# Get the script path
script_path = (
PathlibPath(__file__).parent.parent.parent / "scripts" / "generate_sample_db.py"
)
# Build command arguments
cmd = [sys.executable, str(script_path)]
from leggen.utils.paths import path_manager
# Determine database path
if database:
cmd.extend(["--database", str(database)])
db_path = database
else:
# Use development database by default to avoid overwriting production data
env_path = os.environ.get("LEGGEN_DATABASE_PATH")
if env_path:
db_path = Path(env_path)
else:
# Default to development database in config directory
db_path = path_manager.get_config_dir() / "leggen-dev.db"
cmd.extend(["--accounts", str(accounts)])
cmd.extend(["--transactions", str(transactions)])
# Check if database exists and ask for confirmation
if db_path.exists() and not force:
click.echo(f"⚠️ Database already exists: {db_path}")
if not click.confirm("Do you want to overwrite it?"):
click.echo("Aborted.")
ctx.exit(0)
if force:
cmd.append("--force")
# Generate the sample database
generator = SampleDataGenerator(db_path)
generator.generate_sample_database(accounts, transactions)
# Execute the script
try:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as e:
click.echo(f"Error generating sample database: {e}")
ctx.exit(1)
# Export the command
generate_sample_db = generate_sample_db
# Show usage instructions
click.echo("\n🚀 Usage instructions:")
click.echo("To use this sample database with leggen commands:")
click.echo(f" export LEGGEN_DATABASE_PATH={db_path}")
click.echo(" leggen transactions")
click.echo("")
click.echo("To use this sample database with leggen server:")
click.echo(f" leggen server --database {db_path}")

View File

@@ -1,548 +0,0 @@
#!/usr/bin/env python3
"""Sample database generator for Leggen testing and development."""
import json
import random
import sqlite3
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List
import click
# Add the project root to the Python path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
# Import after path setup - this is necessary for the script to work
from leggen.utils.paths import path_manager # noqa: E402
class SampleDataGenerator:
"""Generates realistic sample data for testing Leggen."""
def __init__(self, db_path: Path):
self.db_path = db_path
self.institutions = [
{
"id": "REVOLUT_REVOLT21",
"name": "Revolut",
"bic": "REVOLT21",
"country": "LT",
},
{
"id": "BANCOBPI_BBPIPTPL",
"name": "Banco BPI",
"bic": "BBPIPTPL",
"country": "PT",
},
{
"id": "MONZO_MONZGB2L",
"name": "Monzo Bank",
"bic": "MONZGB2L",
"country": "GB",
},
{
"id": "NUBANK_NUPBBR25",
"name": "Nu Pagamentos",
"bic": "NUPBBR25",
"country": "BR",
},
]
self.transaction_types = [
{
"description": "Grocery Store",
"amount_range": (-150, -20),
"frequency": 0.3,
},
{"description": "Coffee Shop", "amount_range": (-15, -3), "frequency": 0.2},
{
"description": "Gas Station",
"amount_range": (-80, -30),
"frequency": 0.1,
},
{
"description": "Online Shopping",
"amount_range": (-200, -25),
"frequency": 0.15,
},
{
"description": "Restaurant",
"amount_range": (-60, -15),
"frequency": 0.15,
},
{"description": "Salary", "amount_range": (2500, 5000), "frequency": 0.02},
{
"description": "ATM Withdrawal",
"amount_range": (-200, -20),
"frequency": 0.05,
},
{
"description": "Transfer to Savings",
"amount_range": (-1000, -100),
"frequency": 0.03,
},
]
def ensure_database_dir(self):
"""Ensure database directory exists."""
self.db_path.parent.mkdir(parents=True, exist_ok=True)
def create_tables(self):
"""Create database tables."""
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
# Create accounts table
cursor.execute("""
CREATE TABLE IF NOT EXISTS accounts (
id TEXT PRIMARY KEY,
institution_id TEXT,
status TEXT,
iban TEXT,
name TEXT,
currency TEXT,
created DATETIME,
last_accessed DATETIME,
last_updated DATETIME,
display_name TEXT
)
""")
# Create transactions table with composite primary key
cursor.execute("""
CREATE TABLE IF NOT EXISTS transactions (
accountId TEXT NOT NULL,
transactionId TEXT NOT NULL,
internalTransactionId TEXT,
institutionId TEXT,
iban TEXT,
transactionDate DATETIME,
description TEXT,
transactionValue REAL,
transactionCurrency TEXT,
transactionStatus TEXT,
rawTransaction JSON,
PRIMARY KEY (accountId, transactionId)
)
""")
# Create balances table
cursor.execute("""
CREATE TABLE IF NOT EXISTS balances (
id INTEGER PRIMARY KEY AUTOINCREMENT,
account_id TEXT,
bank TEXT,
status TEXT,
iban TEXT,
amount REAL,
currency TEXT,
type TEXT,
timestamp DATETIME
)
""")
# Create indexes
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_internal_id ON transactions(internalTransactionId)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_date ON transactions(transactionDate)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_account_date ON transactions(accountId, transactionDate)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_transactions_amount ON transactions(transactionValue)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_balances_account_id ON balances(account_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_balances_timestamp ON balances(timestamp)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_balances_account_type_timestamp ON balances(account_id, type, timestamp)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_accounts_institution_id ON accounts(institution_id)"
)
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_accounts_status ON accounts(status)"
)
conn.commit()
conn.close()
def generate_iban(self, country_code: str) -> str:
"""Generate a realistic IBAN for the given country."""
ibans = {
"LT": lambda: f"LT{random.randint(10, 99)}{random.randint(10000, 99999)}{random.randint(10000000, 99999999)}",
"PT": lambda: f"PT{random.randint(10, 99)}{random.randint(1000, 9999)}{random.randint(1000, 9999)}{random.randint(10000000000, 99999999999)}",
"GB": lambda: f"GB{random.randint(10, 99)}MONZ{random.randint(100000, 999999)}{random.randint(100000, 999999)}",
"BR": lambda: f"BR{random.randint(10, 99)}{random.randint(10000000, 99999999)}{random.randint(1000, 9999)}{random.randint(10000000, 99999999)}",
}
return ibans.get(
country_code,
lambda: f"{country_code}{random.randint(1000000000000000, 9999999999999999)}",
)()
def generate_accounts(self, num_accounts: int = 3) -> List[Dict[str, Any]]:
"""Generate sample accounts."""
accounts = []
base_date = datetime.now() - timedelta(days=90)
for i in range(num_accounts):
institution = random.choice(self.institutions)
account_id = f"account-{i + 1:03d}-{random.randint(1000, 9999)}"
account = {
"id": account_id,
"institution_id": institution["id"],
"status": "READY",
"iban": self.generate_iban(institution["country"]),
"name": f"Personal Account {i + 1}",
"currency": "EUR",
"created": (
base_date + timedelta(days=random.randint(0, 30))
).isoformat(),
"last_accessed": (
datetime.now() - timedelta(hours=random.randint(1, 48))
).isoformat(),
"last_updated": datetime.now().isoformat(),
}
accounts.append(account)
return accounts
def generate_transactions(
self, accounts: List[Dict[str, Any]], num_transactions_per_account: int = 50
) -> List[Dict[str, Any]]:
"""Generate sample transactions for accounts."""
transactions = []
base_date = datetime.now() - timedelta(days=60)
for account in accounts:
account_transactions = []
current_balance = random.uniform(500, 3000)
for i in range(num_transactions_per_account):
# Choose transaction type based on frequency weights
transaction_type = random.choices(
self.transaction_types,
weights=[t["frequency"] for t in self.transaction_types],
)[0]
# Generate transaction amount
min_amount, max_amount = transaction_type["amount_range"]
amount = round(random.uniform(min_amount, max_amount), 2)
# Generate transaction date (more recent transactions are more likely)
days_ago = random.choices(
range(60), weights=[1.5 ** (60 - d) for d in range(60)]
)[0]
transaction_date = base_date + timedelta(
days=days_ago,
hours=random.randint(6, 22),
minutes=random.randint(0, 59),
)
# Generate transaction IDs
transaction_id = f"bank-txn-{account['id']}-{i + 1:04d}"
internal_transaction_id = f"int-txn-{random.randint(100000, 999999)}"
# Create realistic descriptions
descriptions = {
"Grocery Store": [
"TESCO",
"SAINSBURY'S",
"LIDL",
"ALDI",
"WALMART",
"CARREFOUR",
],
"Coffee Shop": [
"STARBUCKS",
"COSTA COFFEE",
"PRET A MANGER",
"LOCAL CAFE",
],
"Gas Station": ["BP", "SHELL", "ESSO", "GALP", "PETROBRAS"],
"Online Shopping": ["AMAZON", "EBAY", "ZALANDO", "ASOS", "APPLE"],
"Restaurant": [
"PIZZA HUT",
"MCDONALD'S",
"BURGER KING",
"LOCAL RESTAURANT",
],
"Salary": ["MONTHLY SALARY", "PAYROLL DEPOSIT", "SALARY PAYMENT"],
"ATM Withdrawal": ["ATM WITHDRAWAL", "CASH WITHDRAWAL"],
"Transfer to Savings": ["SAVINGS TRANSFER", "INVESTMENT TRANSFER"],
}
specific_descriptions = descriptions.get(
transaction_type["description"], [transaction_type["description"]]
)
description = random.choice(specific_descriptions)
# Create raw transaction (simplified GoCardless format)
raw_transaction = {
"transactionId": transaction_id,
"bookingDate": transaction_date.strftime("%Y-%m-%d"),
"valueDate": transaction_date.strftime("%Y-%m-%d"),
"transactionAmount": {
"amount": str(amount),
"currency": account["currency"],
},
"remittanceInformationUnstructured": description,
"bankTransactionCode": "PMNT" if amount < 0 else "RCDT",
}
# Determine status (most are booked, some recent ones might be pending)
status = (
"pending" if days_ago < 2 and random.random() < 0.1 else "booked"
)
transaction = {
"accountId": account["id"],
"transactionId": transaction_id,
"internalTransactionId": internal_transaction_id,
"institutionId": account["institution_id"],
"iban": account["iban"],
"transactionDate": transaction_date.isoformat(),
"description": description,
"transactionValue": amount,
"transactionCurrency": account["currency"],
"transactionStatus": status,
"rawTransaction": raw_transaction,
}
account_transactions.append(transaction)
current_balance += amount
# Sort transactions by date for realistic ordering
account_transactions.sort(key=lambda x: x["transactionDate"])
transactions.extend(account_transactions)
return transactions
def generate_balances(self, accounts: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Generate sample balances for accounts."""
balances = []
for account in accounts:
# Calculate balance from transactions (simplified)
base_balance = random.uniform(500, 2000)
balance_types = ["interimAvailable", "closingBooked", "authorised"]
for balance_type in balance_types:
# Add some variation to balance types
variation = (
random.uniform(-50, 50) if balance_type != "interimAvailable" else 0
)
balance_amount = base_balance + variation
balance = {
"account_id": account["id"],
"bank": account["institution_id"],
"status": account["status"],
"iban": account["iban"],
"amount": round(balance_amount, 2),
"currency": account["currency"],
"type": balance_type,
"timestamp": datetime.now().isoformat(),
}
balances.append(balance)
return balances
def insert_data(
self,
accounts: List[Dict[str, Any]],
transactions: List[Dict[str, Any]],
balances: List[Dict[str, Any]],
):
"""Insert generated data into the database."""
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
# Insert accounts
for account in accounts:
cursor.execute(
"""
INSERT OR REPLACE INTO accounts
(id, institution_id, status, iban, name, currency, created, last_accessed, last_updated, display_name)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
account["id"],
account["institution_id"],
account["status"],
account["iban"],
account["name"],
account["currency"],
account["created"],
account["last_accessed"],
account["last_updated"],
None, # display_name is initially None for sample data
),
)
# Insert transactions
for transaction in transactions:
cursor.execute(
"""
INSERT OR REPLACE INTO transactions
(accountId, transactionId, internalTransactionId, institutionId, iban,
transactionDate, description, transactionValue, transactionCurrency,
transactionStatus, rawTransaction)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
transaction["accountId"],
transaction["transactionId"],
transaction["internalTransactionId"],
transaction["institutionId"],
transaction["iban"],
transaction["transactionDate"],
transaction["description"],
transaction["transactionValue"],
transaction["transactionCurrency"],
transaction["transactionStatus"],
json.dumps(transaction["rawTransaction"]),
),
)
# Insert balances
for balance in balances:
cursor.execute(
"""
INSERT INTO balances
(account_id, bank, status, iban, amount, currency, type, timestamp)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
balance["account_id"],
balance["bank"],
balance["status"],
balance["iban"],
balance["amount"],
balance["currency"],
balance["type"],
balance["timestamp"],
),
)
conn.commit()
conn.close()
def generate_sample_database(
self, num_accounts: int = 3, num_transactions_per_account: int = 50
):
"""Generate complete sample database."""
click.echo(f"🗄️ Creating sample database at: {self.db_path}")
self.ensure_database_dir()
self.create_tables()
click.echo(f"👥 Generating {num_accounts} sample accounts...")
accounts = self.generate_accounts(num_accounts)
click.echo(
f"💳 Generating {num_transactions_per_account} transactions per account..."
)
transactions = self.generate_transactions(
accounts, num_transactions_per_account
)
click.echo("💰 Generating account balances...")
balances = self.generate_balances(accounts)
click.echo("💾 Inserting data into database...")
self.insert_data(accounts, transactions, balances)
# Print summary
click.echo("\n✅ Sample database created successfully!")
click.echo("📊 Summary:")
click.echo(f" - Accounts: {len(accounts)}")
click.echo(f" - Transactions: {len(transactions)}")
click.echo(f" - Balances: {len(balances)}")
click.echo(f" - Database: {self.db_path}")
# Show account details
click.echo("\n📋 Sample accounts:")
for account in accounts:
institution_name = next(
inst["name"]
for inst in self.institutions
if inst["id"] == account["institution_id"]
)
click.echo(f" - {account['id']} ({institution_name}) - {account['iban']}")
@click.command()
@click.option(
"--database",
type=click.Path(path_type=Path),
help="Path to database file (default: uses LEGGEN_DATABASE_PATH or ~/.config/leggen/leggen-dev.db)",
)
@click.option(
"--accounts",
type=int,
default=3,
help="Number of sample accounts to generate (default: 3)",
)
@click.option(
"--transactions",
type=int,
default=50,
help="Number of transactions per account (default: 50)",
)
@click.option(
"--force",
is_flag=True,
help="Overwrite existing database without confirmation",
)
def main(database: Path, accounts: int, transactions: int, force: bool):
"""Generate a sample database with realistic financial data for testing Leggen."""
# Determine database path
if database:
db_path = database
else:
# Use development database by default to avoid overwriting production data
import os
env_path = os.environ.get("LEGGEN_DATABASE_PATH")
if env_path:
db_path = Path(env_path)
else:
# Default to development database in config directory
db_path = path_manager.get_config_dir() / "leggen-dev.db"
# Check if database exists and ask for confirmation
if db_path.exists() and not force:
click.echo(f"⚠️ Database already exists: {db_path}")
if not click.confirm("Do you want to overwrite it?"):
click.echo("Aborted.")
return
# Generate the sample database
generator = SampleDataGenerator(db_path)
generator.generate_sample_database(accounts, transactions)
# Show usage instructions
click.echo("\n🚀 Usage instructions:")
click.echo("To use this sample database with leggen commands:")
click.echo(f" export LEGGEN_DATABASE_PATH={db_path}")
click.echo(" leggen transactions")
click.echo("")
click.echo("To use this sample database with leggen server:")
click.echo(f" leggen server --database {db_path}")
if __name__ == "__main__":
main()