Database Schema
Comprehensive documentation of SysManage database design, tables, relationships, and data management patterns.
Schema Overview
SysManage uses PostgreSQL as its primary database, designed for high performance, data integrity, and scalability. The schema is organized into logical domains with clear separation of concerns and well-defined relationships.
Database Structure
┌─────────────────────────────────────────────────────────────────┐
│ SysManage Database │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │
│ │ Identity & │ │ Agent │ │ Inventory & │ │
│ │ Access │ │ Management │ │ Configuration │ │
│ │ │ │ │ │ │ │
│ │ • users │ │ • agents │ │ • hosts │ │
│ │ • roles │ │ • groups │ │ • packages │ │
│ │ • sessions │ │ • certs │ │ • services │ │
│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │
│ │ Task & │ │ Monitoring │ │ Audit & │ │
│ │ Workflow │ │ & Metrics │ │ Logging │ │
│ │ │ │ │ │ │ │
│ │ • tasks │ │ • metrics │ │ • audit_log │ │
│ │ • schedules │ │ • alerts │ │ • event_log │ │
│ │ • jobs │ │ • status │ │ • notifications │ │
│ └─────────────┘ └─────────────┘ └─────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
Core Tables
Users & Authentication
users
CREATE TABLE users (
id BIGSERIAL PRIMARY KEY,
username VARCHAR(255) UNIQUE NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
first_name VARCHAR(255),
last_name VARCHAR(255),
is_active BOOLEAN DEFAULT true,
is_superuser BOOLEAN DEFAULT false,
last_login TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT users_username_length CHECK (length(username) >= 3),
CONSTRAINT users_email_format CHECK (email ~* '^[A-Za-z0-9._%-]+@[A-Za-z0-9.-]+[.][A-Za-z]+$')
);
CREATE INDEX idx_users_username ON users(username);
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_active ON users(is_active);
roles
CREATE TABLE roles (
id BIGSERIAL PRIMARY KEY,
name VARCHAR(100) UNIQUE NOT NULL,
description TEXT,
permissions JSONB DEFAULT '{}',
is_system BOOLEAN DEFAULT false,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX idx_roles_name ON roles(name);
CREATE INDEX idx_roles_permissions ON roles USING GIN(permissions);
user_roles
CREATE TABLE user_roles (
id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
role_id BIGINT NOT NULL REFERENCES roles(id) ON DELETE CASCADE,
granted_by BIGINT REFERENCES users(id),
granted_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
expires_at TIMESTAMP WITH TIME ZONE,
UNIQUE(user_id, role_id)
);
CREATE INDEX idx_user_roles_user_id ON user_roles(user_id);
CREATE INDEX idx_user_roles_role_id ON user_roles(role_id);
Agent Management
agents
CREATE TABLE agents (
id BIGSERIAL PRIMARY KEY,
hostname VARCHAR(255) UNIQUE NOT NULL,
ip_address INET,
platform VARCHAR(50) NOT NULL,
platform_version VARCHAR(100),
architecture VARCHAR(50),
agent_version VARCHAR(50),
status VARCHAR(20) DEFAULT 'offline',
last_seen TIMESTAMP WITH TIME ZONE,
last_heartbeat TIMESTAMP WITH TIME ZONE,
certificate_id BIGINT REFERENCES certificates(id),
group_id BIGINT REFERENCES agent_groups(id),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT agents_status_valid CHECK (status IN ('online', 'offline', 'maintenance', 'error'))
);
CREATE INDEX idx_agents_hostname ON agents(hostname);
CREATE INDEX idx_agents_status ON agents(status);
CREATE INDEX idx_agents_last_seen ON agents(last_seen);
CREATE INDEX idx_agents_platform ON agents(platform);
CREATE INDEX idx_agents_metadata ON agents USING GIN(metadata);
agent_groups
CREATE TABLE agent_groups (
id BIGSERIAL PRIMARY KEY,
name VARCHAR(255) UNIQUE NOT NULL,
description TEXT,
parent_id BIGINT REFERENCES agent_groups(id),
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX idx_agent_groups_name ON agent_groups(name);
CREATE INDEX idx_agent_groups_parent_id ON agent_groups(parent_id);
certificates
CREATE TABLE certificates (
id BIGSERIAL PRIMARY KEY,
serial_number VARCHAR(255) UNIQUE NOT NULL,
subject_dn TEXT NOT NULL,
issuer_dn TEXT NOT NULL,
certificate_pem TEXT NOT NULL,
private_key_pem TEXT,
status VARCHAR(20) DEFAULT 'active',
issued_at TIMESTAMP WITH TIME ZONE NOT NULL,
expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
revoked_at TIMESTAMP WITH TIME ZONE,
revocation_reason VARCHAR(50),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT certificates_status_valid CHECK (status IN ('active', 'revoked', 'expired'))
);
CREATE INDEX idx_certificates_serial ON certificates(serial_number);
CREATE INDEX idx_certificates_status ON certificates(status);
CREATE INDEX idx_certificates_expires_at ON certificates(expires_at);
Inventory & Configuration
hosts
CREATE TABLE hosts (
id BIGSERIAL PRIMARY KEY,
agent_id BIGINT NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
fqdn VARCHAR(255),
os_name VARCHAR(100),
os_version VARCHAR(100),
kernel_version VARCHAR(100),
cpu_info JSONB,
memory_info JSONB,
disk_info JSONB,
network_info JSONB,
environment_vars JSONB,
last_inventory TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(agent_id)
);
CREATE INDEX idx_hosts_agent_id ON hosts(agent_id);
CREATE INDEX idx_hosts_fqdn ON hosts(fqdn);
CREATE INDEX idx_hosts_os_name ON hosts(os_name);
CREATE INDEX idx_hosts_last_inventory ON hosts(last_inventory);
packages
CREATE TABLE packages (
id BIGSERIAL PRIMARY KEY,
agent_id BIGINT NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
version VARCHAR(255),
architecture VARCHAR(50),
repository VARCHAR(255),
package_manager VARCHAR(50),
status VARCHAR(20) DEFAULT 'installed',
size_bytes BIGINT,
install_date TIMESTAMP WITH TIME ZONE,
description TEXT,
dependencies JSONB DEFAULT '[]',
metadata JSONB DEFAULT '{}',
last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT packages_status_valid CHECK (status IN ('installed', 'available', 'upgradeable', 'broken')),
UNIQUE(agent_id, name, version, architecture)
);
CREATE INDEX idx_packages_agent_id ON packages(agent_id);
CREATE INDEX idx_packages_name ON packages(name);
CREATE INDEX idx_packages_status ON packages(status);
CREATE INDEX idx_packages_package_manager ON packages(package_manager);
CREATE INDEX idx_packages_dependencies ON packages USING GIN(dependencies);
services
CREATE TABLE services (
id BIGSERIAL PRIMARY KEY,
agent_id BIGINT NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
display_name VARCHAR(255),
status VARCHAR(20),
startup_type VARCHAR(20),
pid INTEGER,
memory_usage BIGINT,
cpu_usage DECIMAL(5,2),
description TEXT,
service_type VARCHAR(50),
binary_path TEXT,
last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT services_status_valid CHECK (status IN ('running', 'stopped', 'starting', 'stopping', 'error')),
UNIQUE(agent_id, name)
);
CREATE INDEX idx_services_agent_id ON services(agent_id);
CREATE INDEX idx_services_name ON services(name);
CREATE INDEX idx_services_status ON services(status);
Task Management
tasks
CREATE TABLE tasks (
id BIGSERIAL PRIMARY KEY,
task_type VARCHAR(100) NOT NULL,
status VARCHAR(20) DEFAULT 'pending',
priority INTEGER DEFAULT 5,
agent_id BIGINT REFERENCES agents(id),
created_by BIGINT NOT NULL REFERENCES users(id),
assigned_to BIGINT REFERENCES users(id),
title VARCHAR(500) NOT NULL,
description TEXT,
parameters JSONB DEFAULT '{}',
result JSONB,
error_message TEXT,
progress INTEGER DEFAULT 0,
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
timeout_seconds INTEGER DEFAULT 300,
retry_count INTEGER DEFAULT 0,
max_retries INTEGER DEFAULT 3,
parent_task_id BIGINT REFERENCES tasks(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT tasks_status_valid CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled', 'timeout')),
CONSTRAINT tasks_priority_range CHECK (priority BETWEEN 1 AND 10),
CONSTRAINT tasks_progress_range CHECK (progress BETWEEN 0 AND 100)
);
CREATE INDEX idx_tasks_status ON tasks(status);
CREATE INDEX idx_tasks_agent_id ON tasks(agent_id);
CREATE INDEX idx_tasks_created_by ON tasks(created_by);
CREATE INDEX idx_tasks_created_at ON tasks(created_at);
CREATE INDEX idx_tasks_parent_task_id ON tasks(parent_task_id);
CREATE INDEX idx_tasks_parameters ON tasks USING GIN(parameters);
schedules
CREATE TABLE schedules (
id BIGSERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description TEXT,
cron_expression VARCHAR(100) NOT NULL,
task_template JSONB NOT NULL,
is_active BOOLEAN DEFAULT true,
last_run TIMESTAMP WITH TIME ZONE,
next_run TIMESTAMP WITH TIME ZONE,
created_by BIGINT NOT NULL REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX idx_schedules_name ON schedules(name);
CREATE INDEX idx_schedules_is_active ON schedules(is_active);
CREATE INDEX idx_schedules_next_run ON schedules(next_run);
Monitoring & Metrics
metrics
CREATE TABLE metrics (
id BIGSERIAL PRIMARY KEY,
agent_id BIGINT NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
metric_name VARCHAR(255) NOT NULL,
metric_type VARCHAR(20) NOT NULL,
value DOUBLE PRECISION NOT NULL,
unit VARCHAR(50),
labels JSONB DEFAULT '{}',
timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT metrics_type_valid CHECK (metric_type IN ('gauge', 'counter', 'histogram', 'summary'))
);
CREATE INDEX idx_metrics_agent_id ON metrics(agent_id);
CREATE INDEX idx_metrics_name ON metrics(metric_name);
CREATE INDEX idx_metrics_timestamp ON metrics(timestamp);
CREATE INDEX idx_metrics_labels ON metrics USING GIN(labels);
-- Partition by timestamp for better performance
CREATE TABLE metrics_y2024m01 PARTITION OF metrics
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
alerts
CREATE TABLE alerts (
id BIGSERIAL PRIMARY KEY,
alert_type VARCHAR(100) NOT NULL,
severity VARCHAR(20) NOT NULL,
status VARCHAR(20) DEFAULT 'open',
agent_id BIGINT REFERENCES agents(id),
title VARCHAR(500) NOT NULL,
description TEXT,
details JSONB DEFAULT '{}',
threshold_value DOUBLE PRECISION,
current_value DOUBLE PRECISION,
triggered_at TIMESTAMP WITH TIME ZONE NOT NULL,
acknowledged_at TIMESTAMP WITH TIME ZONE,
acknowledged_by BIGINT REFERENCES users(id),
resolved_at TIMESTAMP WITH TIME ZONE,
resolved_by BIGINT REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT alerts_severity_valid CHECK (severity IN ('critical', 'high', 'medium', 'low', 'info')),
CONSTRAINT alerts_status_valid CHECK (status IN ('open', 'acknowledged', 'resolved', 'suppressed'))
);
CREATE INDEX idx_alerts_status ON alerts(status);
CREATE INDEX idx_alerts_severity ON alerts(severity);
CREATE INDEX idx_alerts_agent_id ON alerts(agent_id);
CREATE INDEX idx_alerts_triggered_at ON alerts(triggered_at);
Audit & Logging
audit_log
CREATE TABLE audit_log (
id BIGSERIAL PRIMARY KEY,
user_id BIGINT REFERENCES users(id),
action VARCHAR(100) NOT NULL,
resource_type VARCHAR(100),
resource_id VARCHAR(255),
details JSONB DEFAULT '{}',
ip_address INET,
user_agent TEXT,
session_id VARCHAR(255),
success BOOLEAN NOT NULL,
error_message TEXT,
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE INDEX idx_audit_log_user_id ON audit_log(user_id);
CREATE INDEX idx_audit_log_action ON audit_log(action);
CREATE INDEX idx_audit_log_timestamp ON audit_log(timestamp);
CREATE INDEX idx_audit_log_resource ON audit_log(resource_type, resource_id);
event_log
CREATE TABLE event_log (
id BIGSERIAL PRIMARY KEY,
event_type VARCHAR(100) NOT NULL,
severity VARCHAR(20) NOT NULL,
source VARCHAR(100),
agent_id BIGINT REFERENCES agents(id),
title VARCHAR(500) NOT NULL,
message TEXT,
details JSONB DEFAULT '{}',
correlation_id VARCHAR(255),
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
CONSTRAINT event_log_severity_valid CHECK (severity IN ('debug', 'info', 'warning', 'error', 'critical'))
);
CREATE INDEX idx_event_log_event_type ON event_log(event_type);
CREATE INDEX idx_event_log_severity ON event_log(severity);
CREATE INDEX idx_event_log_agent_id ON event_log(agent_id);
CREATE INDEX idx_event_log_timestamp ON event_log(timestamp);
CREATE INDEX idx_event_log_correlation_id ON event_log(correlation_id);
Entity Relationships
Core Entity Relationships
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ users │────▶│ user_roles │◀────│ roles │
│ │ │ │ │ │
│ • id │ │ • user_id │ │ • id │
│ • username │ │ • role_id │ │ • name │
│ • email │ │ • granted_at│ │ • permissions│
└─────────────┘ └─────────────┘ └─────────────┘
│
▼
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
│ tasks │ │ agents │────▶│agent_groups │
│ │ │ │ │ │
│ • id │ │ • id │ │ • id │
│ • created_by│────▶│ • hostname │ │ • name │
│ • agent_id │────▶│ • status │ │ • parent_id │
│ • status │ │ • group_id │────▶│ │
└─────────────┘ └─────────────┘ └─────────────┘
│
▼
┌─────────────┐
│ hosts │
│ │
│ • agent_id │
│ • fqdn │
│ • os_info │
└─────────────┘
│
┌────────────┼────────────┐
▼ ▼ ▼
┌─────────────┐┌─────────────┐┌─────────────┐
│ packages ││ services ││ metrics │
│ ││ ││ │
│ • agent_id ││ • agent_id ││ • agent_id │
│ • name ││ • name ││ • metric_name│
│ • version ││ • status ││ • value │
└─────────────┘└─────────────┘└─────────────┘
Foreign Key Constraints
Critical Relationships
- agents → certificates: Each agent has one certificate for mTLS authentication
- agents → agent_groups: Agents can be organized into hierarchical groups
- hosts → agents: One-to-one relationship between host inventory and agent
- tasks → agents: Tasks are executed on specific agents
- tasks → users: Tasks track who created and assigned them
- user_roles → users/roles: Many-to-many relationship for RBAC
Cascade Behaviors
- ON DELETE CASCADE: When an agent is deleted, all related hosts, packages, services, and metrics are automatically removed
- ON DELETE SET NULL: When a user is deleted, their tasks are orphaned but preserved for audit purposes
- ON DELETE RESTRICT: Roles cannot be deleted if they're assigned to users
Data Types & Constraints
JSON/JSONB Usage
PostgreSQL's JSONB type is extensively used for flexible, schema-free data:
- agent.metadata: Platform-specific configuration and capabilities
- task.parameters: Dynamic task configuration and input parameters
- host.*_info: Hardware and system information that varies by platform
- roles.permissions: Flexible permission definitions with resource-level granularity
- metrics.labels: Prometheus-style metric labels for dimensional data
Example JSONB Structures
-- Agent metadata example
{
"capabilities": ["package_management", "service_control", "file_transfer"],
"package_managers": ["apt", "snap"],
"platform_specific": {
"distribution": "Ubuntu",
"distribution_version": "22.04",
"desktop_environment": "GNOME"
}
}
-- Task parameters example
{
"package_name": "nginx",
"action": "install",
"version": "latest",
"options": {
"auto_start": true,
"configure_firewall": true
}
}
-- Role permissions example
{
"agents": ["read", "update"],
"tasks": ["create", "read", "execute"],
"packages": ["read", "install", "remove"],
"system": ["reboot", "shutdown"]
}
Temporal Data Management
All timestamps use TIMESTAMP WITH TIME ZONE for global consistency:
- created_at/updated_at: Standard audit fields on all major tables
- last_seen/last_heartbeat: Agent connectivity tracking
- started_at/completed_at: Task execution timeframes
- issued_at/expires_at: Certificate lifecycle management
Automatic Timestamp Updates
-- Trigger function for updated_at
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ language 'plpgsql';
-- Apply to tables
CREATE TRIGGER update_users_updated_at
BEFORE UPDATE ON users
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
Enumerated Values
Critical status fields use CHECK constraints for data integrity:
- Agent Status: online, offline, maintenance, error
- Task Status: pending, running, completed, failed, cancelled, timeout
- Certificate Status: active, revoked, expired
- Alert Severity: critical, high, medium, low, info
- Package Status: installed, available, upgradeable, broken
Performance Optimization
Indexing Strategy
Primary Indexes
- Unique constraints: Natural keys (hostname, email, serial_number)
- Foreign key indexes: All foreign key columns for join performance
- Status indexes: Critical for filtering active/online entities
- Timestamp indexes: Essential for time-based queries and cleanup
Specialized Indexes
- GIN indexes: JSONB columns for fast JSON queries
- Partial indexes: Active records only (WHERE is_active = true)
- Composite indexes: Multi-column queries (agent_id, status, timestamp)
-- Example composite index for common query pattern
CREATE INDEX idx_tasks_agent_status_created
ON tasks(agent_id, status, created_at DESC)
WHERE status IN ('pending', 'running');
-- Partial index for active agents
CREATE INDEX idx_agents_active_hostname
ON agents(hostname)
WHERE status != 'offline';
Partitioning Strategy
Time-Based Partitioning
High-volume tables are partitioned by timestamp:
- metrics: Monthly partitions with automatic rotation
- audit_log: Monthly partitions with 2-year retention
- event_log: Weekly partitions with 6-month retention
-- Automatic partition creation function
CREATE OR REPLACE FUNCTION create_monthly_partition(
table_name TEXT,
start_date DATE
) RETURNS VOID AS $$
DECLARE
partition_name TEXT;
end_date DATE;
BEGIN
partition_name := table_name || '_y' ||
to_char(start_date, 'YYYY') || 'm' ||
to_char(start_date, 'MM');
end_date := start_date + INTERVAL '1 month';
EXECUTE format('CREATE TABLE %I PARTITION OF %I
FOR VALUES FROM (%L) TO (%L)',
partition_name, table_name, start_date, end_date);
END;
$$ LANGUAGE plpgsql;
Query Optimization
Common Query Patterns
- Agent Dashboard: Join agents, hosts, latest metrics
- Package Inventory: Aggregate packages by name/version across agents
- Task Monitoring: Active tasks with progress and ETA
- Alert Summary: Open alerts grouped by severity and type
-- Optimized agent dashboard query
SELECT
a.id, a.hostname, a.status, a.last_seen,
h.os_name, h.os_version,
COUNT(t.id) as active_tasks,
MAX(m.timestamp) as last_metric
FROM agents a
LEFT JOIN hosts h ON a.id = h.agent_id
LEFT JOIN tasks t ON a.id = t.agent_id AND t.status IN ('pending', 'running')
LEFT JOIN metrics m ON a.id = m.agent_id
WHERE a.status != 'offline'
GROUP BY a.id, a.hostname, a.status, a.last_seen, h.os_name, h.os_version
ORDER BY a.last_seen DESC;
Database Migrations
Migration Framework
SysManage uses Alembic (SQLAlchemy's migration tool) for database schema management:
Migration Structure
backend/alembic/
├── versions/ # Migration files
│ ├── 001_initial_schema.py
│ ├── 002_add_agent_groups.py
│ ├── 003_add_metrics_partitioning.py
│ └── 004_add_certificate_management.py
├── env.py # Alembic environment configuration
├── script.py.mako # Migration template
└── alembic.ini # Alembic configuration
Example Migration
"""Add agent groups support
Revision ID: 002_add_agent_groups
Revises: 001_initial_schema
Create Date: 2024-01-15 10:30:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision = '002_add_agent_groups'
down_revision = '001_initial_schema'
branch_labels = None
depends_on = None
def upgrade():
# Create agent_groups table
op.create_table('agent_groups',
sa.Column('id', sa.BigInteger(), nullable=False),
sa.Column('name', sa.String(255), nullable=False),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('parent_id', sa.BigInteger(), nullable=True),
sa.Column('metadata', postgresql.JSONB(), nullable=True),
sa.Column('created_at', sa.TIMESTAMP(timezone=True),
server_default=sa.text('NOW()'), nullable=True),
sa.Column('updated_at', sa.TIMESTAMP(timezone=True),
server_default=sa.text('NOW()'), nullable=True),
sa.ForeignKeyConstraint(['parent_id'], ['agent_groups.id']),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('name')
)
# Add group_id to agents table
op.add_column('agents',
sa.Column('group_id', sa.BigInteger(), nullable=True))
op.create_foreign_key('fk_agents_group_id', 'agents',
'agent_groups', ['group_id'], ['id'])
# Create indexes
op.create_index('idx_agent_groups_name', 'agent_groups', ['name'])
op.create_index('idx_agent_groups_parent_id', 'agent_groups', ['parent_id'])
def downgrade():
op.drop_constraint('fk_agents_group_id', 'agents', type_='foreignkey')
op.drop_column('agents', 'group_id')
op.drop_table('agent_groups')
Migration Best Practices
- Incremental Changes: Small, focused migrations for easier rollback
- Data Preservation: Always include data migration scripts when needed
- Index Management: Add indexes concurrently to avoid locking
- Constraint Validation: Use NOT VALID constraints for large table modifications
- Testing: Test migrations on production-like data sets
Database Maintenance
Automated Cleanup
Data Retention Policies
- Metrics: 90 days (configurable)
- Audit Log: 2 years
- Event Log: 6 months
- Completed Tasks: 30 days
- Expired Certificates: 1 year after expiration
-- Automated cleanup procedure
CREATE OR REPLACE FUNCTION cleanup_old_data()
RETURNS VOID AS $$
BEGIN
-- Clean old metrics (90 days)
DELETE FROM metrics
WHERE timestamp < NOW() - INTERVAL '90 days';
-- Clean old completed tasks (30 days)
DELETE FROM tasks
WHERE status IN ('completed', 'failed', 'cancelled')
AND completed_at < NOW() - INTERVAL '30 days';
-- Clean old audit logs (2 years)
DELETE FROM audit_log
WHERE timestamp < NOW() - INTERVAL '2 years';
-- Clean old event logs (6 months)
DELETE FROM event_log
WHERE timestamp < NOW() - INTERVAL '6 months';
-- Analyze tables after cleanup
ANALYZE metrics, tasks, audit_log, event_log;
END;
$$ LANGUAGE plpgsql;
Backup Strategy
Backup Types
- Full Backup: Daily pg_dump with compression
- Incremental: WAL-E for continuous archiving
- Point-in-Time: Recovery capability for last 30 days
- Logical: Schema and reference data exports
Critical Data Priority
- Certificates & Keys: Essential for agent communication
- User Accounts & Roles: Access control and permissions
- Agent Configuration: Agent registration and grouping
- Active Tasks: In-progress operations
- Recent Inventory: Current system state
Monitoring & Health
Key Metrics
- Connection Pool: Active/idle connections
- Query Performance: Slow query log analysis
- Table Sizes: Growth rate monitoring
- Index Usage: Unused index detection
- Lock Contention: Blocking query identification
-- Database health check queries
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(tablename::regclass)) as size,
pg_stat_get_live_tuples(tablename::regclass) as live_tuples
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(tablename::regclass) DESC;
-- Index usage statistics
SELECT
indexrelname,
idx_tup_read,
idx_tup_fetch,
idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0 AND schemaname = 'public';