Examples
This page provides comprehensive examples demonstrating various use cases of SketchySVD.jl.
Example 1: Basic Low-Rank Approximation
Compute a low-rank approximation of a random matrix:
using SketchySVD
using LinearAlgebra
using Random

# Fix the RNG seed so the example is reproducible.
Random.seed!(42)

# Create a low-rank matrix plus small dense noise.
m, n, true_rank = 1000, 500, 20
# Materialize the THIN orthonormal factor with Matrix(qr(X).Q).
# The raw `qr(X).Q` object acts as an m×m (resp. n×n) operator, so the
# original `qr(U_true).Q * Diagonal(S_true)` product has mismatched
# dimensions; the explicit m×true_rank / n×true_rank matrices are needed.
U_true = randn(m, true_rank)
U_true = Matrix(qr(U_true).Q) # Orthonormalize (thin Q, m × true_rank)
V_true = randn(n, true_rank)
V_true = Matrix(qr(V_true).Q) # Orthonormalize (thin Q, n × true_rank)
S_true = sort(rand(true_rank), rev=true) .* 100 # Decreasing spectrum
A = U_true * Diagonal(S_true) * V_true' + 0.1 * randn(m, n)

# Compute a randomized SVD, oversampling beyond the true rank.
r = 30
U, S, V = rsvd(A, r)

# Evaluate quality via the relative Frobenius-norm error.
A_approx = U * Diagonal(S) * V'
relative_error = norm(A - A_approx) / norm(A)
println("Relative approximation error: $relative_error")

# Compare estimated singular values against the true ones.
using Plots
plot(S_true, label="True", marker=:circle, ylabel="Singular Value", xlabel="Index")
plot!(S[1:true_rank], label="Estimated", marker=:square, legend=:topright)

Expected output: Relative error < 1%, singular values closely match.
Example 2: Image Compression
Compress an image using low-rank approximation:
using SketchySVD
using TestImages, ImageIO
using LinearAlgebra

# Load a standard grayscale test image as a Float64 matrix.
# NOTE(review): `Gray` is provided by the Colors/ImageCore packages; a fresh
# session may additionally need e.g. `using ImageCore` -- confirm which of
# the loaded packages re-exports it.
img = Float64.(Gray.(testimage("cameraman")))
m, n = size(img)

# Compress with several target ranks and report error vs. compression ratio.
ranks = [5, 10, 20, 50, 100]
# Concretely typed container (the original untyped `[]` is a Vector{Any}).
compressed_images = Matrix{Float64}[]
for r in ranks
    U, S, V = rsvd(img, r)
    img_approx = U * Diagonal(S) * V'
    error = norm(img - img_approx) / norm(img)
    # Dense storage (m*n) vs. rank-r factor storage (U: m*r, V: n*r, S: r).
    compression_ratio = (m * n) / (r * (m + n + 1))
    println("Rank $r: Error = $(round(error, digits=4)), Compression = $(round(compression_ratio, digits=2))x")
    push!(compressed_images, img_approx)
end

# Visualize the original next to selected ranks
# (the closing parenthesis of this plot call follows on the next line).
using Plots
plot(
plot(Gray.(img), title="Original"),
plot(Gray.(compressed_images[1]), title="Rank 5"),
plot(Gray.(compressed_images[3]), title="Rank 20"),
plot(Gray.(compressed_images[5]), title="Rank 100"),
layout=(2,2)
)

Key insight: Rank 50-100 often gives good visual quality with 10-20x compression.
Example 3: Streaming PCA
Perform PCA on streaming data:
using SketchySVD
using Statistics
using LinearAlgebra
# Simulate streaming sensor data: 100 sensors over 5000 time steps, driven by
# 5 latent components plus dense Gaussian noise.
n_sensors = 100
n_timesteps = 5000
true_components = 5
# Generate synthetic data with 5 principal components
W = randn(n_sensors, true_components)
H = randn(true_components, n_timesteps)
data = W * H + 0.5 * randn(n_sensors, n_timesteps)
# Center the data. NOTE(review): despite the original "center and scale"
# wording, only centering is performed here -- there is no variance scaling.
data_mean = mean(data, dims=2)
data_centered = data .- data_mean
# Streaming PCA with Sketchy: sketch rank r=20 oversamples the 5 true components.
sketchy = init_sketchy(m=n_sensors, n=n_timesteps, r=20, ReduxMap=:sparse)
for t in 1:n_timesteps
# Feed one column (one time step) at a time.
x = data_centered[:, t]
increment!(sketchy, x)
# Optional: periodically finalize to inspect intermediate singular values.
# NOTE(review): this assumes finalize! may be called repeatedly while
# streaming continues -- confirm against the SketchySVD API.
if t % 1000 == 0
_, _ = finalize!(sketchy)
println("At t=$t: Top 5 singular values: $(sketchy.Σ[1:5])")
end
end
# Final PCA
finalize!(sketchy)
# After finalize!, the factors live on the sketch object: singular values are
# in sketchy.Σ (other examples on this page read the left/right factors from
# sketchy.V / sketchy.W -- confirm field names against the API).
# Analyze variance explained: squared singular values are proportional to the
# variance captured by each component.
S = sketchy.Σ
total_variance = sum(S.^2)
variance_explained = cumsum(S.^2) ./ total_variance
using Plots
plot(variance_explained[1:20],
label="Cumulative Variance Explained",
marker=:circle,
xlabel="Number of Components",
ylabel="Fraction of Variance")
hline!([0.9, 0.95, 0.99], label=["90%" "95%" "99%"], linestyle=:dash)

Key insight: Usually 5-10 components explain > 90% of variance in real data.
Example 4: Video Background Subtraction
Separate foreground objects from video background:
using SketchySVD
using LinearAlgebra
# Simulate video: static low-rank background + sparse moving foreground.
n_frames = 200
height, width = 64, 64
m = height * width
# Background: low-rank (here perfectly static -- the same frame repeated).
# NOTE(review): 3 slices are allocated but only [:,:,1] is ever used.
background = randn(height, width, 3)
background_flat = zeros(m, n_frames)
for t in 1:n_frames
background_flat[:, t] = vec(background[:,:,1]) # Static
end
# Foreground: a sparse bright square moving left-to-right over time.
foreground = zeros(m, n_frames)
for t in 1:n_frames
# Moving object
obj_size = 10
x_pos = 20 + div(t * width, n_frames)
y_pos = 30
# Column range is clamped at the right image border (the range may become
# empty once the object has moved past the edge, which is harmless).
indices = (y_pos:y_pos+obj_size-1, x_pos:min(x_pos+obj_size-1, width))
for i in indices[1], j in indices[2]
# Column-major flattening: pixel (row i, column j) -> linear index.
idx = (j-1) * height + i
if idx <= m
foreground[idx, t] = 100 # Bright object
end
end
end
# Composite video
video = background_flat + foreground
# Robust-PCA-style separation via sketching: fit the low-rank background,
# then take the residual as foreground.
r_background = 5 # Background is low-rank
sketchy = init_sketchy(m=m, n=n_frames, r=r_background, ReduxMap=:sparse)
for t in 1:n_frames
increment!(sketchy, video[:, t])
end
finalize!(sketchy)
# NOTE(review): the left factor is read from field .V and the right factor
# from .W -- confirm these field names against the SketchySVD API.
U = sketchy.V
S = sketchy.Σ
V = sketchy.W
background_recovered = U * Diagonal(S) * V'
foreground_recovered = video - background_recovered
# Visualize one frame: original, recovered background, residual foreground.
using Plots
t_show = 100
plot(
heatmap(reshape(video[:, t_show], height, width), title="Original Frame"),
heatmap(reshape(background_recovered[:, t_show], height, width), title="Background"),
heatmap(reshape(foreground_recovered[:, t_show], height, width), title="Foreground"),
layout=(1,3), colorbar=false
)

Key insight: Low-rank approximation captures static/slow-varying background, residual is foreground.
Example 5: Temporal Data with Forgetting
Track evolving patterns in non-stationary time series:
using SketchySVD
using Plots
# Simulate an evolving (non-stationary) system: the driving frequencies of
# the sensor signals drift slowly over time.
n_sensors = 50
n_timesteps = 2000
m = n_sensors
# Produce one synthetic sensor snapshot (length `n_sensors`) at time `t`.
# Two slowly drifting frequencies drive every sensor; the first component has
# a sensor-dependent phase, and small Gaussian noise is added on top.
function generate_evolving_data(t, n_sensors)
    # Frequencies drift sinusoidally on slow time scales (periods 500, 700).
    f1 = 0.1 + 0.05 * sin(2π * t / 500)
    f2 = 0.2 + 0.03 * cos(2π * t / 700)
    # Deterministic signal per sensor: phased first component + shared second.
    signal = [sin(2π * f1 * t + 2π * i / n_sensors) + 0.5 * sin(2π * f2 * t)
              for i in 1:n_sensors]
    return signal + 0.1 * randn(n_sensors)
end
# Track the evolving subspace with exponential forgetting.
η = 0.99 # Forget slowly
sketchy = init_sketchy(m=m, n=n_timesteps, r=10, ErrorEstimate=true)
errors = Float64[]
top_sv = Float64[]
for t in 1:n_timesteps
x = generate_evolving_data(t, n_sensors)
# NOTE(review): the extra arguments are presumably (weight, forgetting
# factor η) -- confirm the increment! signature. `result` is unused here.
result = increment!(sketchy, x, 1.0, η)
# Record the tracked quantities every 50 steps.
if t % 50 == 0
est_err, _ = finalize!(sketchy)
S = sketchy.Σ
# The error estimate may be unavailable (nothing); record NaN so the
# plotted series keeps its length.
push!(errors, isnothing(est_err) ? NaN : est_err)
push!(top_sv, S[1])
println("t=$t: Top SV = $(round(S[1], digits=2)), Error = $(isnothing(est_err) ? "N/A" : round(est_err, digits=4))")
end
end
# Plot the evolution of the tracked quantities (sampled every 50 steps).
plot(
plot(50:50:n_timesteps, top_sv, label="Top Singular Value", ylabel="Value"),
plot(50:50:n_timesteps, errors, label="Estimated Error", ylabel="Error"),
layout=(2,1), xlabel="Time", legend=:topright
)

Key insight: Forgetting factor allows tracking of non-stationary dynamics.
Example 6: Large-Scale Matrix with Sparse Map
Efficiently handle very large matrices:
using SketchySVD
using LinearAlgebra
using Printf
# Very large matrix -- never materialized in memory; columns are generated
# on demand and streamed into the sketch one at a time.
m, n = 100_000, 50_000
r = 100
println("Matrix size: $m × $n ($(m*n/1e9) billion elements)")
println("Would require $(8*m*n/1e9) GB if stored as Float64")
# Generate column `j` of the (never materialized) m-row matrix on the fly:
# Gaussian noise scaled by a smooth column-to-column variation, plus a small
# independent noise term.
#
# `ncols` sets the period of the smooth variation. The original silently read
# the global `n` here, which breaks the function outside this script; the
# keyword default 50_000 matches that global, so behavior is unchanged.
# NOTE: `true_rank` is kept only for backward compatibility and is unused.
function generate_column(j, m, true_rank=50; ncols=50_000)
    col = randn(m) * sin(2π * j / ncols) # Smooth variation across columns
    return col + 0.01 * randn(m)
end
# Use a sparse reduction map: memory scales with the sketch, not the matrix.
sketchy = init_sketchy(m=m, n=n, r=r, ReduxMap=:sparse)
# Process in streaming fashion, one generated column at a time.
println("\nProcessing columns...")
@time begin
for j in 1:n
x = generate_column(j, m)
increment!(sketchy, x)
# Progress report every 10k columns.
if j % 10_000 == 0
@printf("Progress: %d/%d (%.1f%%)\n", j, n, 100*j/n)
end
end
end
println("\nFinalizing...")
@time _, _ = finalize!(sketchy)
println("\nTop 10 singular values:")
S = sketchy.Σ
println(S[1:10])
# Approximate memory usage: only the sketch matrices are ever stored.
# NOTE(review): assumes the sketch object exposes fields Y, Z, Ω -- confirm
# against the SketchySVD API.
sketchy_size = sizeof(sketchy.Y) + sizeof(sketchy.Z) + sizeof(sketchy.Ω)
println("\nApproximate memory used: $(sketchy_size/1e9) GB")

Key insight: Sparse maps enable processing matrices thousands of times larger than available RAM.
Example 7: Comparison with Standard SVD
Compare accuracy and speed of different methods:
using SketchySVD
using LinearAlgebra
using BenchmarkTools
using Printf

# Create a test matrix with an exponentially decaying spectrum plus noise.
m, n = 2000, 1000
true_rank = 50
U_true = randn(m, true_rank)
S_true = exp.(-0.1 * (1:true_rank)) # Exponential decay
V_true = randn(n, true_rank)
A = U_true * Diagonal(S_true) * V_true' + 0.01 * randn(m, n)
r = 60 # Target rank

# Method 1: Standard (dense) SVD -- the accuracy baseline, but expensive.
println("Standard SVD:")
@time U_full, S_full, V_full = svd(A)
A_full = U_full[:, 1:r] * Diagonal(S_full[1:r]) * V_full[:, 1:r]'
err_full = norm(A - A_full) / norm(A)
println("Error: $err_full")

# Method 2: Randomized SVD.
println("\nRandomized SVD:")
@time U_rsvd, S_rsvd, V_rsvd = rsvd(A, r)
A_rsvd = U_rsvd * Diagonal(S_rsvd) * V_rsvd'
err_rsvd = norm(A - A_rsvd) / norm(A)
println("Error: $err_rsvd")

# Method 3: Randomized SVD with q=2 power iterations (improves accuracy for
# slowly decaying spectra, at extra cost).
println("\nRandomized SVD (q=2):")
@time U_rsvd2, S_rsvd2, V_rsvd2 = rsvd(A, r; q=2)
A_rsvd2 = U_rsvd2 * Diagonal(S_rsvd2) * V_rsvd2'
err_rsvd2 = norm(A - A_rsvd2) / norm(A)
println("Error: $err_rsvd2")

# Method 4: Sketchy (streaming, one column at a time).
println("\nSketchy SVD:")
@time begin
    sketchy = init_sketchy(m=m, n=n, r=r, ReduxMap=:sparse)
    for j in 1:n
        increment!(sketchy, A[:, j])
    end
    finalize!(sketchy)
end
# NOTE(review): the left/right factors are read from fields .V/.W -- confirm
# these field names against the SketchySVD API.
U_sketchy = sketchy.V
S_sketchy = sketchy.Σ
V_sketchy = sketchy.W
A_sketchy = U_sketchy * Diagonal(S_sketchy) * V_sketchy'
err_sketchy = norm(A - A_sketchy) / norm(A)
println("Error: $err_sketchy")

# Summary table.
println("\n" * "="^50)
println("Summary:")
println("Method Error Speedup")
# Fixed: the original printed "-" * "="^50 (one dash followed by 50 equals);
# a plain 50-character dashed rule was clearly intended.
println("-"^50)
@printf("Standard SVD %.2e 1.0x (baseline)\n", err_full)
@printf("Randomized SVD %.2e ~10-50x\n", err_rsvd)
@printf("Randomized SVD (q=2) %.2e ~5-25x\n", err_rsvd2)
@printf("Sketchy SVD %.2e ~5-20x\n", err_sketchy)

Key insight: Randomized methods are 10-50x faster with minimal accuracy loss.
Example 8: Adaptive Rank Selection
Automatically determine the appropriate rank:
using SketchySVD
using LinearAlgebra
# Matrix with unknown effective rank. NOTE(review): a uniform random matrix
# has one dominant singular value (from its positive mean) and a flat tail,
# so the 3% threshold below may never be crossed -- which exercises the
# fallback branch.
m, n = 1000, 800
A = rand(m, n)
# Start with a high rank estimate and truncate afterwards.
r_max = 100
U, S, V = rsvd(A, r_max)
# Find the first index where the singular values drop below the threshold.
threshold = 0.03 * S[1] # 3% of the largest singular value (original comment incorrectly said 1%)
r_effective = findfirst(S .< threshold)
# findfirst returns nothing when no singular value is below the threshold.
if isnothing(r_effective)
r_effective = r_max
println("Need rank > $r_max")
else
println("Effective rank: $r_effective")
end
# Truncate the factors to the effective rank.
U = U[:, 1:r_effective]
S = S[1:r_effective]
V = V[:, 1:r_effective]
# Verify accuracy of the truncated approximation.
A_approx = U * Diagonal(S) * V'
println("Relative error with rank $r_effective: $(norm(A - A_approx) / norm(A))")
# Plot the singular value decay against the chosen threshold.
using Plots
plot(S, yscale=:log10, label="Singular Values",
marker=:circle, ylabel="Singular Value (log scale)", xlabel="Index")
hline!([threshold], label="Threshold", linestyle=:dash)

Key insight: Adaptive rank selection balances accuracy and efficiency.
Example 9: Burgers' Equation (PDE Solution)
Analyze solutions of a 1D viscous Burgers' equation:
using SketchySVD
using LinearAlgebra
using Plots
using Printf # @printf is used below; this import was missing in the original

# Load Burgers' equation snapshot data if present.
# (see scripts/burgers.jl for data generation; data expected in scripts/data/)
using CSV, DataFrames
data_path = "scripts/data/viscous_burgers1d_states.csv"
if isfile(data_path)
    df = CSV.read(data_path, DataFrame)
    snapshots = Matrix(df) # Each column is a spatial snapshot at one time
    m, n = size(snapshots)
    println("Loaded $n snapshots of size $m")

    # Compute POD (Proper Orthogonal Decomposition) modes via randomized SVD.
    r = 20
    U, S, V = rsvd(snapshots, r)
    # U columns are POD modes (spatial patterns);
    # V columns are temporal coefficients.

    # Analyze modal energy: squared singular values measure per-mode energy.
    modal_energy = S.^2
    total_energy = sum(modal_energy)
    energy_fraction = cumsum(modal_energy) ./ total_energy
    println("\nModal energy distribution:")
    for i in 1:min(10, r)
        @printf("Mode %2d: %.2f%% (cumulative: %.2f%%)\n",
            i, 100*modal_energy[i]/total_energy, 100*energy_fraction[i])
    end

    # Visualize the leading modes and the cumulative energy spectrum.
    p1 = plot(U[:, 1:4], label=["Mode 1" "Mode 2" "Mode 3" "Mode 4"],
        xlabel="Spatial Position", ylabel="Amplitude", title="POD Modes")
    p2 = plot(energy_fraction[1:r], marker=:circle,
        xlabel="Mode Index", ylabel="Cumulative Energy Fraction",
        title="Energy Spectrum", legend=false)
    hline!([0.99], linestyle=:dash, color=:red)
    plot(p1, p2, layout=(2,1))
else
    println("Data file not found. Run scripts/burgers.jl first.")
end

Key insight: PDE solutions often have low effective rank; 5-10 modes capture > 99% energy.
Example 10: Real-Time Anomaly Detection
Detect anomalies in streaming data:
using SketchySVD
using LinearAlgebra
using Statistics
# Simulation parameters
n_sensors = 50
n_timesteps = 1000
# Number of latent patterns in the simulated "normal" data.
baseline_rank = 5
# Generate normal (anomaly-free) observations.
# Generate one "normal" observation for `n` sensors at time `t`.
#
# Fix for the original: it drew a fresh random `patterns` matrix on EVERY
# call, so there were no persistent underlying patterns and the data had no
# stable low-rank structure (and any `baseline_rank != 5` would have crashed
# against the five hardcoded weights). Here the five patterns are fixed,
# deterministic functions of the sensor index, so they persist across time
# steps as the example intends.
function generate_normal(t, n)
    # Five fixed spatial patterns (columns), one per temporal weight below.
    patterns = [0.5 + 0.5 * sin(2π * i * k / n) for i in 1:n, k in 1:5]
    # Slowly varying temporal weights for the five patterns.
    weights = [sin(2π*t/100), cos(2π*t/100), sin(2π*t/50),
               cos(2π*t/50), sin(2π*t/200)]
    # Pattern mixture plus small positive noise.
    return patterns * weights + 0.1 * rand(n)
end
# Times at which large anomalous spikes will be injected.
anomaly_times = [300, 600, 850]
# Initialize the sketch with rank 10 (oversampling the 5 normal patterns).
sketchy = init_sketchy(m=n_sensors, n=n_timesteps, r=10)
reconstruction_errors = Float64[]
is_anomaly = Bool[]
for t in 1:n_timesteps
# Get the next observation.
x = generate_normal(t, n_sensors)
# Inject a large spike at the designated anomaly times.
if t in anomaly_times
x += 1000 * randn(n_sensors) # Large spike
end
# Update the sketch with the (possibly anomalous) sample.
increment!(sketchy, x, 1.0, 1.0)
# Reconstruction-based anomaly detection: project the new sample onto the
# current top modes and measure the relative residual.
if t > 50 # Wait for initialization
# NOTE(review): finalizing on every step is expensive; this assumes
# finalize! may be called repeatedly while streaming -- confirm against
# the SketchySVD API.
finalize!(sketchy)
U = sketchy.V
S = sketchy.Σ
V = sketchy.W
# Project onto the top r_detect modes only.
r_detect = 5
U_trunc = U[:, 1:r_detect]
x_proj = U_trunc * (U_trunc' * x)
# Relative reconstruction error of the sample in the normal subspace.
error = norm(x - x_proj) / norm(x)
push!(reconstruction_errors, error)
# Anomaly threshold: flag samples more than 2.5 standard deviations from
# the running mean (the original comment said 3, but the code uses 2.5).
if t > 100
μ = mean(reconstruction_errors[1:end-1])
σ = std(reconstruction_errors[1:end-1])
push!(is_anomaly, error > μ + 2.5σ || error < μ - 2.5σ)
else
push!(is_anomaly, false)
end
end
end
# Visualize results.
using Plots
# Errors are recorded only for t > 50, so the time axis starts at 51.
times = 51:n_timesteps
p = plot(times, reconstruction_errors, label="Reconstruction Error",
ylabel="Error", xlabel="Time")
# NOTE(review): the y-values are filtered with `t > 50` but the x-values
# (anomaly_times) are not; if any anomaly time were <= 50 the two vectors
# would differ in length. All anomaly times here are > 50, so it works.
scatter!(anomaly_times, [reconstruction_errors[t-50] for t in anomaly_times if t > 50],
color=:red, markersize=8, label="True Anomalies")
# Mark the anomalies the threshold rule actually detected.
detected = times[is_anomaly]
if !isempty(detected)
scatter!(detected, reconstruction_errors[is_anomaly],
color=:orange, marker=:x, markersize=10,
markerstrokewidth=5,
label="Detected")
end
plot!(p, legend=:topleft)

Key insight: Anomalies have large reconstruction error when projected onto normal subspace.
More Examples
For additional examples, see:
- scripts/random_matrix.jl: Random matrix benchmarks
- scripts/burgers.jl: PDE analysis workflow
- test/ directory: Comprehensive test suite with many usage patterns
Tips for Your Application
- Start simple: Begin with rsvd() on a small dataset
- Profile: Use @time and @benchmark to identify bottlenecks
- Iterate: Adjust r, T, and the reduction map based on results
- Validate: Compare with standard SVD on small test cases
- Monitor: Use error estimation to track quality
Questions?
If you have questions about applying SketchySVD to your specific problem, please open a discussion on GitHub.