-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathruru.sv
More file actions
346 lines (301 loc) · 10.3 KB
/
ruru.sv
File metadata and controls
346 lines (301 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
// ruru.sv -- The World's First Probabilistic Processor
//
// Every computer since 1837 has assumed it knows the answer.
// Ruru knows it doesn't.
//
// Named after the morepork (Ninox novaeseelandiae), New Zealand's
// only surviving native owl. In Māori tradition, the ruru is a
// watchful guardian. Ruru the processor computes under uncertainty
// and tells you the probability.
//
// Architecture:
// - 16 probabilistic registers (pr0-pr15)
// - Each register: tag(4) + mean(16) + variance(16) = 36 bits
// - Gaussian fast path: PADD, PFUSE, POBS as single-cycle ops
// - Tag dispatch: instructions adapt to distribution type
//
// The Gaussian operations (fixed-point, 16-bit):
// PADD: μ = μ₁ + μ₂, σ² = σ₁² + σ₂²
// PFUSE: σ² = 1/(1/σ₁² + 1/σ₂²), μ = σ²·(μ₁/σ₁² + μ₂/σ₂²)
// POBS: Kalman update — single most important equation in
// estimation theory, now a machine instruction.
//
// Lineage:
// Babbage (1837) → Setun (1958) → B5000 (1961) → Ruru (2026)
// Deterministic → Ternary → Tagged → Probabilistic
//
// To synthesise:
// ./takahe --lib sky130.lib --map ruru.v --sta 100 ruru.sv
// ---- Tag definitions ----
// Each register carries its type. The processor adapts.
// Burroughs B5000 did this for code/data in 1961.
// We do it for probability distributions in 2026.
// 4-bit type tag stored alongside every register's mean/variance.
// TAG_EMPTY is the reset value of every register and what PCLEAR writes.
`define TAG_EMPTY 4'h0
// Written by PCONST (the variance is forced to zero).
`define TAG_CONST 4'h1
// Written by PGAUSS and by every ALU result (PADD/PSUB/PFUSE/POBS/PSCALE).
`define TAG_GAUSS 4'h2
// NOTE(review): no instruction in this file produces or checks TAG_UNIF;
// presumably reserved for a uniform distribution -- confirm.
`define TAG_UNIF 4'h3
// ---- Instruction encoding (16-bit) ----
// [15:12] opcode
// [11:8] destination register (pr0-pr15)
// [7:4] source register 1
// [3:0] source register 2 / immediate
// Opcode values carried in instr[15:12].
// Values 4'hD and 4'hE have no definition here; the decoder's default
// arm performs no register write and raises no scalar output for them.
//
// Operand usage as implemented by the decoder in the top module:
//   PCONST / PGAUSS : mean comes from mem_data; PGAUSS builds its
//                     variance by repeating the 4-bit rs2 field 4 times.
//   PMEAN / PVAR    : report the mean / variance of rs1 on scalar_out.
//   PCMP            : scalar_out = 16'hFF00 when mean(rs1) > mean(rs2)
//                     (signed compare), else 16'h0100; variances unused.
//   PCOPY           : copies tag, mean and variance of rs1 into rd.
//   PSCALE          : the mean of rs2 is the scale factor.
//   HALT            : sets the sticky halted flag; no register write.
`define OP_NOP 4'h0
`define OP_PCONST 4'h1 // Load constant (zero variance)
`define OP_PGAUSS 4'h2 // Load Gaussian (mean + variance from memory)
`define OP_PADD 4'h3 // Distributional addition
`define OP_PSUB 4'h4 // Distributional subtraction
`define OP_PFUSE 4'h5 // Sensor fusion (precision-weighted)
`define OP_POBS 4'h6 // Bayesian update (Kalman)
`define OP_PMEAN 4'h7 // Extract mean to scalar output
`define OP_PVAR 4'h8 // Extract variance to scalar output
`define OP_PCMP 4'h9 // Probabilistic comparison
`define OP_PCOPY 4'hA // Copy register
`define OP_PCLEAR 4'hB // Clear register
`define OP_PSCALE 4'hC // Scale by constant
`define OP_HALT 4'hF // Stop
// ---- Probabilistic Register ----
// tag(4) + mean(16) + variance(16) = 36 bits per register
module ruru_regfile (
    input  logic        clk,
    input  logic        rst_n,
    // Read ports (2 simultaneous reads)
    input  logic [3:0]  raddr1,
    input  logic [3:0]  raddr2,
    output logic [3:0]  rtag1,
    output logic [15:0] rmean1,
    output logic [15:0] rvar1,
    output logic [3:0]  rtag2,
    output logic [15:0] rmean2,
    output logic [15:0] rvar2,
    // Write port
    input  logic        we,
    input  logic [3:0]  waddr,
    input  logic [3:0]  wtag,
    input  logic [15:0] wmean,
    input  logic [15:0] wvar
);
    // Three parallel 16-entry arrays, one per field of a register:
    // 4-bit tag, 16-bit mean, 16-bit variance (36 bits per entry).
    logic [3:0]  tags  [0:15];
    logic [15:0] means [0:15];
    logic [15:0] vars  [0:15];

    // Both read ports are purely combinational: the addressed entry
    // appears on the outputs without waiting for a clock edge.
    always_comb begin
        rtag1  = tags[raddr1];
        rmean1 = means[raddr1];
        rvar1  = vars[raddr1];

        rtag2  = tags[raddr2];
        rmean2 = means[raddr2];
        rvar2  = vars[raddr2];
    end

    // Single synchronous write port with an active-low asynchronous
    // reset. Reset marks every entry empty and zeroes its payload.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            for (int r = 0; r < 16; r = r + 1) begin
                tags[r]  <= `TAG_EMPTY;
                means[r] <= '0;
                vars[r]  <= '0;
            end
        end else if (we) begin
            // All three fields of the addressed entry update together.
            tags[waddr]  <= wtag;
            means[waddr] <= wmean;
            vars[waddr]  <= wvar;
        end
    end
endmodule
// ---- Gaussian ALU ----
// The mathematical core. Fixed-point 16-bit.
// Mean: signed 8.8 fixed point (range -128 to +127.996)
// Variance: unsigned 8.8 fixed point (range 0 to 255.996)
//
// PADD: μ = μ₁ + μ₂, σ² = σ₁² + σ₂²
// (independent random variables: variances add)
//
// PFUSE: Combine two measurements of the SAME quantity.
// σ² = (σ₁² · σ₂²) / (σ₁² + σ₂²)
// μ = (μ₁·σ₂² + μ₂·σ₁²) / (σ₁² + σ₂²)
// The fused variance never exceeds the smaller input variance, so
// uncertainty never increases. This is why sensor fusion works.
//
// POBS: Bayesian update. Same math as PFUSE but semantically:
// prior + observation → posterior.
// This IS the Kalman filter update. One instruction.
// ruru_galu: purely combinational Gaussian ALU (no clock, no state).
//
// Number formats
//   mean     : signed   8.8 fixed point (two's complement)
//   variance : unsigned 8.8 fixed point
//
// op encoding
//   0 = PADD    mean = m1 + m2            var = v1 + v2
//   1 = PSUB    mean = m1 - m2            var = v1 + v2
//   2 = PFUSE   mean = (m1*v2 + m2*v1) / (v1 + v2)
//   3 = POBS    var  = (v1*v2)         / (v1 + v2)   (same datapath as 2)
//   4 = PSCALE  mean = m1 * c             var = v1 * c^2,  c = mean2
//   other       (mean1, var1) passed through unchanged
//
// Overflow policy: results that do not fit in 16 bits saturate instead of
// wrapping, so an overflowing variance can never look like a small one.
//
// Fusing two zero-variance inputs (v1 + v2 == 0) has no defined weighted
// average; in that case the result is (mean1, variance 0).
module ruru_galu (
    input  logic [2:0]  op,        // 0=ADD 1=SUB 2=FUSE 3=OBS 4=SCALE
    input  logic [15:0] mean1,
    input  logic [15:0] var1,
    input  logic [15:0] mean2,
    input  logic [15:0] var2,
    output logic [15:0] mean_out,
    output logic [15:0] var_out
);
    // ---- Signed views of the operands ----
    // Every operand of a signed expression must itself be signed, otherwise
    // the whole expression is evaluated as unsigned and negative means are
    // zero-extended. Variances get one extra zero MSB so they stay
    // non-negative when interpreted as signed.
    logic signed [15:0] m1_s, m2_s;
    logic signed [16:0] v1_s, v2_s;
    assign m1_s = mean1;
    assign m2_s = mean2;
    assign v1_s = {1'b0, var1};
    assign v2_s = {1'b0, var2};

    // ---- Shared variance terms ----
    // The sum of two 16-bit variances needs 17 bits; the full width is kept
    // so it can be used as a divisor without truncation.
    logic        [16:0] var_sum;
    logic signed [17:0] var_sum_s;
    logic        [31:0] var_prod;
    assign var_sum   = {1'b0, var1} + {1'b0, var2};
    assign var_sum_s = {1'b0, var_sum};
    assign var_prod  = {16'h0, var1} * {16'h0, var2};

    // ---- PFUSE / POBS ----
    // (8.8 * 8.8) / 8.8 is already 8.8, so no extra shift is needed.
    // v1*v2/(v1+v2) <= min(v1, v2), so the quotient always fits 16 bits.
    logic [31:0] fuse_var;
    assign fuse_var = (var_sum != 17'h0) ? (var_prod / {15'h0, var_sum})
                                         : 32'h0;

    // Numerator: two 33-bit signed products summed -> 34 bits; 35 declared.
    logic signed [34:0] fuse_mean_num;
    logic signed [34:0] fuse_mean;
    assign fuse_mean_num = (m1_s * v2_s) + (m2_s * v1_s);
    assign fuse_mean     = (var_sum != 17'h0) ? (fuse_mean_num / var_sum_s)
                                              : 35'sd0;

    // ---- PSCALE ----
    // Products are formed at full width before the fixed-point shift.
    logic signed [31:0] scale_mean_full;  // m1 * c      (16.16, signed)
    logic        [31:0] scale_sq;         // c * c       (16.16, >= 0)
    logic        [47:0] scale_var_full;   // v1 * c * c  (24.24)
    assign scale_mean_full = m1_s * m2_s;
    assign scale_sq        = m2_s * m2_s;
    assign scale_var_full  = {32'h0, var1} * {16'h0, scale_sq};

    // Clamp a wide signed value into the signed 16-bit mean range.
    function automatic logic [15:0] sat_s16(input logic signed [34:0] x);
        begin
            if (x > 35'sd32767)
                sat_s16 = 16'h7FFF;
            else if (x < -35'sd32768)
                sat_s16 = 16'h8000;
            else
                sat_s16 = x[15:0];
        end
    endfunction

    // Clamp a wide unsigned value into the unsigned 16-bit variance range.
    function automatic logic [15:0] sat_u16(input logic [47:0] x);
        begin
            if (x[47:16] != 32'h0)
                sat_u16 = 16'hFFFF;
            else
                sat_u16 = x[15:0];
        end
    endfunction

    always_comb begin
        case (op)
            3'b000: begin // PADD: means add, variances add
                mean_out = sat_s16(m1_s + m2_s);
                var_out  = sat_u16(var_sum);
            end
            3'b001: begin // PSUB: means subtract, variances still add
                mean_out = sat_s16(m1_s - m2_s);
                var_out  = sat_u16(var_sum);
            end
            3'b010, 3'b011: begin // PFUSE / POBS
                if (var_sum != 17'h0) begin
                    mean_out = sat_s16(fuse_mean);
                    var_out  = fuse_var[15:0];
                end else begin
                    // Both inputs exact: keep the first operand.
                    mean_out = mean1;
                    var_out  = 16'h0;
                end
            end
            3'b100: begin // PSCALE: mean2 is the signed 8.8 scale factor
                mean_out = sat_s16(scale_mean_full >>> 8);
                var_out  = sat_u16(scale_var_full >> 16);
            end
            default: begin
                mean_out = mean1;
                var_out  = var1;
            end
        endcase
    end
endmodule
// ---- Top: Ruru Probabilistic Processor ----
module ruru (
    input  logic        clk,
    input  logic        rst_n,
    // Instruction interface
    input  logic [15:0] instr,
    input  logic        valid,
    // Memory interface (for loading distributions)
    input  logic [15:0] mem_data,
    // Scalar output (for PMEAN/PVAR queries)
    output logic [15:0] scalar_out,
    output logic        scalar_valid,
    // Status
    output logic        halted
);
    // ---- Instruction fields: [15:12] op, [11:8] rd, [7:4] rs1, [3:0] rs2 ----
    logic [3:0] opcode, rd, rs1, rs2;
    assign {opcode, rd, rs1, rs2} = instr;

    // An instruction takes effect only when it is presented as valid and
    // the core has not already halted.
    logic issue;
    assign issue = valid && !halted;

    // ---- Register file ----
    logic [3:0]  tag1, tag2;
    logic [15:0] mean1, var1, mean2, var2;
    logic        rf_we;
    logic [3:0]  rf_waddr, rf_wtag;
    logic [15:0] rf_wmean, rf_wvar;

    ruru_regfile regs (
        .clk    (clk),
        .rst_n  (rst_n),
        .raddr1 (rs1),
        .raddr2 (rs2),
        .rtag1  (tag1),
        .rmean1 (mean1),
        .rvar1  (var1),
        .rtag2  (tag2),
        .rmean2 (mean2),
        .rvar2  (var2),
        .we     (rf_we),
        .waddr  (rf_waddr),
        .wtag   (rf_wtag),
        .wmean  (rf_wmean),
        .wvar   (rf_wvar)
    );

    // ---- Gaussian ALU ----
    logic [2:0]  alu_op;
    logic [15:0] alu_mean, alu_var;

    ruru_galu galu (
        .op       (alu_op),
        .mean1    (mean1),
        .var1     (var1),
        .mean2    (mean2),
        .var2     (var2),
        .mean_out (alu_mean),
        .var_out  (alu_var)
    );

    // ---- ALU operation select ----
    // Anything that is not PSUB/PFUSE/POBS/PSCALE leaves the ALU on its
    // default operation (3'b000, which is also what PADD uses).
    always_comb begin
        alu_op = 3'b000;
        if (issue) begin
            case (opcode)
                `OP_PSUB:   alu_op = 3'b001;
                `OP_PFUSE:  alu_op = 3'b010;
                `OP_POBS:   alu_op = 3'b011;
                `OP_PSCALE: alu_op = 3'b100;
                default:    alu_op = 3'b000;
            endcase
        end
    end

    // ---- Register write-back ----
    // Defaults describe an ALU result tagged as Gaussian with the write
    // disabled; each instruction overrides only what it needs.
    always_comb begin
        rf_waddr = rd;
        rf_we    = 1'b0;
        rf_wtag  = `TAG_GAUSS;
        rf_wmean = alu_mean;
        rf_wvar  = alu_var;

        if (issue) begin
            case (opcode)
                // ALU instructions: commit the ALU result as-is.
                `OP_PADD, `OP_PSUB, `OP_PFUSE, `OP_POBS, `OP_PSCALE: begin
                    rf_we = 1'b1;
                end
                // Exact value from memory, zero variance.
                `OP_PCONST: begin
                    rf_we    = 1'b1;
                    rf_wtag  = `TAG_CONST;
                    rf_wmean = mem_data;
                    rf_wvar  = 16'h0;
                end
                // Mean from memory; variance is the 4-bit immediate
                // repeated across all four nibbles.
                `OP_PGAUSS: begin
                    rf_we    = 1'b1;
                    rf_wtag  = `TAG_GAUSS;
                    rf_wmean = mem_data;
                    rf_wvar  = {4{rs2}};
                end
                // Duplicate rs1 (tag included) into rd.
                `OP_PCOPY: begin
                    rf_we    = 1'b1;
                    rf_wtag  = tag1;
                    rf_wmean = mean1;
                    rf_wvar  = var1;
                end
                // Return rd to the empty state.
                `OP_PCLEAR: begin
                    rf_we    = 1'b1;
                    rf_wtag  = `TAG_EMPTY;
                    rf_wmean = 16'h0;
                    rf_wvar  = 16'h0;
                end
                default: begin end // queries, NOP, HALT: no register write
            endcase
        end
    end

    // ---- Scalar query output ----
    always_comb begin
        scalar_out   = 16'h0;
        scalar_valid = 1'b0;

        if (issue) begin
            case (opcode)
                `OP_PMEAN: begin
                    scalar_valid = 1'b1;
                    scalar_out   = mean1;
                end
                `OP_PVAR: begin
                    scalar_valid = 1'b1;
                    scalar_out   = var1;
                end
                `OP_PCMP: begin
                    // Coarse estimate of P(X1 > X2): a signed comparison of
                    // the two means selects one of two fixed values.
                    scalar_valid = 1'b1;
                    scalar_out   = ($signed(mean1) > $signed(mean2)) ?
                                   16'hFF00 : 16'h0100;
                end
                default: begin end // no scalar result
            endcase
        end
    end

    // ---- Halt flag ----
    // Sticky: set by a valid HALT, cleared only by reset.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            halted <= 1'b0;
        else if (valid && opcode == `OP_HALT)
            halted <= 1'b1;
    end
endmodule