-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCargo.toml
More file actions
99 lines (81 loc) · 2.25 KB
/
Cargo.toml
File metadata and controls
99 lines (81 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[package]
name = "blazr"
version = "0.1.0-beta.1"
# Remaining keys alphabetical per Cargo convention, `description` last.
authors = ["Farhan Syah"]
categories = ["science", "web-programming"]
edition = "2021"
homepage = "https://github.com/ml-rust/blazr"
keywords = ["inference", "llm", "server", "openai", "gguf"]
license = "Apache-2.0"
repository = "https://github.com/ml-rust/blazr"
description = "Production-grade LLM inference server with OpenAI-compatible API. Supports Llama, Mistral, Mamba2, MLA+MoE, and hybrid architectures across SafeTensors, AWQ, GPTQ, and GGUF formats."

[features]
# Half-precision (f16) tensor support is on by default; forwarded to boostr.
default = ["f16"]
f16 = ["boostr/f16"]
cuda = ["boostr/cuda"]
nccl = ["boostr/nccl"]
# Swarm mode: enables boostr's distributed support and pulls in the
# optional `nexar` runtime dependency.
distributed = ["boostr/distributed", "dep:nexar"]

[dependencies]
# ML framework (model architectures, quant kernels, tensors via numr)
boostr = { version = "0.1" }
# Tokenization
# NOTE(review): path-only dependency — this prevents publishing to crates.io.
# Add a `version` alongside `path` before a registry release.
splintr = { path = "../splintr" }
# HTTP server
axum = { version = "0.7", features = ["multipart"] }
tokio = { version = "1", features = ["full"] }
tower = { version = "0.5", features = ["limit"] }
tower-http = { version = "0.6", features = [
"cors",
"trace",
"timeout",
"limit",
] }
# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# NOTE(review): "0.0" accepts any 0.0.x — a very unstable pre-release series.
# Pin an exact 0.0.x once the API is relied upon.
serde-saphyr = "0.0"
# CLI
clap = { version = "4", features = ["derive", "env"] }
clap_complete = "4"
# Byte casting for tensor creation from raw bytes
bytemuck = "1"
half = "2"
# Utilities
anyhow = "1"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
uuid = { version = "1", features = ["v4"] }
chrono = "0.4"
glob = "0.3"
# HuggingFace Hub for model downloading
hf-hub = "0.5"
dotenvy = "0.15"
# Distributed runtime (swarm mode)
nexar = { version = "0.1.0", optional = true }
# Async streaming
futures = "0.3"
async-stream = "0.3"
tokio-stream = "0.1"
# Random number generation
rand = "0.8"
# Metrics
metrics = "0.24"
metrics-exporter-prometheus = { version = "0.16", features = ["http-listener"] }
# CLI UX
colored = "3"
indicatif = "0.18"
dirs = "6"
rustyline = "15"
# HTTP client (for blazr ps)
# NOTE(review): confirm reqwest 0.13 exists on crates.io — the widely-used
# stable line is 0.12; `cargo update`/`cargo check` will fail if not.
reqwest = { version = "0.13", features = ["json"] }
# TLS support
rustls = "0.23"
rustls-pemfile = "2"
tokio-rustls = "0.26"
hyper-util = { version = "0.1", features = ["tokio", "server-auto"] }

[dev-dependencies]
# Helpers for testing tokio-based async code.
tokio-test = "0.4"

[profile.release]
# Favor runtime performance over compile time for release builds.
opt-level = 3
lto = true  # full cross-crate link-time optimization
codegen-units = 1  # single codegen unit: best optimization, slowest build