forked from abetlen/llama-cpp-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllava_cpp.py
More file actions
271 lines (222 loc) · 7.35 KB
/
llava_cpp.py
File metadata and controls
271 lines (222 loc) · 7.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
from __future__ import annotations
import os
from ctypes import (
c_bool,
c_char_p,
c_int,
c_uint8,
c_float,
c_size_t,
c_void_p,
POINTER,
_Pointer, # type: ignore
Structure,
)
import pathlib
from typing import (
Union,
NewType,
Optional,
TYPE_CHECKING,
)
import llama_cpp.llama_cpp as llama_cpp
from llama_cpp._ctypes_extensions import (
load_shared_library,
ctypes_function_for_shared_library,
)
if TYPE_CHECKING:
from llama_cpp._ctypes_extensions import (
CtypesArray,
)
# Specify the base name of the shared library to load
_libllava_base_name = "llava"
# Optional override, taken from the environment, for where the library lives
_libllava_override_path = os.environ.get("LLAVA_CPP_LIB")
# Default to the "lib" directory next to this module. When LLAVA_CPP_LIB is
# set, search the location it names: the previous expression read the variable
# but discarded its value and fell back to pathlib.Path() (the current working
# directory), so the override could never point anywhere else.
# NOTE(review): the value is handed over as the base path, i.e. it is treated
# as a directory just like the default - confirm against load_shared_library.
_libllava_base_path = (
    pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
    if _libllava_override_path is None
    else pathlib.Path(_libllava_override_path)
)
# Load the library
_libllava = load_shared_library(_libllava_base_name, _libllava_base_path)
# Decorator factory used by every binding in this module; it is called with
# the C symbol name, the argument ctypes list and the return ctype.
ctypes_function = ctypes_function_for_shared_library(_libllava)
################################################
# llava.h
################################################
# struct clip_ctx;
# Handle to a clip context. `clip_ctx_p` is a NewType over int that is used in
# the Python signatures of this module, while `clip_ctx_p_ctypes` is the ctypes
# type (a void pointer) used for the same handle in the argument/return type
# lists passed to `ctypes_function`.
clip_ctx_p = NewType("clip_ctx_p", int)
clip_ctx_p_ctypes = c_void_p
# struct llava_image_embed {
# float * embed;
# int n_image_pos;
# };
class llava_image_embed(Structure):
    """ctypes mirror of ``struct llava_image_embed`` from llava.h.

    Field order and types follow the C declaration: a ``float *`` named
    ``embed`` followed by an ``int`` named ``n_image_pos``.
    """

    _fields_ = [
        # float * embed;
        ("embed", POINTER(c_float)),
        # int n_image_pos;
        ("n_image_pos", c_int),
    ]
# /** sanity check for clip <-> llava embed size match */
# LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
@ctypes_function(
    "llava_validate_embed_size",
    [
        llama_cpp.llama_context_p_ctypes,  # const llama_context * ctx_llama
        clip_ctx_p_ctypes,  # const clip_ctx * ctx_clip
    ],
    c_bool,
)
def llava_validate_embed_size(
    ctx_llama: llama_cpp.llama_context_p,
    ctx_clip: clip_ctx_p,
    /,
) -> bool:
    """Sanity check for clip <-> llava embed size match.

    Args:
        ctx_llama: llama context handle.
        ctx_clip: clip context handle.

    Returns:
        The C ``bool`` result of ``llava_validate_embed_size``.
    """
    ...
# /** build an image embed from image file bytes */
# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
@ctypes_function(
    "llava_image_embed_make_with_bytes",
    [
        clip_ctx_p_ctypes,  # struct clip_ctx * ctx_clip
        c_int,  # int n_threads
        POINTER(c_uint8),  # const unsigned char * image_bytes
        c_int,  # int image_bytes_length
    ],
    POINTER(llava_image_embed),
)
def llava_image_embed_make_with_bytes(
    ctx_clip: clip_ctx_p,
    n_threads: Union[c_int, int],
    image_bytes: CtypesArray[c_uint8],
    image_bytes_length: Union[c_int, int],
    /,
) -> "_Pointer[llava_image_embed]":
    """Build an image embed from image file bytes.

    Args:
        ctx_clip: clip context handle.
        n_threads: ``int n_threads`` of the C prototype.
        image_bytes: buffer holding the image file bytes.
        image_bytes_length: length of ``image_bytes``.

    Returns:
        Pointer to a ``llava_image_embed`` structure.
    """
    ...
# /** build an image embed from a path to an image filename */
# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path);
@ctypes_function(
    "llava_image_embed_make_with_filename",
    [
        clip_ctx_p_ctypes,  # struct clip_ctx * ctx_clip
        c_int,  # int n_threads
        c_char_p,  # const char * image_path
    ],
    POINTER(llava_image_embed),
)
def llava_image_embed_make_with_filename(
    ctx_clip: clip_ctx_p,
    n_threads: Union[c_int, int],
    image_path: bytes,
    /,
) -> "_Pointer[llava_image_embed]":
    """Build an image embed from a path to an image filename.

    Args:
        ctx_clip: clip context handle.
        n_threads: ``int n_threads`` of the C prototype.
        image_path: path of the image file, as bytes.

    Returns:
        Pointer to a ``llava_image_embed`` structure.
    """
    ...
# LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed);
# /** free an embedding made with llava_image_embed_make_* */
@ctypes_function(
    "llava_image_embed_free",
    [
        POINTER(llava_image_embed),  # struct llava_image_embed * embed
    ],
    None,
)
def llava_image_embed_free(
    embed: "_Pointer[llava_image_embed]",
    /,
):
    """Free an embedding made with ``llava_image_embed_make_*``.

    Args:
        embed: pointer to the ``llava_image_embed`` to free.
    """
    ...
# /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. */
# LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);
@ctypes_function(
    "llava_eval_image_embed",
    [
        llama_cpp.llama_context_p_ctypes,  # struct llama_context * ctx_llama
        POINTER(llava_image_embed),  # const struct llava_image_embed * embed
        c_int,  # int n_batch
        POINTER(c_int),  # int * n_past
    ],
    c_bool,
)
def llava_eval_image_embed(
    ctx_llama: llama_cpp.llama_context_p,
    embed: "_Pointer[llava_image_embed]",
    n_batch: Union[c_int, int],
    n_past: "_Pointer[c_int]",
    /,
) -> bool:
    """Write the image represented by ``embed`` into the llama context.

    The image is written with batch size ``n_batch``, starting at context
    position ``n_past``. On completion, ``n_past`` points to the next
    position in the context after the image embed.

    Args:
        ctx_llama: llama context handle.
        embed: pointer to the image embed to evaluate.
        n_batch: batch size.
        n_past: pointer to the context position; updated by the call.

    Returns:
        The C ``bool`` result of ``llava_eval_image_embed``.
    """
    ...
################################################
# clip.h
################################################
# struct clip_image_u8_batch {
# struct clip_image_u8 * data;
# size_t size;
# };
class clip_image_u8_batch(Structure):
    """ctypes mirror of ``struct clip_image_u8_batch`` from clip.h.

    ``data`` is ``struct clip_image_u8 *`` in C and is declared here as an
    untyped void pointer; ``size`` is a ``size_t``.
    """

    _fields_ = [
        # struct clip_image_u8 * data;
        ("data", c_void_p),
        # size_t size;
        ("size", c_size_t),
    ]
# struct clip_image_f32_batch {
# struct clip_image_f32 * data;
# size_t size;
# };
class clip_image_f32_batch(Structure):
    """ctypes mirror of ``struct clip_image_f32_batch`` from clip.h.

    ``data`` is ``struct clip_image_f32 *`` in C and is declared here as an
    untyped void pointer; ``size`` is a ``size_t``.
    """

    _fields_ = [
        # struct clip_image_f32 * data;
        ("data", c_void_p),
        # size_t size;
        ("size", c_size_t),
    ]
# /** load mmproj model */
# CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity);
@ctypes_function(
    "clip_model_load",
    [
        c_char_p,  # const char * fname
        c_int,  # int verbosity
    ],
    clip_ctx_p_ctypes,
)
def clip_model_load(
    fname: bytes,
    verbosity: Union[c_int, int],
    /,
) -> Optional[clip_ctx_p]:
    """Load mmproj model.

    Args:
        fname: model file name, as bytes.
        verbosity: ``int verbosity`` of the C prototype.

    Returns:
        A clip context handle, annotated as optional.
    """
    ...
# /** free mmproj model */
# CLIP_API void clip_free(struct clip_ctx * ctx);
@ctypes_function(
    "clip_free",
    [
        clip_ctx_p_ctypes,  # struct clip_ctx * ctx
    ],
    None,
)
def clip_free(
    ctx: clip_ctx_p,
    /,
):
    """Free mmproj model.

    Args:
        ctx: clip context handle to free.
    """
    ...
# CLIP_API struct clip_image_u8 * clip_image_u8_init ();
@ctypes_function(
    "clip_image_u8_init",
    [],  # no arguments
    c_void_p,  # struct clip_image_u8 *
)
def clip_image_u8_init() -> Optional[c_void_p]:
    """Binding for ``clip_image_u8_init`` from clip.h.

    The C function takes no arguments and returns a
    ``struct clip_image_u8 *``, declared here as a void pointer.
    """
    ...
# CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
@ctypes_function(
    "clip_image_u8_free",
    [
        c_void_p,  # struct clip_image_u8 * img
    ],
    None,
)
def clip_image_u8_free(
    img: c_void_p,
    /,
):
    """Binding for ``clip_image_u8_free`` from clip.h.

    Args:
        img: ``struct clip_image_u8 *`` in C, passed as a void pointer.
    """
    ...
# CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
@ctypes_function(
    "clip_image_f32_free",
    [
        c_void_p,  # struct clip_image_f32 * img
    ],
    None,
)
def clip_image_f32_free(
    img: c_void_p,
    /,
):
    """Binding for ``clip_image_f32_free`` from clip.h.

    Args:
        img: ``struct clip_image_f32 *`` in C, passed as a void pointer.
    """
    ...
# CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
@ctypes_function(
    "clip_image_u8_batch_free",
    [
        POINTER(clip_image_u8_batch),  # struct clip_image_u8_batch * batch
    ],
    None,
)
def clip_image_u8_batch_free(
    batch: "_Pointer[clip_image_u8_batch]",
    /,
):
    """Binding for ``clip_image_u8_batch_free`` from clip.h.

    Args:
        batch: pointer to a ``clip_image_u8_batch`` structure.
    """
    ...
# CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
@ctypes_function(
    "clip_image_f32_batch_free",
    [
        POINTER(clip_image_f32_batch),  # struct clip_image_f32_batch * batch
    ],
    None,
)
def clip_image_f32_batch_free(
    batch: "_Pointer[clip_image_f32_batch]",
    /,
):
    """Binding for ``clip_image_f32_batch_free`` from clip.h.

    Args:
        batch: pointer to a ``clip_image_f32_batch`` structure.
    """
    ...
# /** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
# CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
@ctypes_function(
    "clip_image_preprocess",
    [
        clip_ctx_p_ctypes,  # struct clip_ctx * ctx
        c_void_p,  # const struct clip_image_u8 * img
        POINTER(clip_image_f32_batch),  # struct clip_image_f32_batch * res_imgs
    ],
    c_bool,
)
def clip_image_preprocess(
    ctx: clip_ctx_p,
    img: c_void_p,
    res_imgs: "_Pointer[clip_image_f32_batch]",
    /,
) -> bool:
    """Preprocess ``img`` and store the result in ``res_imgs``.

    Args:
        ctx: clip context handle.
        img: ``const struct clip_image_u8 *`` in C, passed as a void pointer.
        res_imgs: pointer to the ``clip_image_f32_batch`` receiving the result.

    Returns:
        The C ``bool`` result of ``clip_image_preprocess``.
    """
    ...
# CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
@ctypes_function(
    "clip_image_batch_encode",
    [
        clip_ctx_p_ctypes,  # struct clip_ctx * ctx
        c_int,  # int n_threads
        POINTER(clip_image_f32_batch),  # const struct clip_image_f32_batch * imgs
        POINTER(c_float),  # float * vec
    ],
    c_bool,
)
def clip_image_batch_encode(
    ctx: clip_ctx_p,
    n_threads: Union[c_int, int],
    imgs: "_Pointer[clip_image_f32_batch]",
    vec: CtypesArray[c_float],
    /,
) -> bool:
    """Binding for ``clip_image_batch_encode`` from clip.h.

    The annotations are aligned with the declared argument types: ``vec`` is
    declared as ``POINTER(c_float)`` (``float * vec`` in C), so it is typed
    as a float buffer rather than ``c_void_p``, and ``n_threads`` accepts a
    plain ``int`` like the other bindings in this module.

    Args:
        ctx: clip context handle.
        n_threads: ``int n_threads`` of the C prototype.
        imgs: pointer to the ``clip_image_f32_batch`` to encode.
        vec: ``float * vec`` of the C prototype.

    Returns:
        The C ``bool`` result of ``clip_image_batch_encode``.
    """
    ...
# /** interpret bytes as an image file with length bytes_length, and use the result to populate img */
# CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
@ctypes_function(
    "clip_image_load_from_bytes",
    [
        c_void_p,  # const unsigned char * bytes
        c_size_t,  # size_t bytes_length
        c_void_p,  # struct clip_image_u8 * img
    ],
    c_bool,
)
def clip_image_load_from_bytes(
    bytes: c_void_p,
    bytes_length: Union[c_size_t, int],
    img: c_void_p,
    /,
) -> bool:
    """Interpret ``bytes`` as an image file and use it to populate ``img``.

    ``bytes_length`` is annotated as ``Union[c_size_t, int]`` to follow the
    convention of the other integer parameters in this module. The first
    parameter keeps the name ``bytes`` used by the C prototype even though
    it shadows the builtin; it is positional-only, so the name never
    appears at a call site.

    Args:
        bytes: ``const unsigned char *`` in C, passed as a void pointer.
        bytes_length: length of ``bytes``.
        img: ``struct clip_image_u8 *`` in C, passed as a void pointer.

    Returns:
        The C ``bool`` result of ``clip_image_load_from_bytes``.
    """
    ...