Skip to content

Commit 5bff373

Browse files
author
Chris Warren-Smith
committed
LLAMA: implemented mem_info command
1 parent 66d874c commit 5bff373

3 files changed

Lines changed: 35 additions & 13 deletions

File tree

llama/llama-sb.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -537,8 +537,9 @@ LlamaMemoryInfo Llama::memory_info() {
537537
info.kv_percent = 100.0f * info.kv_used / info.kv_total;
538538

539539
// Model layers
540+
auto n_gpu_layers = std::max(0, _n_gpu_layers);
540541
info.n_layers_total = llama_model_n_layer(_model);
541-
info.n_layers_gpu = _n_gpu_layers;
542+
info.n_layers_gpu = std::min(info.n_layers_total, n_gpu_layers);
542543
info.n_layers_cpu = info.n_layers_total - info.n_layers_gpu;
543544

544545
// ram
@@ -548,6 +549,13 @@ LlamaMemoryInfo Llama::memory_info() {
548549

549550
// Advice
550551
ostringstream advice;
552+
553+
if (n_gpu_layers < info.n_layers_total) {
554+
advice << "Only " << n_gpu_layers << "/" << info.n_layers_total
555+
<< " layers on GPU - increase n_gpu_layers if VRAM allows. ";
556+
} else {
557+
advice << "All " << info.n_layers_total << " layers on GPU. ";
558+
}
551559
if (info.n_layers_cpu > 0) {
552560
advice << "CPU offload active (" << info.n_layers_cpu
553561
<< " layers on CPU) - increase n_gpu_layers if VRAM allows. ";

llama/llama.cpp

llama/samples/nitro_cli.bas

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ const WHITE = chr(27) + "[37m"
2020
const BOLD_CYAN = chr(27) + "[1;36m"
2121

2222
' llama configuration (Qwen settings)
23-
const n_ctx = 32768
23+
const n_ctx = 65536
2424
const n_batch = 512
2525
const n_max_tokens = 4096
2626
const n_temperature = 0.6
@@ -29,8 +29,12 @@ const n_top_p = 0.95
2929
const n_min_p = 0
3030
const n_penalty_repeat = 1.0
3131
const n_penalty_last_n = 256
32+
const n_gpu_layers = 32
3233

33-
sandbox_home = cwd
34+
sandbox_home = iff(len(command) > 0, trim(command), cwd)
35+
if (left(sandbox_home) == "~") then
36+
' Expand a leading "~" to the user's home directory. mid() is 1-based, so
' start at position 2 to drop the "~" itself and keep the rest of the path.
sandbox_home = home + mid(sandbox_home, 2)
37+
endif
3438

3539
'
3640
' Displays the welcome message
@@ -50,8 +54,8 @@ end sub
5054
' handles the TOOL:LIST command
5155
'
5256
func tool_list_files(arg)
53-
if (arg == "./") then
54-
arg = sandbox_home + arg
57+
if (left(arg, 2) == "./") then
58+
arg = sandbox_home + mid(arg, 2)
5559
else if (len(arg) == 0 or arg == ".") then
5660
arg = sandbox_home
5761
endif
@@ -151,11 +155,11 @@ func process_tool(cmd)
151155
endif
152156
endif
153157

154-
' print RED
155-
' print "["+op+"]"
156-
' print "["+arg1+"]"
157-
' print "["+arg2+"]"
158-
' print RESET
158+
' print RED
159+
' print "["+op+"]"
160+
' print "["+arg1+"]"
161+
' print "["+arg2+"]"
162+
' print RESET
159163

160164
select case op
161165
case "TOOL:DATE"
@@ -219,7 +223,7 @@ end
219223
' creates the llama instance
220224
'
221225
func create_llama()
222-
local llama = llm.llama(model, n_ctx, n_batch, 50)
226+
local llama = llm.llama(model, n_ctx, n_batch, n_gpu_layers)
223227
llama.add_stop("<|turn|>")
224228
llama.set_max_tokens(n_max_tokens)
225229
llama.set_temperature(n_temperature)
@@ -279,11 +283,21 @@ sub main()
279283
print
280284
print WHITE;
281285
print "--- Tokens/sec: " + round(iter.tokens_sec(), 2) + " ---\n"
282-
iter = llama.add_message("user", process_input())
286+
local next_iter = false
287+
repeat
288+
local user_input = process_input()
289+
if (user_input == "/meminfo") then
290+
print llama.mem_info()
291+
else
292+
iter = llama.add_message("user", user_input)
293+
next_iter = true
294+
endif
295+
until next_iter
283296
print BLUE;
284297
endif
285298
wend
286299
end
287300

288301
welcome_message()
289302
main()
303+

0 commit comments

Comments
 (0)