From d2d33c5606d7e08f7c8f9c12191c724556d888e1 Mon Sep 17 00:00:00 2001
From: Mikameel
Date: Mon, 13 Oct 2025 10:18:21 +0800
Subject: [PATCH] Make num_predict configurable, defaulting to 16

---
 server/src/ollama_client.rs |  4 ++++
 server/src/stream.rs        | 34 +++++++++++++++++++++++-----------
 worker/vlm.py               | 12 +++++++++---
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/server/src/ollama_client.rs b/server/src/ollama_client.rs
index 46ddfe0..52c6ee0 100644
--- a/server/src/ollama_client.rs
+++ b/server/src/ollama_client.rs
@@ -84,6 +84,7 @@ pub async fn send_to_ollama(
     prompt: String,
     model: String,
     ollama_host: String,
+    num_predict: Option<i32>,
 ) -> Result<mpsc::Receiver<Result<String, reqwest::Error>>, reqwest::Error> {
     tracing::info!("Sending images to Ollama: {:?}", images_batch.len());
     let images_b64: Vec<String> = images_batch
@@ -96,6 +97,9 @@ pub async fn send_to_ollama(
         "images": images_b64,
         "model": model.clone(),
         "stream": true,
+        "options": {
+            "num_predict": num_predict.unwrap_or(16),
+        },
     });
 
     let url = format!("{}/api/generate", ollama_host);
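In Ollama's /api/generate API, num_predict is a model option, so it travels inside the options object rather than at the top level of the request body; it caps how many tokens a single response may contain, and 16 keeps the per-frame captions terse. A stdlib-only way to sanity-check the cap against a running instance is sketched below (the host, model name, and image file are assumptions):

    import base64
    import json
    import urllib.request

    OLLAMA_HOST = "http://localhost:11434"  # assumed default Ollama port

    with open("frame.jpg", "rb") as f:  # any small test image
        image_b64 = base64.b64encode(f.read()).decode()

    body = {
        "model": "llava:7b",
        "prompt": "Describe this frame in one sentence.",
        "images": [image_b64],
        "stream": False,
        # num_predict caps generated tokens; mirrors the server's default of 16.
        "options": {"num_predict": 16},
    }

    req = urllib.request.Request(
        f"{OLLAMA_HOST}/api/generate",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read())["response"])

A response that never runs past roughly 16 tokens confirms the option is being honored.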
diff --git a/server/src/stream.rs b/server/src/stream.rs
index b7d8bbe..463cae8 100644
--- a/server/src/stream.rs
+++ b/server/src/stream.rs
@@ -2,6 +2,7 @@ use actix_web::{rt, web, Error, HttpRequest, HttpResponse};
 use actix_ws::Message;
 use futures::{select, FutureExt};
 use futures_util::StreamExt as _;
+use serde_json::Value;
 use tokio::time::{self, Duration, Instant};
 
 use crate::{config, ollama_client::{send_to_ollama}};
@@ -12,10 +13,11 @@ async fn proxy_ollama_response(
     prompt: String,
     model: String,
     ollama_host: String,
+    num_predict: i32,
 ) {
     let mut session_clone = session.clone();
     rt::spawn(async move {
-        match send_to_ollama(images_batch, prompt, model, ollama_host).await {
+        match send_to_ollama(images_batch, prompt, model, ollama_host, Some(num_predict)).await {
             Ok(mut rx) => {
                 while let Some(res) = rx.next().await {
                     match res {
@@ -43,7 +45,7 @@ async fn handle_binary(
     images: &mut Vec<Vec<u8>>,
     bin: bytes::Bytes,
     session: &mut actix_ws::Session,
-    last_prompt: &Option<String>,
+    last_prompt: &Option<(String, i32)>,
     model: String,
     ollama_host: String,
     image_batch_size: usize,
@@ -51,9 +53,9 @@ async fn handle_binary(
     images.push(bin.to_vec());
 
     if images.len() >= image_batch_size {
-        if let Some(prompt) = last_prompt.clone() {
+        if let Some((prompt, num_predict)) = last_prompt.clone() {
             let images_batch = std::mem::take(images);
-            proxy_ollama_response(session, images_batch, prompt, model, ollama_host).await;
+            proxy_ollama_response(session, images_batch, prompt, model, ollama_host, num_predict).await;
         } else {
             let _ = session.text("No prompt received for image batch".to_string()).await;
         }
@@ -64,16 +66,24 @@ async fn handle_text(
     session: &mut actix_ws::Session,
     images: &mut Vec<Vec<u8>>,
     text: String,
-    last_prompt: &mut Option<String>,
+    last_prompt: &mut Option<(String, i32)>,
     model: String,
     ollama_host: String,
 ) {
-    *last_prompt = Some(text.clone());
+    let (prompt, num_predict) = if let Ok(value) = serde_json::from_str::<Value>(&text) {
+        let prompt = value["prompt"].as_str().unwrap_or(&text).to_string();
+        let num_predict = value["num_predict"].as_i64().map(|n| n as i32).unwrap_or(16);
+        (prompt, num_predict)
+    } else {
+        (text.clone(), 16)
+    };
+
+    *last_prompt = Some((prompt.clone(), num_predict));
 
     // If prompt updated, send the images to Ollama
     if !images.is_empty() {
         let images_batch = std::mem::take(images);
-        proxy_ollama_response(session, images_batch, text, model, ollama_host).await;
+        proxy_ollama_response(session, images_batch, prompt, model, ollama_host, num_predict).await;
     }
 }
 
@@ -92,7 +102,7 @@ pub async fn ws_index(req: HttpRequest, stream: web::Payload, vlm_config: web::D
     rt::spawn(async move {
         let mut images: Vec<Vec<u8>> = Vec::new();
-        let mut last_prompt: Option<String> = None;
+        let mut last_prompt: Option<(String, i32)> = None;
 
         let mut inference_interval = time::interval_at(Instant::now() + Duration::from_secs(30), Duration::from_secs(10));
         inference_interval.set_missed_tick_behavior(time::MissedTickBehavior::Skip);
 
@@ -141,9 +151,11 @@ pub async fn ws_index(req: HttpRequest, stream: web::Payload, vlm_config: web::D
                     tracing::error!("Error sending ping: {:?}", e);
                     break;
                 }
-                if let Some(prompt) = last_prompt.clone() {
-                    tracing::info!("Inference interval fired: running handle_text with last prompt");
-                    handle_text(&mut session, &mut images, prompt.clone(), &mut last_prompt, model.clone(), ollama_host.clone()).await;
+                if let Some((prompt, num_predict)) = last_prompt.clone() {
+                    if !images.is_empty() {
+                        let images_batch = std::mem::take(&mut images);
+                        proxy_ollama_response(&mut session, images_batch, prompt, model.clone(), ollama_host.clone(), num_predict).await;
+                    }
                 }
             }
         }
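With this change a text frame may be either a bare prompt, as before, or a JSON object carrying prompt and num_predict; anything that fails to parse as JSON falls back to the raw text with the default cap of 16. A minimal client sketch, assuming the third-party websockets package and a server socket at ws://localhost:8080/ws (both the dependency and the URL are assumptions):

    import asyncio
    import json

    import websockets  # assumed third-party dependency

    async def main():
        async with websockets.connect("ws://localhost:8080/ws") as ws:
            # JSON text frame: sets the prompt and the per-session token cap.
            await ws.send(json.dumps({"prompt": "What is happening?", "num_predict": 32}))
            # Binary frames are buffered until image_batch_size is reached.
            with open("frame.jpg", "rb") as f:
                await ws.send(f.read())
            # The first reply arrives once the batch fills or the periodic
            # inference tick fires, whichever comes first.
            print(await ws.recv())

    asyncio.run(main())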
diff --git a/worker/vlm.py b/worker/vlm.py
index 33a666a..e2d022a 100644
--- a/worker/vlm.py
+++ b/worker/vlm.py
@@ -74,18 +74,18 @@ def parse_image_timestamp(image_path):
         return match.group('ts')
     return ""
 
-def run_inference(vlm_prompt, prompt, image_paths):
+def run_inference(vlm_prompt, prompt, image_paths, temperature: float = 0.2, num_predict: int = 16):
     start_image = None
     end_image = None
 
     # Get model names from environment variables
     vlm_model = os.environ.get("VLM_MODEL", "llava:7b")
     llm_model = os.environ.get("LLM_MODEL", "llama3:latest")
 
-    logger.info("Using VLM model: " + vlm_model)
+    logger.info("Using VLM model: " + vlm_model + " with temperature=" + str(temperature) + " and num_predict=" + str(num_predict))
     logger.info("Using LLM model: " + llm_model)
 
     # Ensure models are available
     ensure_model_available(vlm_model)
     ensure_model_available(llm_model)
 
@@ -98,6 +98,10 @@ def run_inference(vlm_prompt, prompt, image_paths):
             model=vlm_model,
             prompt=vlm_prompt,
             images=[image_path],
+            options={
+                "temperature": float(temperature),
+                "num_predict": num_predict
+            },
         )
         results += '"' + parse_image_id(image_path) + '",' + '"' + parse_image_timestamp(image_path) + '",' + '"' + res.response + '"\n'
         logger.info("Inference output: " + str(res))
@@ -183,7 +187,9 @@ def run(conn, job, input_dir, output_dir):
         return
 
     try:
-        results = run_inference(inputs['vlm_prompt'], inputs['prompt'], image_paths)
+        temperature = float(inputs.get('temperature', os.environ.get("VLM_TEMPERATURE", 0.2)))
+        num_predict = int(inputs.get('num_predict', os.environ.get("VLM_NUM_PREDICT", 16)))
+        results = run_inference(inputs['vlm_prompt'], inputs['prompt'], image_paths, temperature=temperature, num_predict=num_predict)
     except Exception as e:
         logger.error("Error processing job", extra={"job_id": job['id'], "error": str(e)})
         err = {
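On the worker side the same knobs resolve per job: explicit job inputs win over the VLM_TEMPERATURE and VLM_NUM_PREDICT environment variables, which in turn win over the hard-coded defaults of 0.2 and 16. The precedence run() applies boils down to the following (values in the calls are illustrative):

    import os

    def resolve_options(inputs: dict) -> tuple[float, int]:
        # Job inputs first, then environment variables, then hard defaults.
        temperature = float(inputs.get("temperature", os.environ.get("VLM_TEMPERATURE", 0.2)))
        num_predict = int(inputs.get("num_predict", os.environ.get("VLM_NUM_PREDICT", 16)))
        return temperature, num_predict

    print(resolve_options({"num_predict": 64}))  # job override -> (0.2, 64)
    print(resolve_options({}))                   # no overrides -> (0.2, 16)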