本文主要介绍 LLaVA(Large Language and Vision Assistant)的图片解析与图生文,希望能为大家解决编程问题提供一定的参考价值,需要的开发者们跟着小编一起来学习吧!
LLaVA: Large Language and Vision Assistant 图片解析 图生文
目录
介绍
效果
项目
测试代码
Form1.cs
Helper.cs
下载
介绍
LLaVA(Large Language and Vision Assistant,“大型语言和视觉助手”)是一种新的大型多模态模型,旨在开发一种通用视觉助手,能够遵循语言和图像指令来完成各种现实世界的任务。其思路是将 GPT-4 等大型语言模型 (LLM) 的强大功能与 CLIP 等视觉编码器相结合,创建一个经过端到端训练的神经助手,使其能够理解多模态指令并据此采取行动。
项目地址:https://github.com/IntptrMax/LLavaSharp
模型下载地址:https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main
效果
项目
测试代码
Form1.cs
using LLavaSharp;
using System;
using System.Diagnostics;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
namespace WinformTest
{
    /// <summary>
    /// Demo form: the user picks an image and types a prompt, the pair is handed to
    /// <see cref="Helper.ProcessImage"/>, and the returned text is shown together with
    /// the elapsed time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /*
        Project: https://github.com/IntptrMax/LLavaSharp
        Model download: https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main
        */

        // Field initializers run before the constructor body, so both native libraries
        // are requested before Form1_Load creates the Helper.
        // NOTE(review): the returned handles are never checked — confirm what
        // Lib.LoadLibrary returns on failure and surface an error if needed.
        IntPtr llamaDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llama.dll");
        IntPtr llavaSharedDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llava_shared.dll");

        // Hard-coded model locations used by this sample; adjust to the local download path.
        string model = @"C:\MyStudy\llava\llava-v1.5-7b-Q4_K.gguf";
        string mmproj = @"C:\MyStudy\llava\llava-v1.5-7b-mmproj-Q4_0.gguf";

        Helper helper;

        /// <summary>Creates the <see cref="Helper"/> from the configured model paths when the form loads.</summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            helper = new Helper(model, mmproj);
        }

        // The original listed "*.tiff" twice; the short ".tif" extension is what was missing.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
        string image_path = "";
        Bitmap bitmap;

        /// <summary>
        /// Lets the user choose an image file, loads it, and shows it in the picture box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string selectedPath;
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                selectedPath = ofd.FileName;
            }

            // Load before touching any state: if the file is not a valid image the
            // constructor throws and the previously selected image stays consistent
            // (path, bitmap and picture box still refer to the same file).
            Bitmap loaded = new Bitmap(selectedPath);

            Bitmap previous = bitmap;
            pictureBox1.Image = null;
            image_path = selectedPath;
            bitmap = loaded;
            pictureBox1.Image = bitmap;
            txtInfo.Text = "";

            // The old bitmap is no longer referenced by the picture box, so release it now
            // instead of leaking one GDI+ image per selection.
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        StringBuilder sb = new StringBuilder();

        /// <summary>
        /// Runs the model on the selected image with the prompt from <c>txtPrompt</c> and
        /// writes the elapsed time and the result into <c>txtInfo</c>.
        /// Does nothing when no image is selected or the prompt is empty.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "" || bitmap == null)
            {
                return;
            }
            if (String.IsNullOrEmpty(txtPrompt.Text))
            {
                return;
            }

            txtInfo.Text = "";
            button2.Enabled = false;
            sb.Clear();
            // Process pending window messages so the cleared text box and the disabled
            // button are painted before the synchronous ProcessImage call below.
            System.Windows.Forms.Application.DoEvents();

            try
            {
                Stopwatch stopwatch = Stopwatch.StartNew();
                string result = helper.ProcessImage(bitmap, txtPrompt.Text);
                stopwatch.Stop();
                double totalTime = stopwatch.Elapsed.TotalSeconds;

                sb.AppendLine($"totalTime: {totalTime:F2}s");
                sb.AppendLine("- - - - - - - - - - - - - - - - ");
                sb.AppendLine(result);
                txtInfo.Text = sb.ToString();
            }
            finally
            {
                // Re-enable even when ProcessImage throws; otherwise the button would
                // stay disabled for the rest of the session.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Releases the displayed bitmap and the <see cref="Helper"/> when the form closes.
        /// </summary>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            base.OnFormClosed(e);

            pictureBox1.Image = null;
            if (bitmap != null)
            {
                bitmap.Dispose();
                bitmap = null;
            }
            if (helper != null)
            {
                helper.Dispose();
                helper = null;
            }
        }
    }
}
using LLavaSharp;
using System;
using System.Diagnostics;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

namespace WinformTest
{
    /// <summary>
    /// Demo form: the user picks an image and types a prompt, the pair is handed to
    /// <see cref="Helper.ProcessImage"/>, and the returned text is shown together with
    /// the elapsed time.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /*
        Project: https://github.com/IntptrMax/LLavaSharp
        Model download: https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main
        */

        // Field initializers run before the constructor body, so both native libraries
        // are requested before Form1_Load creates the Helper.
        // NOTE(review): the returned handles are never checked — confirm what
        // Lib.LoadLibrary returns on failure and surface an error if needed.
        IntPtr llamaDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llama.dll");
        IntPtr llavaSharedDllPtr = Lib.LoadLibrary(@".\dll\cuda12\llava_shared.dll");

        // Hard-coded model locations used by this sample; adjust to the local download path.
        string model = @"C:\MyStudy\llava\llava-v1.5-7b-Q4_K.gguf";
        string mmproj = @"C:\MyStudy\llava\llava-v1.5-7b-mmproj-Q4_0.gguf";

        Helper helper;

        /// <summary>Creates the <see cref="Helper"/> from the configured model paths when the form loads.</summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            helper = new Helper(model, mmproj);
        }

        // The original listed "*.tiff" twice; the short ".tif" extension is what was missing.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
        string image_path = "";
        Bitmap bitmap;

        /// <summary>
        /// Lets the user choose an image file, loads it, and shows it in the picture box.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string selectedPath;
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                selectedPath = ofd.FileName;
            }

            // Load before touching any state: if the file is not a valid image the
            // constructor throws and the previously selected image stays consistent
            // (path, bitmap and picture box still refer to the same file).
            Bitmap loaded = new Bitmap(selectedPath);

            Bitmap previous = bitmap;
            pictureBox1.Image = null;
            image_path = selectedPath;
            bitmap = loaded;
            pictureBox1.Image = bitmap;
            txtInfo.Text = "";

            // The old bitmap is no longer referenced by the picture box, so release it now
            // instead of leaking one GDI+ image per selection.
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        StringBuilder sb = new StringBuilder();

        /// <summary>
        /// Runs the model on the selected image with the prompt from <c>txtPrompt</c> and
        /// writes the elapsed time and the result into <c>txtInfo</c>.
        /// Does nothing when no image is selected or the prompt is empty.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "" || bitmap == null)
            {
                return;
            }
            if (String.IsNullOrEmpty(txtPrompt.Text))
            {
                return;
            }

            txtInfo.Text = "";
            button2.Enabled = false;
            sb.Clear();
            // Process pending window messages so the cleared text box and the disabled
            // button are painted before the synchronous ProcessImage call below.
            System.Windows.Forms.Application.DoEvents();

            try
            {
                Stopwatch stopwatch = Stopwatch.StartNew();
                string result = helper.ProcessImage(bitmap, txtPrompt.Text);
                stopwatch.Stop();
                double totalTime = stopwatch.Elapsed.TotalSeconds;

                sb.AppendLine($"totalTime: {totalTime:F2}s");
                sb.AppendLine("- - - - - - - - - - - - - - - - ");
                sb.AppendLine(result);
                txtInfo.Text = sb.ToString();
            }
            finally
            {
                // Re-enable even when ProcessImage throws; otherwise the button would
                // stay disabled for the rest of the session.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Releases the displayed bitmap and the <see cref="Helper"/> when the form closes.
        /// </summary>
        protected override void OnFormClosed(FormClosedEventArgs e)
        {
            base.OnFormClosed(e);

            pictureBox1.Image = null;
            if (bitmap != null)
            {
                bitmap.Dispose();
                bitmap = null;
            }
            if (helper != null)
            {
                helper.Dispose();
                helper = null;
            }
        }
    }
}
Helper.cs
using System;
using System.Drawing;
namespace LLavaSharp
{
    /// <summary>
    /// Thin wrapper around the <c>Lib</c> interop calls: initializes a LLaVA context once
    /// and runs image + prompt requests against it. Dispose the instance to free the context.
    /// </summary>
    public class Helper : IDisposable
    {
        private llava_context ctx_llava;
        gpt_params @params = new gpt_params();

        // Guards against freeing the context twice and against use after Dispose.
        private bool disposed;

        /// <summary>
        /// Stores the model settings and initializes the context via <c>Lib.llava_init</c>.
        /// </summary>
        /// <param name="model_path">Path assigned to <c>params.model</c>.</param>
        /// <param name="mmproj_path">Path assigned to <c>params.mmproj</c>.</param>
        /// <param name="ngl">
        /// Value assigned to both <c>n_gpu_layers</c> and <c>n_gpu_layers_draft</c>
        /// (presumably the number of layers offloaded to the GPU — confirm against the library).
        /// </param>
        /// <exception cref="ArgumentException">A path is null or empty.</exception>
        public Helper(string model_path, string mmproj_path, int ngl = 32)
        {
            // Fail with a managed exception instead of handing an unusable path to native code.
            if (string.IsNullOrEmpty(model_path))
            {
                throw new ArgumentException("Model path must not be null or empty.", nameof(model_path));
            }
            if (string.IsNullOrEmpty(mmproj_path))
            {
                throw new ArgumentException("Projector (mmproj) path must not be null or empty.", nameof(mmproj_path));
            }

            @params.model = model_path;
            @params.mmproj = mmproj_path;
            @params.n_gpu_layers = ngl;
            @params.n_gpu_layers_draft = ngl;
            ctx_llava = Lib.llava_init(@params);
        }

        /// <summary>
        /// Builds an image embedding for <paramref name="bitmap"/>, runs
        /// <paramref name="prompt"/> against it and returns the text produced by
        /// <c>Lib.process_prompt</c>.
        /// </summary>
        /// <param name="bitmap">Image to analyze.</param>
        /// <param name="prompt">Text prompt sent along with the image.</param>
        /// <param name="temp">
        /// Passed through to <c>Lib.process_prompt</c> (presumably the sampling temperature — confirm).
        /// </param>
        /// <returns>The string returned by <c>Lib.process_prompt</c>.</returns>
        /// <exception cref="ObjectDisposedException">The helper has already been disposed.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> or <paramref name="prompt"/> is null.</exception>
        public string ProcessImage(Bitmap bitmap, string prompt, float temp = 0)
        {
            if (disposed)
            {
                throw new ObjectDisposedException(nameof(Helper));
            }
            if (bitmap == null)
            {
                throw new ArgumentNullException(nameof(bitmap));
            }
            if (prompt == null)
            {
                throw new ArgumentNullException(nameof(prompt));
            }

            llava_image_embed image_embed = Lib.load_image(ctx_llava, bitmap, @params.n_threads);
            try
            {
                return Lib.process_prompt(ctx_llava, image_embed, @params, prompt, temp);
            }
            finally
            {
                // Same cleanup sequence as before, but now it also runs when
                // process_prompt throws, so the embedding is not leaked and the
                // KV cache is released before the next request.
                Lib.llava_image_embed_free(image_embed);
                Lib.llama_free_kv_cache(ctx_llava.ctx_llama);
                // NOTE(review): forced collection kept from the original; confirm it is
                // still needed before removing it.
                GC.Collect();
            }
        }

        /// <summary>
        /// Frees the context via <c>Lib.llava_free</c>. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }
            disposed = true;
            Lib.llava_free(ctx_llava);
        }
    }
}
using System;
using System.Drawing;

namespace LLavaSharp
{
    /// <summary>
    /// Thin wrapper around the <c>Lib</c> interop calls: initializes a LLaVA context once
    /// and runs image + prompt requests against it. Dispose the instance to free the context.
    /// </summary>
    public class Helper : IDisposable
    {
        private llava_context ctx_llava;
        gpt_params @params = new gpt_params();

        // Guards against freeing the context twice and against use after Dispose.
        private bool disposed;

        /// <summary>
        /// Stores the model settings and initializes the context via <c>Lib.llava_init</c>.
        /// </summary>
        /// <param name="model_path">Path assigned to <c>params.model</c>.</param>
        /// <param name="mmproj_path">Path assigned to <c>params.mmproj</c>.</param>
        /// <param name="ngl">
        /// Value assigned to both <c>n_gpu_layers</c> and <c>n_gpu_layers_draft</c>
        /// (presumably the number of layers offloaded to the GPU — confirm against the library).
        /// </param>
        /// <exception cref="ArgumentException">A path is null or empty.</exception>
        public Helper(string model_path, string mmproj_path, int ngl = 32)
        {
            // Fail with a managed exception instead of handing an unusable path to native code.
            if (string.IsNullOrEmpty(model_path))
            {
                throw new ArgumentException("Model path must not be null or empty.", nameof(model_path));
            }
            if (string.IsNullOrEmpty(mmproj_path))
            {
                throw new ArgumentException("Projector (mmproj) path must not be null or empty.", nameof(mmproj_path));
            }

            @params.model = model_path;
            @params.mmproj = mmproj_path;
            @params.n_gpu_layers = ngl;
            @params.n_gpu_layers_draft = ngl;
            ctx_llava = Lib.llava_init(@params);
        }

        /// <summary>
        /// Builds an image embedding for <paramref name="bitmap"/>, runs
        /// <paramref name="prompt"/> against it and returns the text produced by
        /// <c>Lib.process_prompt</c>.
        /// </summary>
        /// <param name="bitmap">Image to analyze.</param>
        /// <param name="prompt">Text prompt sent along with the image.</param>
        /// <param name="temp">
        /// Passed through to <c>Lib.process_prompt</c> (presumably the sampling temperature — confirm).
        /// </param>
        /// <returns>The string returned by <c>Lib.process_prompt</c>.</returns>
        /// <exception cref="ObjectDisposedException">The helper has already been disposed.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> or <paramref name="prompt"/> is null.</exception>
        public string ProcessImage(Bitmap bitmap, string prompt, float temp = 0)
        {
            if (disposed)
            {
                throw new ObjectDisposedException(nameof(Helper));
            }
            if (bitmap == null)
            {
                throw new ArgumentNullException(nameof(bitmap));
            }
            if (prompt == null)
            {
                throw new ArgumentNullException(nameof(prompt));
            }

            llava_image_embed image_embed = Lib.load_image(ctx_llava, bitmap, @params.n_threads);
            try
            {
                return Lib.process_prompt(ctx_llava, image_embed, @params, prompt, temp);
            }
            finally
            {
                // Same cleanup sequence as before, but now it also runs when
                // process_prompt throws, so the embedding is not leaked and the
                // KV cache is released before the next request.
                Lib.llava_image_embed_free(image_embed);
                Lib.llama_free_kv_cache(ctx_llava.ctx_llama);
                // NOTE(review): forced collection kept from the original; confirm it is
                // still needed before removing it.
                GC.Collect();
            }
        }

        /// <summary>
        /// Frees the context via <c>Lib.llava_free</c>. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (disposed)
            {
                return;
            }
            disposed = true;
            Lib.llava_free(ctx_llava);
        }
    }
}
下载
源码下载
模型下载地址:https://hf-mirror.com/jartine/llava-v1.5-7B-GGUF/tree/main
这篇关于 LLaVA: Large Language and Vision Assistant 图片解析、图生文的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!