WeamonZW
Convex Community4mo ago
4 replies
WeamonZ

Using image with agents

I'm trying to let my agent use an image URL that I send it, and call a tool to describe the image(s).
The problem is that when I provide the image in my content as an image part, such as {type: "image", [...]}, my agent can no longer retrieve the URL. So in the following calls, it can't perform actions on my image.

I found a workaround that works: adding another text part to my content containing the image URL (but it's ugly, AND how am I supposed to hide it from the user?).
That way my agent will have the URL for further tool calls.

/**
 * Saves a user message made of images plus a text prompt, then schedules the
 * agent run for it.
 *
 * For every entry in `imageUrls` two content parts are emitted: an image part
 * carrying the URL, and a text part repeating the URL so it is also present
 * as plain text in the message. The `prompt` text part comes last.
 *
 * After saving, `internal.llm.messages.text._internal` is scheduled with a
 * delay of 0, passing the saved message id as `promptMessageId`.
 *
 * Throws a `TypeError` (from `new URL`) if any entry of `imageUrls` is not a
 * valid absolute URL.
 */
export const unauthed = mutation({
  args: {
    threadId: v.string(),
    prompt: v.string(),
    imageUrls: v.array(v.string()),
  },
  handler: async (ctx, { threadId, prompt, imageUrls }) => {
    // `content` holds message *parts*, not whole messages. Derive the parts
    // array type from the user variant of ModelMessage so no extra import is
    // needed.
    type UserContentParts = Exclude<
      Extract<ModelMessage, { role: "user" }>["content"],
      string
    >;

    const content: UserContentParts = [
      ...imageUrls.flatMap((url) => {
        // Parse once; reused for both the image part and the text echo.
        const imageUrl = new URL(url);
        return [
          {
            type: "image" as const,
            image: imageUrl,
            // NOTE(review): hard-coded for every URL regardless of the actual
            // image format — confirm this is intended for non-PNG images.
            mediaType: "image/png",
          },
          {
            type: "text" as const,
            text: `Image url: "${imageUrl.href}".`,
          },
        ];
      }),
      { type: "text", text: prompt },
    ];

    const { messageId } = await saveMessage(ctx, components.agent, {
      threadId,
      message: { role: "user", content },
    });

    await ctx.scheduler.runAfter(0, internal.llm.messages.text._internal, {
      promptMessageId: messageId,
      threadId,
    });
  },
});


/**
 * Internal action that continues the given thread with `basicAgent` and
 * streams a text response for the message identified by `promptMessageId`,
 * saving stream deltas chunked by line and throttled to 1000 ms.
 */
export const _internal = internalAction({
  args: { threadId: v.string(), promptMessageId: v.string() },
  handler: async (ctx, { threadId, promptMessageId }) => {
    console.log("Basic Agent called");
    const { thread } = await basicAgent.continueThread(ctx, { threadId });
    const result = await thread.streamText(
      { promptMessageId },
      { saveStreamDeltas: { chunking: "line", throttleMs: 1000 } },
    );
    // Drain the stream before the action returns; otherwise the handler can
    // finish while the response is still being generated and saved.
    await result.consumeStream();
  },
});
Was this page helpful?