mirror of
https://github.com/open-webui/pipelines
synced 2025-05-10 23:50:45 +00:00
62 lines
1.9 KiB
Python
62 lines
1.9 KiB
Python
"""
|
|
title: Llama C++ Pipeline
|
|
author: open-webui
|
|
date: 2024-05-30
|
|
version: 1.0
|
|
license: MIT
|
|
description: A pipeline for generating responses using the Llama C++ library.
|
|
requirements: llama-cpp-python
|
|
"""
|
|
|
|
from typing import List, Union, Generator, Iterator
|
|
from schemas import OpenAIChatMessage
|
|
|
|
|
|
class Pipeline:
    """Chat pipeline backed by a local model loaded through llama-cpp-python.

    The model is not loaded in ``__init__``; ``on_startup`` creates the
    ``Llama`` instance and stores it on ``self.llm``. ``pipe`` then forwards
    chat messages to that instance.
    """

    def __init__(self):
        # Optionally, you can set the id and name of the pipeline.
        # Assign a unique identifier to the pipeline.
        # The identifier must be unique across all pipelines.
        # The identifier must be an alphanumeric string that can include
        # underscores or hyphens. It cannot contain spaces, special
        # characters, slashes, or backslashes.
        self.id = "llama_cpp_pipeline"
        self.name = "Llama C++ Pipeline"

        # Populated by on_startup(); stays None until the model is loaded.
        self.llm = None

    async def on_startup(self):
        """Load the model. This function is called when the server is started."""
        print(f"on_startup:{__name__}")

        # Imported lazily so the module can be imported (and the class
        # instantiated) even before llama-cpp-python is needed.
        from llama_cpp import Llama

        self.llm = Llama(
            model_path="./models/llama3.gguf",
            # n_gpu_layers=-1, # Uncomment to use GPU acceleration
            # seed=1337, # Uncomment to set a specific seed
            # n_ctx=2048, # Uncomment to increase the context window
        )

    async def on_shutdown(self):
        """Log shutdown. This function is called when the server is stopped."""
        print(f"on_shutdown:{__name__}")

    def pipe(
        self, user_message: str, model_id: str, messages: List[dict], body: dict
    ) -> Union[str, Generator, Iterator]:
        """Forward the conversation to the loaded model and return its reply.

        Args:
            user_message: The latest user message (only logged here).
            model_id: Identifier of the requested model (unused here).
            messages: Chat history passed through to the model unchanged.
            body: Request body; its optional ``"stream"`` entry is passed
                to the model call. A missing key is treated as ``False``.

        Returns:
            Whatever ``create_chat_completion_openai_v1`` returns for the
            given ``stream`` setting.

        Raises:
            RuntimeError: If the model has not been loaded yet, i.e.
                ``on_startup`` has not run (or failed).
        """
        # This is where you can add your custom pipelines like RAG.
        print(f"pipe:{__name__}")

        print(messages)
        print(user_message)
        print(body)

        # Fail with an explicit message instead of an AttributeError on None.
        if self.llm is None:
            raise RuntimeError(
                "Llama model is not loaded; on_startup() must complete "
                "before pipe() is called."
            )

        # A body without a "stream" key must not raise KeyError; default to
        # a non-streaming completion.
        stream = body.get("stream", False)

        return self.llm.create_chat_completion_openai_v1(
            messages=messages,
            stream=stream,
        )
|