Merge pull request #37 from x-isopod-x/dev/isopod/summarizer

feat: Summarizer FastAPI tool
This commit is contained in:
Tim Jaeryang Baek 2025-04-28 13:06:34 -07:00 committed by GitHub
commit 2b7844d634
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 189 additions and 0 deletions

View File

@ -0,0 +1,37 @@
# 📚 Local Summarizer Agent
This FastAPI server summarizes a given chunk of text.
It assumes that you are running an Ollama instance in an adjacent container with its default port (11434) available.
## 📦 Endpoints
### POST /summarize/text
Summarizes the given block of text
📥 Request
Body:
```
{
"text": "Your blob of text here. There is no enforced length limit, but it is recommended to keep it within the context window of the LLM you are asking for a summary from."
}
```
📤 Response:
```
{
"status": "success",
"summary": "A summary of your text."
}
```
### POST /summarize/chat
Not yet implemented. Summarizes an exported Open WebUI chat JSON blob.
## 🧩 Environment Variables
|Name|Description|Default|
|---|---|---|
|MODEL|The name of the model used for summarization. It must match a model available in your Ollama instance.|llama3|
|MODEL_URL|The base URL of the Ollama server that hosts the model.|http://host.docker.internal:11434|

View File

View File

@ -0,0 +1,22 @@
# Docker Compose definition for the summarizer service.
services:
summarizer:
container_name: summarizer
image: python:3-slim
ports:
# Publishes container port 8000 on host port 16000.
- 16000:8000
restart: unless-stopped
environment:
# Model name and server URL passed into the container as environment variables.
- MODEL=llama3
- MODEL_URL=http://host.docker.internal:11434
extra_hosts:
# Lets the container resolve host.docker.internal to the host gateway.
- "host.docker.internal:host-gateway"
volumes:
# Mounts the directory containing this file at /app inside the container.
- .:/app
# At container start: installs git and the pinned requirements, then launches
# the app from /app with `fastapi run`.
entrypoint: >
sh -c "
apt update &&
apt install -y git &&
cd /app &&
pip install -r ./requirements.txt &&
fastapi run
"

View File

@ -0,0 +1,28 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from .summarizers.text_summarizer import TextSummarizer
# Application object that the route decorators in this module attach to.
app = FastAPI(
    title="Summarizing Server",
    version="1.0.0",
    description="Leverages an LLM to summarize data",
)

# One summarizer instance per supported input kind, created once at import
# time and looked up by key from the route handlers.
summarizers = dict(TEXT=TextSummarizer())
# Request body accepted by POST /summarize/text.
class TextRequest(BaseModel):
    # The raw text to summarize.
    text: str
@app.post("/summarize/text")
def summarize_text(data: TextRequest):
    """Summarize the text in the request body.

    Delegates to the ``'TEXT'`` summarizer and returns
    ``{"status": "success", "summary": <content>}`` when the summarizer
    result carries a ``content`` key.

    Raises:
        HTTPException: status 500 when the summarizer raises, or when its
            result has no ``content`` key (the detail is then the reported
            ``error`` value).
    """
    # Only the summarizer call is guarded, so the HTTPException raised below
    # is not caught and re-wrapped by a blanket handler.
    try:
        result = summarizers['TEXT'].summarize(data.text)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    if 'content' in result:
        return {"status": "success", "summary": result['content']}

    # Fall back to a generic message instead of failing with a KeyError when
    # the summarizer returned neither 'content' nor 'error'.
    raise HTTPException(
        status_code=500,
        detail=str(result.get('error', 'summarizer returned no content')),
    )

View File

@ -0,0 +1,38 @@
annotated-types==0.7.0
anyio==4.9.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
dnspython==2.7.0
email_validator==2.2.0
fastapi==0.115.12
fastapi-cli==0.0.7
h11==0.14.0
httpcore==1.0.8
httptools==0.6.4
httpx==0.28.1
idna==3.10
Jinja2==3.1.6
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
pydantic==2.11.3
pydantic_core==2.33.1
Pygments==2.19.1
python-dotenv==1.1.0
python-multipart==0.0.20
PyYAML==6.0.2
requests==2.32.3
rich==14.0.0
rich-toolkit==0.14.1
shellingham==1.5.4
sniffio==1.3.1
starlette==0.46.2
typer==0.15.2
typing-inspection==0.4.0
typing_extensions==4.13.2
urllib3==2.4.0
uvicorn==0.34.1
uvloop==0.21.0
watchfiles==1.0.5
websockets==15.0.1

View File

@ -0,0 +1,7 @@
from abc import ABC, abstractmethod
class BaseSummarizer(ABC):
    """Abstract interface shared by all summarizer implementations."""

    @abstractmethod
    def summarize(self, data: str) -> dict:
        """Summarize ``data``; concrete subclasses must override this method."""

View File

@ -0,0 +1,57 @@
import requests
from .base import BaseSummarizer
import os
# Connection settings, read once at import time. The fallbacks match the
# defaults documented for these variables, so an unset or empty variable no
# longer leaves None here (which made the URL concatenation raise TypeError).
# A trailing slash is dropped so that appending '/api/generate' cannot
# produce a double slash.
MODEL_URL = (os.environ.get('MODEL_URL') or 'http://host.docker.internal:11434').rstrip('/')
MODEL = os.environ.get('MODEL') or 'llama3'
# System prompt sent with every summarization request.
# NOTE: this is a regular (non-raw) string, so each \n below is a real
# newline character at runtime.
# The second example previously contained the shell-escaping leftover '\'',
# which rendered as three consecutive single quotes; it is now a plain
# apostrophe.
SUMMARIZE_PROMPT = """You are the summarizing agent in a long chain of agents.
It is your job to responsibly capture the entirety of what is being described in incoming documents.
You can scrap small details, but you must make sure to hit all the major points.
These documents will be used in RAG down the line.
For example, given the following text:
"I've got updates on the tiny brains if\nyou are not familiar with brain\norganoids they are tiny human brains\nthat we can grow from stem cells you can\ngrow them in a literal jar if you want\nto but you can also hook them up to a\ncomputer or llm since a company called\nfinal spark decided to release brain\norganoid computation for industrial use\n"
You would respond with
"The speaker is discussing human brain stem cells being grown for industrial use."
Another example:
hi, i'm isopod (formerly hornet)\n \ni'm a software engineer\n \ni write code, make costumes, and write music
You would respond with
Isopod, formerly hornet, is a software engineer who makes costumes and writes music.
You always sanitize data. You always remove \n. You never mention yourself in your summaries. You never infer, only summarize what is presented. You never describe the text as summarized: you always just give the summary.
"""
class TextSummarizer(BaseSummarizer):
    """Summarizes plain text by POSTing it to ``MODEL_URL + '/api/generate'``."""

    # (connect, read) timeout in seconds for the model request. Without a
    # timeout, requests waits indefinitely on an unresponsive server. The
    # read timeout is generous because generation can be slow; subclasses or
    # instances may override it.
    REQUEST_TIMEOUT = (10, 600)

    def summarize(self, data: str) -> dict:
        """Request a summary of ``data`` from the configured model.

        Args:
            data: The text to summarize; sent as the ``prompt`` field, with
                ``SUMMARIZE_PROMPT`` as the ``system`` field.

        Returns:
            On success, ``{'type': 'text', 'source': <url>, 'content': <summary>}``.
            Otherwise ``{'type': 'text', 'source': <url>, 'error': <HTTP status>}``,
            which also covers a 200 response whose JSON has no ``response`` key.

        Raises:
            requests.RequestException: on connection failure or timeout.
            ValueError: if a 200 response body is not valid JSON.
        """
        payload = {
            "model": MODEL,
            "system": SUMMARIZE_PROMPT,
            "prompt": data,
            "stream": False,
            "options": {
                "temperature": 0.5,
            },
        }
        url = MODEL_URL + '/api/generate'
        result = requests.post(url=url, json=payload, timeout=self.REQUEST_TIMEOUT)

        if result.status_code == 200:
            json_data = result.json()
            if 'response' in json_data:
                return {
                    'type': 'text',
                    'source': url,
                    'content': json_data['response'],
                }

        # Non-200 status, or a 200 without a 'response' field: dump the raw
        # body to stdout for debugging and report the status code.
        print(result.content)
        return {
            'type': 'text',
            'source': url,
            'error': result.status_code,
        }