pipelines/examples/wikipedia_pipeline.py

from typing import List, Union, Generator, Iterator
from pydantic import BaseModel
from schemas import OpenAIChatMessage
import requests
import os


class Pipeline:
    def __init__(self):
        # Assign a unique identifier to the pipeline.
        # The identifier must be unique across all pipelines.
        # The identifier must be an alphanumeric string that can include
        # underscores or hyphens. It cannot contain spaces, special
        # characters, slashes, or backslashes.
        self.id = "wiki_pipeline"
        self.name = "Wikipedia Pipeline"

        class Valves(BaseModel):
            pass

        # This pipeline declares no valves of its own; the OPENAI_API_KEY
        # passed here is ignored by the empty Valves model.
        self.valves = Valves(**{"OPENAI_API_KEY": os.getenv("OPENAI_API_KEY", "")})

    async def on_startup(self):
        # This function is called when the server is started.
        print(f"on_startup:{__name__}")
        pass

    async def on_shutdown(self):
        # This function is called when the server is stopped.
        print(f"on_shutdown:{__name__}")
        pass

    def pipe(
        self, user_message: str, model_id: str, messages: List[dict], body: dict
    ) -> Union[str, Generator, Iterator]:
        # This is where you can add your custom pipelines like RAG.
        print(f"pipe:{__name__}")

        if body.get("title", False):
            print("Title Generation")
            return "Wikipedia Pipeline"
        else:
            titles = []
            for query in [user_message]:
                # Look up the closest matching article title with the MediaWiki
                # opensearch API; `params` lets requests URL-encode the input.
                r = requests.get(
                    "https://en.wikipedia.org/w/api.php",
                    params={
                        "action": "opensearch",
                        "search": query,
                        "limit": 1,
                        "namespace": 0,
                        "format": "json",
                    },
                )
                # opensearch responds with [search_term, [titles], [descriptions], [urls]].
                response = r.json()
                titles = titles + response[1]
            print(titles)
            context = None
            if len(titles) > 0:
                # Fetch the plain-text introduction of each matched article in
                # one query; exintro/explaintext request intro-only plain text.
                r = requests.get(
                    "https://en.wikipedia.org/w/api.php",
                    params={
                        "format": "json",
                        "action": "query",
                        "prop": "extracts",
                        "exintro": 1,
                        "explaintext": 1,
                        "redirects": 1,
                        "titles": "|".join(titles),
                    },
                )
                response = r.json()
                # Concatenate the extract of every returned page.
                pages = response["query"]["pages"]
                for page in pages:
                    if context is None:
                        context = pages[page].get("extract", "") + "\n"
                    else:
                        context = context + pages[page].get("extract", "") + "\n"

            return context if context else "No information found"
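

# A minimal sketch (not part of the original pipeline) showing how pipe()
# could be exercised outside the Open WebUI pipelines server, assuming the
# `schemas` module imported above is importable. The sample message
# "Alan Turing" is an arbitrary placeholder.
if __name__ == "__main__":
    pipeline = Pipeline()

    # pipe() is synchronous; an empty body skips the title-generation branch,
    # so Wikipedia is actually queried.
    answer = pipeline.pipe(
        user_message="Alan Turing",
        model_id="wiki_pipeline",
        messages=[],
        body={},
    )
    print(answer)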