fix(wikipedia-streaming): convert to streaming-compatible responses

Eric Z 2024-12-29 05:31:36 -06:00
parent 5a6e476fd9
commit 542d143a88

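Note: the core change is a dual-mode response: when the client requests streaming, chunks are yielded as they are produced; otherwise the same generator is drained into a single string. A minimal sketch of the pattern (the names `fetch_chunks` and `pipe` here are illustrative, not this file's exact structure):

    from typing import Generator, Union

    def fetch_chunks(query: str) -> Generator[str, None, None]:
        # Illustrative producer: yields markdown fragments one at a time.
        yield f"## {query}\n"
        yield "summary text...\n"

    def pipe(user_message: str, body: dict) -> Union[str, Generator]:
        if body.get("stream", False):
            # Streaming client: hand chunks through as they arrive.
            return fetch_chunks(user_message)
        # Non-streaming client: drain the generator into one response string.
        return "".join(fetch_chunks(user_message)) or "No information found"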

@@ -79,7 +79,10 @@ class Pipeline:
         messages: List[dict],
         body: dict
     ) -> Union[str, Generator, Iterator]:
-        # This is where you can add your custom pipelines like RAG.
+        """
+        Main pipeline function. Performs a Wikipedia article lookup for each
+        query and returns the summary of the first matching article.
+        """
         logger.debug(f"pipe:{self.name}")
         # Check if title generation is requested
@@ -101,34 +104,64 @@ class Pipeline:
         # 'user': {'name': 'User', 'id': '235a828f-84a3-44a0-b7af-721ee8be6571',
         # 'email': 'admin@localhost', 'role': 'admin'}}
-        re_query = re.compile(r"[^0-9A-Z]", re.IGNORECASE)
-        re_rough_word = re.compile(r"[\w]+", re.IGNORECASE)
-        topics = []
         dt_start = datetime.now()
+        multi_part = False
+        streaming = body.get("stream", False)
+        logger.warning(f"Stream: {streaming}")
+        context = ""  # accumulates the full response when not streaming
         # examples from https://pypi.org/project/wikipedia/
         # new addition - ability to include multiple topics with a semicolon
         for query in user_message.split(';'):
             self.rate_check(dt_start)
             query = query.strip()
+            if multi_part:  # separate topics in a multi-query request
+                if streaming:
+                    yield "---\n"
+                else:
+                    context += "---\n"
+            if streaming:
+                yield from self.stream_retrieve(query, dt_start)
+            else:
+                for chunk in self.stream_retrieve(query, dt_start):
+                    context += chunk
+            multi_part = True
+        if not streaming:
+            return context if context else "No information found"
+
+    def stream_retrieve(
+        self, query: str, dt_start: datetime,
+    ) -> Generator:
+        """
+        Retrieve the Wikipedia page for the query and yield the summary.
+        Returns a generator for streaming responses, but it can also be
+        iterated to build a single response.
+        """
+        re_query = re.compile(r"[^0-9A-Z]", re.IGNORECASE)
+        re_rough_word = re.compile(r"[\w]+", re.IGNORECASE)
         titles_found = None
         try:
             titles_found = wikipedia.search(query)
             # r = requests.get(
             #     f"https://en.wikipedia.org/w/api.php?action=opensearch&search={query}&limit=1&namespace=0&format=json"
             # )
             logger.info(f"Query: {query}, Found: {titles_found}")
-            topics.append((query, titles_found))
         except Exception as e:
             logger.error(f"Search Error: {query} -> {e}")
-            return f"Page Search Error: {query}"
+            yield f"Page Search Error: {query}"
+            return
+        if titles_found is None or not titles_found:  # no results
+            yield f"No information found for '{query}'"
+            return
-        context = ""
-        for query, titles_found in topics:
-            self.rate_check(dt_start)
-            if context:  # add separator if multiple topics
-                context += "---\n"
+        # if context:  # add separator if multiple topics
+        #     context += "---\n"
         try:
             title_check = titles_found[0]
             wiki_page = wikipedia.page(title_check, auto_suggest=False)  # trick! don't auto-suggest
@@ -136,14 +169,14 @@ class Pipeline:
             str_error = str(e).replace("\n", ", ")
             str_error = f"## Disambiguation Error ({query})\n* Status: {str_error}"
             logger.error(str_error)
-            context += str_error + "\n"
-            continue
+            yield str_error + "\n"
+            return
         except wikipedia.exceptions.RedirectError as e:
             str_error = str(e).replace("\n", ", ")
             str_error = f"## Redirect Error ({query})\n* Status: {str_error}"
             logger.error(str_error)
-            context += str_error + "\n"
-            continue
+            yield str_error + "\n"
+            return
         except Exception as e:
             if titles_found:
                 str_error = f"## Page Retrieve Error ({query})\n* Found Topics (matched '{title_check}') {titles_found}"
@@ -151,36 +184,35 @@ class Pipeline:
             else:
                 str_error = f"## Page Not Found ({query})\n* Unknown error"
             logger.error(f"{str_error} -> {e}")
-            context += str_error + "\n"
-            continue
+            yield str_error + "\n"
+            return
         # found a page / section
         logger.info(f"Page Sections[{query}]: {wiki_page.sections}")
-        context += f"## {title_check}\n"
+        yield f"## {title_check}\n"
         # flatten internal links
         # link_md = [f"[{x}]({self.valves.WIKIPEDIA_ROOT}/{re_query.sub('_', x)})" for x in wiki_page.links[:10]]
-        # context += "* Links (first 30): " + ",".join(link_md) + "\n"
+        # yield "* Links (first 30): " + ",".join(link_md) + "\n"
         # add the textual summary
         summary_full = wiki_page.summary
         word_positions = [x.start() for x in re_rough_word.finditer(summary_full)]
         if len(word_positions) > self.valves.WORD_LIMIT:
-            context += summary_full[:word_positions[self.valves.WORD_LIMIT]] + "...\n"
+            yield summary_full[:word_positions[self.valves.WORD_LIMIT]] + "...\n"
         else:
-            context += summary_full + "\n"
+            yield summary_full + "\n"
         # the more you know! link to further reading
-        context += "### Learn More" + "\n"
-        context += f"* [Read more on Wikipedia...]({wiki_page.url})\n"
+        yield "### Learn More" + "\n"
+        yield f"* [Read more on Wikipedia...]({wiki_page.url})\n"
         # also spit out the related topics from search
         link_md = [f"[{x}]({self.valves.WIKIPEDIA_ROOT}/{re_query.sub('_', x)})" for x in titles_found]
-        context += f"* Related topics: {', '.join(link_md)}\n"
+        yield f"* Related topics: {', '.join(link_md)}\n"
         # throw in the first image for good measure
         if wiki_page.images:
-            context += f"\n![Image: {title_check}]({wiki_page.images[0]})\n"
+            yield f"\n![Image: {title_check}]({wiki_page.images[0]})\n"
         # done with querying for different pages
-        return context if context else "No information found"
+        return
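
Usage sketch for the new `stream_retrieve` generator (assumes a `Pipeline` instance can be constructed with its default valves, and that the `wikipedia` package is installed):

    from datetime import datetime

    pipeline = Pipeline()  # assumption: default construction works as in other pipelines

    # Streaming consumption: forward chunks as they are produced.
    for chunk in pipeline.stream_retrieve("Alan Turing", datetime.now()):
        print(chunk, end="")

    # Non-streaming consumption: join the same generator into one string.
    text = "".join(pipeline.stream_retrieve("Alan Turing", datetime.now()))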