fix(wikipedia-streaming): convert to streaming-compatible responses

Eric Z 2024-12-29 05:31:36 -06:00
parent 5a6e476fd9
commit 542d143a88


@@ -79,7 +79,10 @@ class Pipeline:
         messages: List[dict],
         body: dict
     ) -> Union[str, Generator, Iterator]:
-        # This is where you can add your custom pipelines like RAG.
+        """
+        Main pipeline function. Performs wikipedia article lookup by query
+        and returns the summary of the first article.
+        """
         logger.debug(f"pipe:{self.name}")
 
         # Check if title generation is requested
@@ -101,86 +104,115 @@ class Pipeline:
         # 'user': {'name': 'User', 'id': '235a828f-84a3-44a0-b7af-721ee8be6571',
         # 'email': 'admin@localhost', 'role': 'admin'}}
-        re_query = re.compile(r"[^0-9A-Z]", re.IGNORECASE)
-        re_rough_word = re.compile(r"[\w]+", re.IGNORECASE)
-        topics = []
         dt_start = datetime.now()
+        multi_part = False
+        streaming = body.get("stream", False)
+        logger.warning(f"Stream: {streaming}")
+        context = ""
         # examples from https://pypi.org/project/wikipedia/
         # new addition - ability to include multiple topics with a semicolon
         for query in user_message.split(';'):
             self.rate_check(dt_start)
             query = query.strip()
-            try:
-                titles_found = wikipedia.search(query)
-                # r = requests.get(
-                #     f"https://en.wikipedia.org/w/api.php?action=opensearch&search={query}&limit=1&namespace=0&format=json"
-                # )
-                logger.info(f"Query: {query}, Found: {titles_found}")
-                topics.append((query, titles_found))
-            except Exception as e:
-                logger.error(f"Search Error: {query} -> {e}")
-                return f"Page Search Error: {query}"
-        context = ""
-        for query, titles_found in topics:
-            self.rate_check(dt_start)
-            if context: # add separator if multiple topics
-                context += "---\n"
-            try:
-                title_check = titles_found[0]
-                wiki_page = wikipedia.page(title_check, auto_suggest=False) # trick! don't auto-suggest
-            except wikipedia.exceptions.DisambiguationError as e:
-                str_error = str(e).replace("\n", ", ")
-                str_error = f"## Disambiguation Error ({query})\n* Status: {str_error}"
-                logger.error(str_error)
-                context += str_error + "\n"
-                continue
-            except wikipedia.exceptions.RedirectError as e:
-                str_error = str(e).replace("\n", ", ")
-                str_error = f"## Redirect Error ({query})\n* Status: {str_error}"
-                logger.error(str_error)
-                context += str_error + "\n"
-                continue
-            except Exception as e:
-                if titles_found:
-                    str_error = f"## Page Retrieve Error ({query})\n* Found Topics (matched '{title_check}') {titles_found}"
-                    logger.error(f"{str_error} -> {e}")
-                else:
-                    str_error = f"## Page Not Found ({query})\n* Unknown error"
-                    logger.error(f"{str_error} -> {e}")
-                context += str_error + "\n"
-                continue
-            # found a page / section
-            logger.info(f"Page Sections[{query}]: {wiki_page.sections}")
-            context += f"## {title_check}\n"
-            # flatten internal links
-            # link_md = [f"[{x}]({self.valves.WIKIPEDIA_ROOT}/{re_query.sub('_', x)})" for x in wiki_page.links[:10]]
-            # context += "* Links (first 30): " + ",".join(link_md) + "\n"
-            # add the textual summary
-            summary_full = wiki_page.summary
-            word_positions = [x.start() for x in re_rough_word.finditer(summary_full)]
-            if len(word_positions) > self.valves.WORD_LIMIT:
-                context += summary_full[:word_positions[self.valves.WORD_LIMIT]] + "...\n"
-            else:
-                context += summary_full + "\n"
-            # the more you know! link to further reading
-            context += "### Learn More" + "\n"
-            context += f"* [Read more on Wikipedia...]({wiki_page.url})\n"
-            # also spit out the related topics from search
-            link_md = [f"[{x}]({self.valves.WIKIPEDIA_ROOT}/{re_query.sub('_', x)})" for x in titles_found]
-            context += f"* Related topics: {', '.join(link_md)}\n"
-            # throw in the first image for good measure
-            if wiki_page.images:
-                context += f"\n![Image: {title_check}]({wiki_page.images[0]})\n"
-        # done with querying for different pages
-        return context if context else "No information found"
+            if multi_part:
+                if streaming:
+                    yield "---\n"  # add separator if multiple topics
+                else:
+                    context += "---\n"
+            if body.get("stream", True):
+                yield from self.stream_retrieve(query, dt_start)
+            else:
+                for chunk in self.stream_retrieve(query, dt_start):
+                    context += chunk
+            multi_part = True
+
+        if not streaming:
+            return context if context else "No information found"
+
+    def stream_retrieve(
+        self, query: str, dt_start: datetime,
+    ) -> Generator:
+        """
+        Retrieve the wikipedia page for the query and return its summary. Yields
+        chunks for streaming responses, but can also be iterated for a single response.
+        """
+        re_query = re.compile(r"[^0-9A-Z]", re.IGNORECASE)
+        re_rough_word = re.compile(r"[\w]+", re.IGNORECASE)
+        titles_found = None
+        try:
+            titles_found = wikipedia.search(query)
+            # r = requests.get(
+            #     f"https://en.wikipedia.org/w/api.php?action=opensearch&search={query}&limit=1&namespace=0&format=json"
+            # )
+            logger.info(f"Query: {query}, Found: {titles_found}")
+        except Exception as e:
+            logger.error(f"Search Error: {query} -> {e}")
+            yield f"Page Search Error: {query}"
+        if titles_found is None or not titles_found: # no results
+            yield f"No information found for '{query}'"
+            return
+
+        self.rate_check(dt_start)
+        # if context: # add separator if multiple topics
+        #     context += "---\n"
+        try:
+            title_check = titles_found[0]
+            wiki_page = wikipedia.page(title_check, auto_suggest=False) # trick! don't auto-suggest
+        except wikipedia.exceptions.DisambiguationError as e:
+            str_error = str(e).replace("\n", ", ")
+            str_error = f"## Disambiguation Error ({query})\n* Status: {str_error}"
+            logger.error(str_error)
+            yield str_error + "\n"
+            return
+        except wikipedia.exceptions.RedirectError as e:
+            str_error = str(e).replace("\n", ", ")
+            str_error = f"## Redirect Error ({query})\n* Status: {str_error}"
+            logger.error(str_error)
+            yield str_error + "\n"
+            return
+        except Exception as e:
+            if titles_found:
+                str_error = f"## Page Retrieve Error ({query})\n* Found Topics (matched '{title_check}') {titles_found}"
+                logger.error(f"{str_error} -> {e}")
+            else:
+                str_error = f"## Page Not Found ({query})\n* Unknown error"
+                logger.error(f"{str_error} -> {e}")
+            yield str_error + "\n"
+            return
+
+        # found a page / section
+        logger.info(f"Page Sections[{query}]: {wiki_page.sections}")
+        yield f"## {title_check}\n"
+        # flatten internal links
+        # link_md = [f"[{x}]({self.valves.WIKIPEDIA_ROOT}/{re_query.sub('_', x)})" for x in wiki_page.links[:10]]
+        # yield "* Links (first 30): " + ",".join(link_md) + "\n"
+        # add the textual summary
+        summary_full = wiki_page.summary
+        word_positions = [x.start() for x in re_rough_word.finditer(summary_full)]
+        if len(word_positions) > self.valves.WORD_LIMIT:
+            yield summary_full[:word_positions[self.valves.WORD_LIMIT]] + "...\n"
+        else:
+            yield summary_full + "\n"
+        # the more you know! link to further reading
+        yield "### Learn More" + "\n"
+        yield f"* [Read more on Wikipedia...]({wiki_page.url})\n"
+        # also spit out the related topics from search
+        link_md = [f"[{x}]({self.valves.WIKIPEDIA_ROOT}/{re_query.sub('_', x)})" for x in titles_found]
+        yield f"* Related topics: {', '.join(link_md)}\n"
+        # throw in the first image for good measure
+        if wiki_page.images:
+            yield f"\n![Image: {title_check}]({wiki_page.images[0]})\n"
+        return
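
After this change, pipe() always contains yield statements, so Python treats it as a generator function in both modes: streamed chunks are yielded as stream_retrieve() produces them, while the buffered non-streaming result comes back as the generator's return value. Below is a minimal sketch of how a caller might drive both paths; it assumes the usual pipelines signature pipe(self, user_message, model_id, messages, body) (only messages and body are visible in the first hunk), and the import path and request bodies are hypothetical, not part of this commit.

    from pipeline_wikipedia import Pipeline  # hypothetical module name

    pipeline = Pipeline()  # assumes the valves default-construct

    # streaming: iterate the generator; semicolons split the message into topics
    for chunk in pipeline.pipe(
        user_message="Grace Hopper; Enigma machine",
        model_id="wikipedia",
        messages=[],
        body={"stream": True},
    ):
        print(chunk, end="")

    # non-streaming: nothing is yielded, so the buffered `context` string
    # arrives as StopIteration.value once the generator is drained
    gen = pipeline.pipe(
        user_message="Grace Hopper",
        model_id="wikipedia",
        messages=[],
        body={"stream": False},
    )
    try:
        while True:
            next(gen)
    except StopIteration as stop:
        print(stop.value)  # the markdown summary, or "No information found"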