# open-webui/backend/utils/misc.py

from pathlib import Path
import hashlib
import re


def get_gravatar_url(email):
    """Build the Gravatar avatar URL for an email address.

    The value is converted to ``str``, stripped of surrounding whitespace
    and lower-cased before being hashed with SHA-256. The resulting URL
    carries the ``d=mp`` query parameter.
    """
    normalized = str(email).strip().lower()
    hash_hex = hashlib.sha256(normalized.encode()).hexdigest()
    return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"


def calculate_sha256(file):
    """Return the hex SHA-256 digest of the data read from ``file``.

    ``file`` must provide a ``read(size)`` method; it is consumed from its
    current position in 8192-byte reads until a read returns ``b""``.
    """
    digest = hashlib.sha256()
    while True:
        # Fixed-size reads keep memory use flat for large files.
        block = file.read(8192)
        if block == b"":
            break
        digest.update(block)
    return digest.hexdigest()


def calculate_sha256_string(string):
    """Return the hex SHA-256 digest of ``string`` encoded as UTF-8."""
    return hashlib.sha256(string.encode("utf-8")).hexdigest()


def validate_email_format(email: str) -> bool:
    """Return True when ``email`` has the basic ``local@domain.tld`` shape.

    The entire string must consist of one or more non-"@" characters, a
    single "@", and a remainder that contains a "." with at least one
    non-"@" character on each side of it. This is a loose shape check,
    not full address validation.
    """
    # fullmatch anchors both ends. re.match only anchors the start, which
    # accepted inputs such as "a@b.c@d" because their prefix matched.
    return re.fullmatch(r"[^@]+@[^@]+\.[^@]+", email) is not None


def sanitize_filename(file_name):
    """Return a lower-cased, dash-separated version of ``file_name``.

    Every character that is neither a word character nor whitespace is
    removed (this includes dots), and each run of whitespace is replaced
    by a single dash.
    """
    # Keep only word characters and whitespace from the lower-cased name.
    stripped = re.sub(r"[^\w\s]", "", file_name.lower())

    # Collapse each whitespace run into one dash.
    return re.sub(r"\s+", "-", stripped)


def extract_folders_after_data_docs(path):
    """List the cumulative folder paths that follow ``data`` ... ``docs``.

    The path is split into components. The first ``data`` component is
    located, then the first ``docs`` component at any later position. The
    components after ``docs``, excluding the final one (treated as the
    file name), are returned as cumulative "/"-joined prefixes, e.g.
    ``["a", "a/b"]``. An empty list is returned when either marker is
    missing.
    """
    segments = Path(path).parts

    try:
        after_data = segments.index("data") + 1
        # "docs" only has to appear somewhere after "data".
        after_docs = segments.index("docs", after_data) + 1
    except ValueError:
        return []

    # Drop the last component and build one entry per nesting depth.
    directories = segments[after_docs:-1]
    return [
        "/".join(directories[:depth])
        for depth in range(1, len(directories) + 1)
    ]
feat: rag folder scan support 2024-02-18 05:06:08 +00:00			`from pathlib import Path`
feat: basic RBAC support 2023-11-19 05:41:43 +00:00			`import hashlib`
feat/fix: email format validation 2024-01-03 00:22:48 +00:00			`import re`
feat: basic RBAC support 2023-11-19 05:41:43 +00:00

			`def get_gravatar_url(email):`
			`# Trim leading and trailing whitespace from`
			`# an email address and force all characters`
			`# to lower case`
			`address = str(email).strip().lower()`

			`# Create a SHA256 hash of the final string`
			`hash_object = hashlib.sha256(address.encode())`
			`hash_hex = hash_object.hexdigest()`

			`# Grab the actual image URL`
feat: gravatar default image updated 2023-11-19 08:46:27 +00:00			`return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp"`
feat: gguf upload 2023-12-23 23:38:52 +00:00

			`def calculate_sha256(file):`
			`sha256 = hashlib.sha256()`
			`# Read the file in chunks to efficiently handle large files`
			`for chunk in iter(lambda: file.read(8192), b""):`
			`sha256.update(chunk)`
			`return sha256.hexdigest()`
feat/fix: email format validation 2024-01-03 00:22:48 +00:00

feat: web rag support 2024-01-27 06:17:28 +00:00			`def calculate_sha256_string(string):`
			`# Create a new SHA-256 hash object`
			`sha256_hash = hashlib.sha256()`
			`# Update the hash object with the bytes of the input string`
			`sha256_hash.update(string.encode("utf-8"))`
			`# Get the hexadecimal representation of the hash`
			`hashed_string = sha256_hash.hexdigest()`
			`return hashed_string`


feat/fix: email format validation 2024-01-03 00:22:48 +00:00			`def validate_email_format(email: str) -> bool:`
			`if not re.match(r"[^@]+@[^@]+\.[^@]+", email):`
			`return False`
			`return True`
feat: rag folder scan support 2024-02-18 05:06:08 +00:00

			`def sanitize_filename(file_name):`
			`# Convert to lowercase`
			`lower_case_file_name = file_name.lower()`

			`# Remove special characters using regular expression`
			`sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name)`

			`# Replace spaces with dashes`
			`final_file_name = re.sub(r"\s+", "-", sanitized_file_name)`

			`return final_file_name`


			`def extract_folders_after_data_docs(path):`
			`# Convert the path to a Path object if it's not already`
			`path = Path(path)`

			`# Extract parts of the path`
			`parts = path.parts`

			`# Find the index of '/data/docs' in the path`
			`try:`
			`index_data_docs = parts.index("data") + 1`
			`index_docs = parts.index("docs", index_data_docs) + 1`
			`except ValueError:`
			`return []`

			`# Exclude the filename and accumulate folder names`
			`tags = []`

			`folders = parts[index_docs:-1]`
			`for idx, part in enumerate(folders):`
			`tags.append("/".join(folders[: idx + 1]))`

			`return tags`