diff --git a/.serena/cache/python/document_symbols_cache_v20-05-25.pkl b/.serena/cache/python/document_symbols_cache_v20-05-25.pkl new file mode 100644 index 000000000..dc4daa57e Binary files /dev/null and b/.serena/cache/python/document_symbols_cache_v20-05-25.pkl differ diff --git a/.serena/memories/code_structure.md b/.serena/memories/code_structure.md new file mode 100644 index 000000000..8702d23c0 --- /dev/null +++ b/.serena/memories/code_structure.md @@ -0,0 +1,68 @@ +# Code Structure and Organization + +## Backend Structure (`backend/open_webui/`) + +- **`main.py`**: Main FastAPI application entry point +- **`config.py`**: Configuration management and environment variables +- **`env.py`**: Environment setup and constants +- **`constants.py`**: Application constants and message templates +- **`functions.py`**: Function execution and management +- **`tasks.py`**: Background task management + +## Router Organization (`backend/open_webui/routers/`) + +Each router handles a specific domain: + +- **`auths.py`**: Authentication and authorization +- **`users.py`**: User management +- **`chats.py`**: Chat conversations +- **`models.py`**: AI model management +- **`prompts.py`**: Prompt templates +- **`tools.py`**: Tool management +- **`functions.py`**: Function management +- **`files.py`**: File upload/management +- **`images.py`**: Image generation +- **`audio.py`**: Speech-to-text and text-to-speech +- **`retrieval.py`**: RAG and document processing +- **`memories.py`**: Memory management +- **`knowledge.py`**: Knowledge base management +- **`ollama.py`**: Ollama integration +- **`openai.py`**: OpenAI API integration +- **`pipelines.py`**: Pipeline management +- **`configs.py`**: Configuration management + +## Database Models (`backend/open_webui/models/`) + +- **`users.py`**: User model and settings +- **`chats.py`**: Chat conversations +- **`models.py`**: AI model definitions +- **`files.py`**: File metadata +- **`auths.py`**: Authentication data +- **`prompts.py`**: Prompt templates +- **`tools.py`**: Tool definitions +- **`functions.py`**: Function definitions +- **`memories.py`**: Memory storage +- **`knowledge.py`**: Knowledge base +- **`channels.py`**: Communication channels +- **`folders.py`**: Organization folders +- **`feedbacks.py`**: User feedback + +## Frontend Structure (`src/`) + +- **`app.html`**: Main HTML template +- **`app.css`**: Global styles +- **`lib/`**: Reusable components and utilities +- **`routes/`**: SvelteKit page routes + +## Utilities (`backend/open_webui/utils/`) + +- **`auth.py`**: Authentication utilities +- **`misc.py`**: General utilities +- **`models.py`**: Model utilities +- **`chat.py`**: Chat processing +- **`middleware.py`**: Request/response processing +- **`tools.py`**: Tool execution +- **`embeddings.py`**: Embedding generation +- **`code_interpreter.py`**: Code execution +- **`filter.py`**: Content filtering +- **`plugin.py`**: Plugin management diff --git a/.serena/memories/code_style_conventions.md b/.serena/memories/code_style_conventions.md new file mode 100644 index 000000000..4195f2bce --- /dev/null +++ b/.serena/memories/code_style_conventions.md @@ -0,0 +1,66 @@ +# Code Style and Conventions + +## Python Backend Style + +- **Formatter**: Black with default settings +- **Linter**: Pylint +- **Type Hints**: Strongly encouraged, especially for function signatures +- **Docstrings**: Use for public APIs and complex functions +- **Import Organization**: Follow PEP 8 standards +- **Variable Naming**: snake_case for variables and functions, 
PascalCase for classes +- **Constants**: UPPER_CASE for module-level constants + +## Code Quality Standards + +- **Line Length**: Black default (88 characters) +- **String Quotes**: Black will standardize (double quotes preferred) +- **Trailing Commas**: Black handles automatically +- **Function Organization**: Keep functions focused and single-purpose +- **Error Handling**: Use proper exception handling with specific exception types + +## API Design Patterns + +- **FastAPI Routers**: Organize endpoints by domain (users, chats, models, etc.) +- **Pydantic Models**: Use for request/response validation +- **Response Models**: Consistent JSON structure with proper HTTP status codes +- **Authentication**: JWT-based with dependency injection +- **Database Models**: SQLAlchemy ORM with proper relationships + +## Frontend Style + +- **Framework**: SvelteKit with TypeScript +- **Styling**: Tailwind CSS utility classes +- **Component Organization**: Modular components in `src/lib/` +- **State Management**: Svelte stores for global state +- **Type Safety**: TypeScript throughout the frontend + +## Configuration Management + +- **Environment Variables**: Extensive use of env vars for configuration +- **Default Values**: Sensible defaults in `config.py` +- **Validation**: Pydantic for configuration validation +- **Documentation**: Document all configuration options + +## Database Design + +- **Migrations**: Alembic for database schema changes +- **Relationships**: Proper foreign keys and relationships +- **Indexes**: Strategic indexing for performance +- **Naming**: Descriptive table and column names + +## Security Practices + +- **Authentication**: JWT tokens with proper expiration +- **Authorization**: Role-based access control +- **Input Validation**: Pydantic models for all inputs +- **SQL Injection**: SQLAlchemy ORM prevents direct SQL +- **CORS**: Proper CORS configuration +- **Environment Secrets**: Never commit secrets to version control + +## Testing Conventions + +- **Backend Tests**: Pytest with fixtures +- **Frontend Tests**: Vitest for unit tests +- **E2E Tests**: Cypress for integration testing +- **Test Organization**: Mirror source code structure +- **Mocking**: Mock external dependencies in tests diff --git a/.serena/memories/macos_development_guide.md b/.serena/memories/macos_development_guide.md new file mode 100644 index 000000000..514dcbdc8 --- /dev/null +++ b/.serena/memories/macos_development_guide.md @@ -0,0 +1,124 @@ +# macOS Development Environment Setup + +## System Requirements + +- **Operating System**: macOS +- **Python**: 3.11+ (required for backend) +- **Node.js**: 18.13.0+ (required for frontend) +- **Package Managers**: npm 6.0.0+, optionally uv for Python + +## macOS Specific Commands + +### System Information + +```bash +# Check macOS version +sw_vers + +# Check available memory +vm_stat | head -5 + +# Check disk space +df -h + +# Check CPU information +sysctl -n machdep.cpu.brand_string + +# Check running processes +ps aux | grep open-webui +``` + +### Package Management + +```bash +# Install Homebrew (if not installed) +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + +# Install Python with Homebrew +brew install python@3.11 + +# Install Node.js with Homebrew +brew install node + +# Install Docker Desktop for Mac +brew install --cask docker + +# Install uv (modern Python package manager) +brew install uv +``` + +### File System Operations + +```bash +# Navigate to project directory +cd /path/to/open-webui-next + +# 
Find files +find . -name "*.py" -type f # Find Python files +find . -name "*.svelte" -type f # Find Svelte files + +# Search in files +grep -r "search_term" backend/ # Search in backend +grep -r "search_term" src/ # Search in frontend + +# File permissions +chmod +x backend/start.sh # Make script executable +``` + +### Network and Ports + +```bash +# Check if port is in use +lsof -i :8080 # Check port 8080 +lsof -i :3000 # Check port 3000 (frontend dev) + +# Kill process on port +kill -9 $(lsof -ti:8080) # Kill process on port 8080 +``` + +### Environment Management + +```bash +# Python virtual environment +python3 -m venv venv +source venv/bin/activate +deactivate + +# Environment variables +export OPENAI_API_KEY="your-key" +echo $OPENAI_API_KEY +printenv | grep WEBUI +``` + +### Troubleshooting Commands + +```bash +# Check Python installation +which python3 +python3 --version + +# Check Node.js installation +which node +node --version +npm --version + +# Check Docker +docker --version +docker ps +docker images + +# Clear npm cache +npm cache clean --force + +# Clear Python cache +find . -type d -name "__pycache__" -delete +find . -name "*.pyc" -delete +``` + +## Development Workflow on macOS + +1. Use Terminal or iTerm2 for command line operations +2. Consider using VS Code or PyCharm for development +3. Use Docker Desktop for containerized development +4. Monitor system resources with Activity Monitor +5. Use Homebrew for package management diff --git a/.serena/memories/project_overview.md b/.serena/memories/project_overview.md new file mode 100644 index 000000000..f643e4c96 --- /dev/null +++ b/.serena/memories/project_overview.md @@ -0,0 +1,46 @@ +# Open WebUI Project Overview + +## Purpose + +Open WebUI is an extensible, feature-rich, and user-friendly self-hosted AI platform designed to operate entirely offline. It supports various LLM runners like Ollama and OpenAI-compatible APIs, with built-in inference engine for RAG, making it a powerful AI deployment solution. 
+ +## Key Features + +- Effortless setup with Docker or Kubernetes +- Ollama/OpenAI API integration +- Granular permissions and user groups +- Responsive design with PWA support +- Full Markdown and LaTeX support +- Voice/video call functionality +- Model builder for custom models +- Native Python function calling +- Local RAG integration +- Web search capabilities +- Image generation integration +- Multi-model conversations +- Role-based access control (RBAC) +- Multilingual support +- Plugin framework with Pipelines + +## Tech Stack + +- **Backend**: Python 3.11+ with FastAPI +- **Frontend**: SvelteKit with TypeScript +- **Database**: SQLAlchemy with support for PostgreSQL, MySQL, SQLite +- **Vector Database**: Chroma, Milvus, Qdrant, OpenSearch, Elasticsearch, PGVector, Pinecone +- **Deployment**: Docker, Kubernetes +- **Build Tools**: Vite, Node.js +- **Styling**: Tailwind CSS +- **Testing**: Pytest (backend), Vitest (frontend), Cypress (e2e) + +## Architecture + +The project follows a modern full-stack architecture: + +- **Backend**: Python FastAPI application serving REST APIs and WebSocket connections +- **Frontend**: SvelteKit SPA that communicates with the backend APIs +- **Database Layer**: SQLAlchemy ORM with Alembic migrations +- **Vector Storage**: Pluggable vector database support for RAG functionality +- **Authentication**: JWT-based authentication with OAuth support +- **Real-time**: WebSocket support for live features +- **File Storage**: Configurable storage providers (Local, S3, GCS, Azure) diff --git a/.serena/memories/suggested_commands.md b/.serena/memories/suggested_commands.md new file mode 100644 index 000000000..440c921eb --- /dev/null +++ b/.serena/memories/suggested_commands.md @@ -0,0 +1,111 @@ +# Development Commands and Scripts + +## Essential Commands for Development + +### Backend Development + +```bash +# Install dependencies +pip install -r backend/requirements.txt + +# Run backend in development mode +cd backend && python -m uvicorn open_webui.main:app --host 0.0.0.0 --port 8080 --reload + +# Run with uv (modern Python package manager) +cd backend && uv run uvicorn open_webui.main:app --host 0.0.0.0 --port 8080 --reload + +# Database migrations +alembic upgrade head +alembic revision --autogenerate -m "description" + +# Run tests +pytest backend/ + +# Code formatting +black . 
--exclude ".venv/|/venv/" + +# Linting +pylint backend/ +``` + +### Frontend Development + +```bash +# Install dependencies +npm install + +# Development server +npm run dev +npm run dev:5050 # Run on port 5050 + +# Build for production +npm run build + +# Build with watch mode +npm run build:watch + +# Preview production build +npm run preview + +# Type checking +npm run check +npm run check:watch + +# Linting +npm run lint:frontend + +# Formatting +npm run format + +# Prepare Pyodide +npm run pyodide:fetch + +# Internationalization +npm run i18n:parse +``` + +### Full Stack Commands + +```bash +# Format both frontend and backend +npm run format && npm run format:backend + +# Lint everything +npm run lint # Runs lint:frontend, lint:types, lint:backend + +# Testing +npm run test:frontend +pytest backend/ # Backend tests + +# End-to-end testing +npm run cy:open +``` + +### Docker Development + +```bash +# Using Makefile +make install # docker-compose up -d +make start # docker-compose start +make stop # docker-compose stop +make startAndBuild # docker-compose up -d --build +make update # Update and rebuild + +# Direct docker-compose +docker-compose up -d +docker-compose up -d --build +docker-compose logs -f +``` + +### Database Commands + +```bash +# Reset database +rm backend/data/webui.db # For SQLite + +# Run migrations +cd backend && alembic upgrade head + +# Create new migration +cd backend && alembic revision --autogenerate -m "migration description" +``` diff --git a/.serena/memories/task_completion_workflow.md b/.serena/memories/task_completion_workflow.md new file mode 100644 index 000000000..792f8a241 --- /dev/null +++ b/.serena/memories/task_completion_workflow.md @@ -0,0 +1,106 @@ +# Task Completion Workflow + +## When a Development Task is Completed + +### 1. Code Quality Checks + +```bash +# Format code +npm run format # Frontend formatting +npm run format:backend # Backend formatting (Black) + +# Lint code +npm run lint # Full linting (frontend + backend) +pylint backend/ # Backend specific linting + +# Type checking +npm run check # TypeScript type checking +``` + +### 2. Testing + +```bash +# Run unit tests +npm run test:frontend # Frontend tests with Vitest +pytest backend/ # Backend tests with Pytest + +# Run integration tests (if applicable) +npm run cy:open # Cypress e2e tests +``` + +### 3. Build Verification + +```bash +# Test production build +npm run build # Build frontend +npm run preview # Preview production build + +# Test backend startup +cd backend && python -m uvicorn open_webui.main:app --host 0.0.0.0 --port 8080 +``` + +### 4. Database Migrations (if schema changed) + +```bash +# Generate migration if database models were modified +cd backend && alembic revision --autogenerate -m "description of changes" + +# Apply migrations +cd backend && alembic upgrade head +``` + +### 5. Documentation Updates + +- Update README.md if new features added +- Update API documentation if endpoints changed +- Update configuration documentation if new env vars added +- Update CHANGELOG.md following semantic versioning + +### 6. Git Workflow + +```bash +# Stage changes +git add . + +# Commit with descriptive message +git commit -m "feat: add new feature description" +# or +git commit -m "fix: resolve bug description" +# or +git commit -m "docs: update documentation" + +# Push changes +git push origin feature-branch +``` + +### 7. 
System Verification Commands (macOS)
+
+```bash
+# Check system resources
+ps aux | grep open-webui  # Check if processes are running
+lsof -i :8080  # Check if port is in use
+df -h  # Check disk space
+top -l 1 | grep PhysMem  # Check memory usage (macOS has no `free` command)
+
+# Docker verification (if using Docker)
+docker ps  # Check running containers
+docker logs open-webui  # Check container logs
+```
+
+### 8. Performance Verification
+
+- Check application startup time
+- Verify API response times
+- Test memory usage under load
+- Verify frontend bundle sizes are reasonable
+
+### 9. Pre-deployment Checklist
+
+- [ ] All tests passing
+- [ ] Code properly formatted and linted
+- [ ] Documentation updated
+- [ ] Environment variables documented
+- [ ] Database migrations tested
+- [ ] No secrets in code
+- [ ] Performance is acceptable
+- [ ] Security considerations addressed
diff --git a/.serena/project.yml b/.serena/project.yml
new file mode 100644
index 000000000..3a6b04e6c
--- /dev/null
+++ b/.serena/project.yml
@@ -0,0 +1,66 @@
+# language of the project (csharp, python, rust, java, typescript, javascript, go, cpp, or ruby)
+# Special requirements:
+# * csharp: Requires the presence of a .sln file in the project folder.
+language: python
+
+# whether to use the project's gitignore file to ignore files
+# Added on 2025-04-07
+ignore_all_files_in_gitignore: true
+# list of additional paths to ignore
+# same syntax as gitignore, so you can use * and **
+# Was previously called `ignored_dirs`, please update your config if you are using that.
+# Added (renamed) on 2025-04-07
+ignored_paths: []
+
+# whether the project is in read-only mode
+# If set to true, all editing tools will be disabled and attempts to use them will result in an error
+# Added on 2025-04-18
+read_only: false
+
+
+# list of tool names to exclude. We recommend not excluding any tools, see the readme for more details.
+# Below is the complete list of tools for convenience.
+# To make sure you have the latest list of tools, and to view their descriptions,
+# execute `uv run scripts/print_tool_overview.py`.
+#
+# * `activate_project`: Activates a project by name.
+# * `check_onboarding_performed`: Checks whether project onboarding was already performed.
+# * `create_text_file`: Creates/overwrites a file in the project directory.
+# * `delete_lines`: Deletes a range of lines within a file.
+# * `delete_memory`: Deletes a memory from Serena's project-specific memory store.
+# * `execute_shell_command`: Executes a shell command.
+# * `find_referencing_code_snippets`: Finds code snippets in which the symbol at the given location is referenced.
+# * `find_referencing_symbols`: Finds symbols that reference the symbol at the given location (optionally filtered by type).
+# * `find_symbol`: Performs a global (or local) search for symbols with/containing a given name/substring (optionally filtered by type).
+# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes.
+# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file or directory.
+# * `initial_instructions`: Gets the initial instructions for the current project.
+# Should only be used in settings where the system prompt cannot be set,
+# e.g. in clients you have no control over, like Claude Desktop.
+# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol.
+# * `insert_at_line`: Inserts content at a given line in a file.
+# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol. +# * `list_dir`: Lists files and directories in the given directory (optionally with recursion). +# * `list_memories`: Lists memories in Serena's project-specific memory store. +# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building). +# * `prepare_for_new_conversation`: Provides instructions for preparing for a new conversation (in order to continue with the necessary context). +# * `read_file`: Reads a file within the project directory. +# * `read_memory`: Reads the memory with the given name from Serena's project-specific memory store. +# * `remove_project`: Removes a project from the Serena configuration. +# * `replace_lines`: Replaces a range of lines within a file with new content. +# * `replace_symbol_body`: Replaces the full definition of a symbol. +# * `restart_language_server`: Restarts the language server, may be necessary when edits not through Serena happen. +# * `search_for_pattern`: Performs a search for a pattern in the project. +# * `summarize_changes`: Provides instructions for summarizing the changes made to the codebase. +# * `switch_modes`: Activates modes by providing a list of their names +# * `think_about_collected_information`: Thinking tool for pondering the completeness of collected information. +# * `think_about_task_adherence`: Thinking tool for determining whether the agent is still on track with the current task. +# * `think_about_whether_you_are_done`: Thinking tool for determining whether the task is truly completed. +# * `write_memory`: Writes a named memory (for future reference) to Serena's project-specific memory store. +excluded_tools: [] + +# initial prompt for the project. It will always be given to the LLM upon activating the project +# (contrary to the memories, which are loaded on demand). +initial_prompt: "" + +project_name: "open-webui-next" diff --git a/backend/open_webui/routers/utils.py b/backend/open_webui/routers/utils.py index bbf139951..f0027e989 100644 --- a/backend/open_webui/routers/utils.py +++ b/backend/open_webui/routers/utils.py @@ -63,7 +63,7 @@ async def execute_code( else None ), request.app.state.config.CODE_EXECUTION_JUPYTER_TIMEOUT, - form_data.chat_id # Pass chat_id to the enhanced function + form_data.chat_id, # Pass chat_id to the enhanced function ) return output diff --git a/backend/open_webui/utils/code_interpreter.py b/backend/open_webui/utils/code_interpreter.py index 7a7288a7a..1e4bc8f02 100644 --- a/backend/open_webui/utils/code_interpreter.py +++ b/backend/open_webui/utils/code_interpreter.py @@ -10,6 +10,7 @@ import websockets from pydantic import BaseModel from open_webui.env import SRC_LOG_LEVELS + # Import necessary models for chat and file operations from open_webui.models.chats import Chats from open_webui.models.files import Files @@ -24,24 +25,24 @@ def get_attached_files_from_chat(chat_id: str) -> List[Dict[str, Any]]: Returns a list of file metadata dictionaries. 
""" logger.info(f"Scanning chat {chat_id} for attached files") - + try: # Get the chat data chat = Chats.get_chat_by_id(chat_id) if not chat: logger.warning(f"Chat {chat_id} not found") return [] - + attached_files = [] chat_data = chat.chat - + # Extract messages from chat history messages = chat_data.get("history", {}).get("messages", {}) - + for message_id, message in messages.items(): # Check if message has files attached files = message.get("files", []) - + for file_info in files: # Extract file metadata file_data = { @@ -50,37 +51,41 @@ def get_attached_files_from_chat(chat_id: str) -> List[Dict[str, Any]]: "type": file_info.get("type", "file"), "size": file_info.get("size"), "url": file_info.get("url"), - "message_id": message_id + "message_id": message_id, } - + # Only include files with valid IDs if file_data["id"]: attached_files.append(file_data) - logger.debug(f"Found attached file: {file_data['name']} (ID: {file_data['id']})") - + logger.debug( + f"Found attached file: {file_data['name']} (ID: {file_data['id']})" + ) + logger.info(f"Found {len(attached_files)} attached files in chat {chat_id}") return attached_files - + except Exception as e: logger.error(f"Error scanning chat {chat_id} for files: {str(e)}") return [] -async def auto_prepare_chat_files(chat_id: str, data_dir: str = "data") -> Dict[str, Any]: +async def auto_prepare_chat_files( + chat_id: str, data_dir: str = "data" +) -> Dict[str, Any]: """ Automatically prepare files attached to chat messages for use in the Jupyter environment. Creates symbolic links in the Jupyter data directory pointing to the uploaded files. Falls back to copying files if symlinks don't work (e.g., Docker environments). - + Args: chat_id: The chat ID to prepare files for data_dir: Base data directory (default: "data") - + Returns: Dictionary with preparation results including success status, prepared files count, and any errors """ logger.info(f"Auto-preparing files for chat {chat_id}") - + result = { "success": False, "chat_id": chat_id, @@ -88,76 +93,81 @@ async def auto_prepare_chat_files(chat_id: str, data_dir: str = "data") -> Dict[ "skipped_files": [], "errors": [], "total_files": 0, - "method": None # Will be "symlink" or "copy" + "method": None, # Will be "symlink" or "copy" } - + try: # Get attached files from chat attached_files = get_attached_files_from_chat(chat_id) result["total_files"] = len(attached_files) - + if not attached_files: logger.info(f"No files found in chat {chat_id}") result["success"] = True return result - + # Create chat-specific data directory chat_data_dir = os.path.join(data_dir, "uploads", chat_id) os.makedirs(chat_data_dir, exist_ok=True) logger.info(f"Created/verified chat data directory: {chat_data_dir}") - + # Test which method to use: symlink or copy # Force copy method for Docker compatibility - symlinks often fail in bind volumes use_symlinks = False # use_symlinks = await _test_symlink_accessibility(chat_data_dir, data_dir) method = "symlink" if use_symlinks else "copy" result["method"] = method - logger.info(f"Using {method} method for file preparation (hardcoded for Docker compatibility)") - + logger.info( + f"Using {method} method for file preparation (hardcoded for Docker compatibility)" + ) + # Track successfully processed files to avoid duplicates processed_file_ids = set() - + for file_info in attached_files: file_id = file_info["id"] file_name = file_info["name"] - + try: # Skip if already processed (deduplication) if file_id in processed_file_ids: logger.debug(f"Skipping duplicate file 
{file_name} (ID: {file_id})") - result["skipped_files"].append({ - "name": file_name, - "id": file_id, - "reason": "duplicate" - }) + result["skipped_files"].append( + {"name": file_name, "id": file_id, "reason": "duplicate"} + ) continue - + # Get file from database file_record = Files.get_file_by_id(file_id) if not file_record: logger.warning(f"File record not found for ID: {file_id}") - result["errors"].append(f"File record not found: {file_name} (ID: {file_id})") + result["errors"].append( + f"File record not found: {file_name} (ID: {file_id})" + ) continue - + # Use the actual file path from the database if not file_record.path: logger.warning(f"File path not found in record for ID: {file_id}") - result["errors"].append(f"File path not found: {file_name} (ID: {file_id})") + result["errors"].append( + f"File path not found: {file_name} (ID: {file_id})" + ) continue - + # Get the actual file path (handles different storage providers) from open_webui.storage.provider import Storage + source_file_path = Storage.get_file(file_record.path) - + # Check if source file exists if not os.path.exists(source_file_path): logger.warning(f"Source file not found: {source_file_path}") result["errors"].append(f"Source file not found: {file_name}") continue - + # Create target path in chat data directory target_path = os.path.join(chat_data_dir, file_name) - + # Remove existing file/symlink if it exists if os.path.exists(target_path) or os.path.islink(target_path): if os.path.islink(target_path): @@ -166,47 +176,56 @@ async def auto_prepare_chat_files(chat_id: str, data_dir: str = "data") -> Dict[ else: os.remove(target_path) logger.debug(f"Removed existing file: {target_path}") - + # Prepare file using the appropriate method if use_symlinks: # Create symbolic link using absolute path to ensure it resolves correctly source_file_path_abs = os.path.abspath(source_file_path) os.symlink(source_file_path_abs, target_path) - logger.info(f"Created symlink: {target_path} -> {source_file_path_abs}") + logger.info( + f"Created symlink: {target_path} -> {source_file_path_abs}" + ) else: # Copy file import shutil + shutil.copy2(source_file_path, target_path) logger.info(f"Copied file: {source_file_path} -> {target_path}") - + # Record successful preparation - result["prepared_files"].append({ - "name": file_name, - "id": file_id, - "target_path": target_path, - "source_path": source_file_path, - "size": file_info.get("size"), - "type": file_info.get("type"), - "method": method - }) - + result["prepared_files"].append( + { + "name": file_name, + "id": file_id, + "target_path": target_path, + "source_path": source_file_path, + "size": file_info.get("size"), + "type": file_info.get("type"), + "method": method, + } + ) + processed_file_ids.add(file_id) - + except Exception as e: error_msg = f"Error preparing file {file_name}: {str(e)}" logger.error(error_msg) result["errors"].append(error_msg) - + # Set success if we prepared at least some files or if there were no errors - result["success"] = len(result["prepared_files"]) > 0 or len(result["errors"]) == 0 - - logger.info(f"Auto-prepare completed for chat {chat_id}: " - f"{len(result['prepared_files'])} prepared using {method}, " - f"{len(result['skipped_files'])} skipped, " - f"{len(result['errors'])} errors") - + result["success"] = ( + len(result["prepared_files"]) > 0 or len(result["errors"]) == 0 + ) + + logger.info( + f"Auto-prepare completed for chat {chat_id}: " + f"{len(result['prepared_files'])} prepared using {method}, " + f"{len(result['skipped_files'])} 
skipped, " + f"{len(result['errors'])} errors" + ) + return result - + except Exception as e: error_msg = f"Failed to auto-prepare files for chat {chat_id}: {str(e)}" logger.error(error_msg) @@ -219,41 +238,41 @@ async def _test_symlink_accessibility(chat_data_dir: str, data_dir: str) -> bool """ Test whether symlinks will work in the target environment. This is especially important for Docker environments where symlinks may not be accessible. - + Args: chat_data_dir: The directory where files will be prepared data_dir: The base data directory - + Returns: True if symlinks should be used, False if files should be copied """ test_dir = os.path.join(chat_data_dir, ".test_symlink") test_source = None test_symlink = None - + try: # Create test directory os.makedirs(test_dir, exist_ok=True) - + # Ensure uploads directory exists for source file uploads_dir = os.path.join(data_dir, "uploads") os.makedirs(uploads_dir, exist_ok=True) - + # Create a test source file in the uploads directory test_source = os.path.join(uploads_dir, ".test_source_file") with open(test_source, "w") as f: f.write("test_content_for_symlink_detection") - + # Create test symlink using absolute path to ensure it resolves correctly test_symlink = os.path.join(test_dir, "test_symlink") test_source_abs = os.path.abspath(test_source) os.symlink(test_source_abs, test_symlink) - + # Test 1: Can we create the symlink? if not os.path.islink(test_symlink): logger.warning("Symlink creation test failed - file is not a symlink") return False - + # Test 2: Can we read through the symlink? try: with open(test_symlink, "r") as f: @@ -262,9 +281,11 @@ async def _test_symlink_accessibility(chat_data_dir: str, data_dir: str) -> bool logger.warning("Symlink accessibility test failed - content mismatch") return False except Exception as e: - logger.warning(f"Symlink accessibility test failed - cannot read through symlink: {e}") + logger.warning( + f"Symlink accessibility test failed - cannot read through symlink: {e}" + ) return False - + # Test 3: Can we stat the symlink target? 
try: stat_result = os.stat(test_symlink) @@ -274,15 +295,19 @@ async def _test_symlink_accessibility(chat_data_dir: str, data_dir: str) -> bool except Exception as e: logger.warning(f"Symlink stat test failed: {e}") return False - + logger.info("Symlink accessibility test passed - using symlinks") return True - + except OSError as e: if "Operation not supported" in str(e) or "Function not implemented" in str(e): - logger.info("Symlinks not supported on this filesystem - using file copying") + logger.info( + "Symlinks not supported on this filesystem - using file copying" + ) else: - logger.warning(f"Symlink test failed with OS error: {e} - using file copying") + logger.warning( + f"Symlink test failed with OS error: {e} - using file copying" + ) return False except Exception as e: logger.warning(f"Symlink test failed: {e} - using file copying") @@ -290,7 +315,9 @@ async def _test_symlink_accessibility(chat_data_dir: str, data_dir: str) -> bool finally: # Clean up test files try: - if test_symlink and (os.path.exists(test_symlink) or os.path.islink(test_symlink)): + if test_symlink and ( + os.path.exists(test_symlink) or os.path.islink(test_symlink) + ): os.unlink(test_symlink) if test_source and os.path.exists(test_source): os.remove(test_source) @@ -300,19 +327,21 @@ async def _test_symlink_accessibility(chat_data_dir: str, data_dir: str) -> bool logger.debug(f"Test cleanup failed (non-critical): {e}") -async def prepare_multiple_chats_files(chat_ids: List[str], data_dir: str = "data") -> Dict[str, Any]: +async def prepare_multiple_chats_files( + chat_ids: List[str], data_dir: str = "data" +) -> Dict[str, Any]: """ Prepare files for multiple chats at once (bulk operation). - + Args: chat_ids: List of chat IDs to prepare files for data_dir: Base data directory (default: "data") - + Returns: Dictionary with overall results and per-chat results """ logger.info(f"Bulk preparing files for {len(chat_ids)} chats") - + overall_result = { "success": True, "total_chats": len(chat_ids), @@ -322,37 +351,43 @@ async def prepare_multiple_chats_files(chat_ids: List[str], data_dir: str = "dat "summary": { "total_prepared_files": 0, "total_skipped_files": 0, - "total_errors": 0 - } + "total_errors": 0, + }, } - + for chat_id in chat_ids: try: chat_result = await auto_prepare_chat_files(chat_id, data_dir) overall_result["chat_results"][chat_id] = chat_result - + if chat_result["success"]: overall_result["successful_chats"] += 1 else: overall_result["failed_chats"] += 1 overall_result["success"] = False - + # Update summary - overall_result["summary"]["total_prepared_files"] += len(chat_result["prepared_files"]) - overall_result["summary"]["total_skipped_files"] += len(chat_result["skipped_files"]) + overall_result["summary"]["total_prepared_files"] += len( + chat_result["prepared_files"] + ) + overall_result["summary"]["total_skipped_files"] += len( + chat_result["skipped_files"] + ) overall_result["summary"]["total_errors"] += len(chat_result["errors"]) - + except Exception as e: error_msg = f"Failed to prepare chat {chat_id}: {str(e)}" logger.error(error_msg) overall_result["chat_results"][chat_id] = { "success": False, - "errors": [error_msg] + "errors": [error_msg], } overall_result["failed_chats"] += 1 overall_result["success"] = False - - logger.info(f"Bulk prepare completed: {overall_result['successful_chats']}/{overall_result['total_chats']} successful") + + logger.info( + f"Bulk prepare completed: {overall_result['successful_chats']}/{overall_result['total_chats']} successful" + ) return 
overall_result @@ -360,24 +395,19 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: """ Test filesystem support for symlinks and file operations. Helps identify permission problems and symlink support issues. - + Args: data_dir: Base data directory to test in - + Returns: Dictionary with test results """ logger.info(f"Testing filesystem support in {data_dir}") - - test_result = { - "success": True, - "tests": {}, - "errors": [], - "recommendations": [] - } - + + test_result = {"success": True, "tests": {}, "errors": [], "recommendations": []} + test_dir = os.path.join(data_dir, "test_auto_prepare") - + try: # Test 1: Directory creation try: @@ -388,7 +418,7 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: test_result["tests"]["directory_creation"] = False test_result["errors"].append(f"Directory creation failed: {str(e)}") test_result["success"] = False - + # Test 2: File creation test_file = os.path.join(test_dir, "test_file.txt") try: @@ -400,7 +430,7 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: test_result["tests"]["file_creation"] = False test_result["errors"].append(f"File creation failed: {str(e)}") test_result["success"] = False - + # Test 3: Symlink creation test_symlink = os.path.join(test_dir, "test_symlink.txt") try: @@ -412,7 +442,9 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: logger.debug("✓ Symlink creation test passed") else: test_result["tests"]["symlink_creation"] = False - test_result["errors"].append("Cannot test symlink: source file doesn't exist") + test_result["errors"].append( + "Cannot test symlink: source file doesn't exist" + ) except Exception as e: test_result["tests"]["symlink_creation"] = False test_result["errors"].append(f"Symlink creation failed: {str(e)}") @@ -421,7 +453,7 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: test_result["recommendations"].append( "Filesystem may not support symlinks. Consider using file copies instead." ) - + # Test 4: Path resolution try: if os.path.exists(test_symlink): @@ -434,11 +466,13 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: test_result["errors"].append("Symlink path resolution incorrect") else: test_result["tests"]["path_resolution"] = False - test_result["errors"].append("Cannot test path resolution: symlink doesn't exist") + test_result["errors"].append( + "Cannot test path resolution: symlink doesn't exist" + ) except Exception as e: test_result["tests"]["path_resolution"] = False test_result["errors"].append(f"Path resolution test failed: {str(e)}") - + # Test 5: Docker symlink accessibility (new test) if test_result["tests"].get("symlink_creation", False): try: @@ -450,7 +484,9 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: logger.debug("✓ Symlink accessibility test passed") else: test_result["tests"]["symlink_accessibility"] = False - test_result["errors"].append("Symlink content mismatch - possible Docker volume issue") + test_result["errors"].append( + "Symlink content mismatch - possible Docker volume issue" + ) test_result["recommendations"].append( "Symlinks may not work in Docker environment. Auto-prepare will use file copying." 
) @@ -462,7 +498,7 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: ) else: test_result["tests"]["symlink_accessibility"] = False - + finally: # Cleanup test files try: @@ -475,18 +511,20 @@ def test_filesystem_support(data_dir: str = "data") -> Dict[str, Any]: logger.debug("✓ Test cleanup completed") except Exception as e: logger.warning(f"Test cleanup failed: {str(e)}") - + # Add recommendations based on test results if not test_result["tests"].get("symlink_creation", False): test_result["recommendations"].append( "Consider implementing file copying as fallback for symlink failures" ) - + if test_result["success"]: logger.info("✓ All filesystem tests passed") else: - logger.warning(f"⚠ Some filesystem tests failed: {len(test_result['errors'])} errors") - + logger.warning( + f"⚠ Some filesystem tests failed: {len(test_result['errors'])} errors" + ) + return test_result @@ -713,29 +751,35 @@ class EnterpriseGatewayCodeExecutor: self.username = username self.chat_id = chat_id self.data_dir = data_dir - + # Modify code to replace /mnt/data with chat-specific path self.code = self._prepare_code_with_path_replacement(code) # Auto-prepare files for this chat before code execution self.prepare_result = None if self.chat_id: - logger.info(f"Auto-preparing files for chat {self.chat_id} before code execution") + logger.info( + f"Auto-preparing files for chat {self.chat_id} before code execution" + ) try: # Note: This is synchronous but auto_prepare_chat_files is async # We'll need to handle this in the run() method instead self._auto_prepare_needed = True logger.debug(f"Marked auto-prepare as needed for chat {self.chat_id}") except Exception as e: - logger.error(f"Failed to mark auto-prepare for chat {self.chat_id}: {str(e)}") + logger.error( + f"Failed to mark auto-prepare for chat {self.chat_id}: {str(e)}" + ) self._auto_prepare_needed = False else: self._auto_prepare_needed = False - + if self.base_url[-1] != "/": self.base_url += "/" - - logger.info(f"Initializing Enterprise Gateway connection to {self.base_url} with kernel {self.kernel_name}") + + logger.info( + f"Initializing Enterprise Gateway connection to {self.base_url} with kernel {self.kernel_name}" + ) if self.chat_id: logger.info(f"Using chat ID {self.chat_id} for path replacement") self.session = aiohttp.ClientSession(trust_env=True, base_url=self.base_url) @@ -746,19 +790,27 @@ class EnterpriseGatewayCodeExecutor: """Auto-prepare files for this chat if needed""" if not self._auto_prepare_needed or not self.chat_id: return - + try: - self.prepare_result = await auto_prepare_chat_files(self.chat_id, self.data_dir) + self.prepare_result = await auto_prepare_chat_files( + self.chat_id, self.data_dir + ) if self.prepare_result["success"]: prepared_count = len(self.prepare_result["prepared_files"]) if prepared_count > 0: - logger.info(f"Successfully prepared {prepared_count} files for chat {self.chat_id}") + logger.info( + f"Successfully prepared {prepared_count} files for chat {self.chat_id}" + ) else: logger.debug(f"No files to prepare for chat {self.chat_id}") else: - logger.warning(f"File preparation had issues for chat {self.chat_id}: {self.prepare_result['errors']}") + logger.warning( + f"File preparation had issues for chat {self.chat_id}: {self.prepare_result['errors']}" + ) except Exception as e: - logger.error(f"Failed to auto-prepare files for chat {self.chat_id}: {str(e)}") + logger.error( + f"Failed to auto-prepare files for chat {self.chat_id}: {str(e)}" + ) # Continue with execution even if file 
preparation fails def _prepare_code_with_path_replacement(self, code: str) -> str: @@ -769,22 +821,22 @@ class EnterpriseGatewayCodeExecutor: if not self.chat_id: logger.debug("No chat_id provided, using code as-is") return code - + # Create chat-specific path chat_data_path = f"{self.data_dir}/uploads/{self.chat_id}" - + # Ensure the directory exists os.makedirs(chat_data_path, exist_ok=True) logger.info(f"Ensured chat data path exists: {chat_data_path}") - + # Replace /mnt/data with the chat-specific path modified_code = code.replace("/mnt/data", chat_data_path) - + if modified_code != code: logger.debug(f"Replaced '/mnt/data' with '{chat_data_path}' in code") logger.debug(f"Original code: {code}") logger.debug(f"Modified code: {modified_code}") - + return modified_code def _prepare_results_with_path_replacement(self, text: str) -> str: @@ -794,16 +846,16 @@ class EnterpriseGatewayCodeExecutor: """ if not self.chat_id or not text: return text - + # Create chat-specific path chat_data_path = f"{self.data_dir}/uploads/{self.chat_id}" - + # Replace the chat-specific path back to /mnt/data for user display modified_text = text.replace(chat_data_path, "/mnt/data") - + if modified_text != text: logger.debug(f"Replaced '{chat_data_path}' back to '/mnt/data' in output") - + return modified_text async def __aenter__(self): @@ -812,7 +864,9 @@ class EnterpriseGatewayCodeExecutor: async def __aexit__(self, exc_type, exc_val, exc_tb): if self.kernel_id: try: - async with self.session.delete(f"api/kernels/{self.kernel_id}", headers=self.headers) as response: + async with self.session.delete( + f"api/kernels/{self.kernel_id}", headers=self.headers + ) as response: response.raise_for_status() logger.info(f"Closed kernel {self.kernel_id}") except Exception as err: @@ -823,7 +877,7 @@ class EnterpriseGatewayCodeExecutor: try: # Auto-prepare files first if needed await self._auto_prepare_files() - + await self.setup_auth() await self.init_kernel() await self.execute_code() @@ -843,9 +897,9 @@ class EnterpriseGatewayCodeExecutor: "env": { "KERNEL_USERNAME": self.username, "KERNEL_ID": str(uuid.uuid4()), - } + }, } - + logger.info(f"Starting {self.kernel_name} kernel for user {self.username}") try: async with self.session.post( @@ -870,7 +924,9 @@ class EnterpriseGatewayCodeExecutor: async def execute_code(self) -> None: websocket_url, headers = self.init_ws() try: - async with websockets.connect(websocket_url, additional_headers=headers) as ws: + async with websockets.connect( + websocket_url, additional_headers=headers + ) as ws: await self.execute_in_gateway(ws) except websockets.exceptions.WebSocketException as e: logger.error(f"WebSocket error: {e}") @@ -884,7 +940,7 @@ class EnterpriseGatewayCodeExecutor: logger.debug(f"Chat ID: {self.chat_id}, Data dir: {self.data_dir}") chat_data_path = f"{self.data_dir}/uploads/{self.chat_id}" logger.debug(f"Replacing '/mnt/data' with '{chat_data_path}'") - + # Send message using Enterprise Gateway format msg_id = str(uuid.uuid4()) request = { @@ -893,7 +949,7 @@ class EnterpriseGatewayCodeExecutor: "msg_type": "execute_request", "username": self.username, "session": str(uuid.uuid4()), - "version": "5.4" + "version": "5.4", }, "parent_header": {}, "metadata": {}, @@ -903,34 +959,34 @@ class EnterpriseGatewayCodeExecutor: "store_history": True, "user_expressions": {}, "allow_stdin": False, - "stop_on_error": True + "stop_on_error": True, }, "buffers": [], - "channel": "shell" + "channel": "shell", } - + logger.debug(f"Sending execute request with msg_id {msg_id}") 
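        # The request above is a Jupyter messaging protocol (v5.4)
        # execute_request sent on the shell channel; replies are matched to it
        # in the receive loop below by comparing parent_header.msg_id against
        # our msg_id.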
logger.debug(f"Code to execute: {self.code}") await ws.send(json.dumps(request)) - + # Parse responses outputs, results = [], [] stdout_content, stderr_content = "", "" error = None - + while True: try: # Wait for message message = await asyncio.wait_for(ws.recv(), self.timeout) response = json.loads(message) - + # Check if this message is a response to our request if response.get("parent_header", {}).get("msg_id") != msg_id: continue - + msg_type = response.get("msg_type") logger.debug(f"Received message of type {msg_type}") - + if msg_type == "stream": if response["content"]["name"] == "stdout": stdout_content += response["content"]["text"] @@ -938,7 +994,7 @@ class EnterpriseGatewayCodeExecutor: elif response["content"]["name"] == "stderr": stderr_content += response["content"]["text"] logger.debug(f"STDERR: {response['content']['text']}") - + elif msg_type == "execute_result": logger.debug(f"Execute result: {response['content']}") if "data" in response["content"]: @@ -947,9 +1003,11 @@ class EnterpriseGatewayCodeExecutor: results.append(result_text) logger.debug(f"Result text: {result_text}") if "image/png" in response["content"]["data"]: - results.append(f"data:image/png;base64,{response['content']['data']['image/png']}") + results.append( + f"data:image/png;base64,{response['content']['data']['image/png']}" + ) logger.debug("Added image result") - + elif msg_type == "display_data": logger.debug(f"Display data: {response['content']}") if "data" in response["content"]: @@ -958,48 +1016,60 @@ class EnterpriseGatewayCodeExecutor: results.append(result_text) logger.debug(f"Display text: {result_text}") if "image/png" in response["content"]["data"]: - results.append(f"data:image/png;base64,{response['content']['data']['image/png']}") + results.append( + f"data:image/png;base64,{response['content']['data']['image/png']}" + ) logger.debug("Added image display") - + elif msg_type == "error": error = { "ename": response["content"]["ename"], "evalue": response["content"]["evalue"], - "traceback": response["content"]["traceback"] + "traceback": response["content"]["traceback"], } stderr_content += "\n".join(error["traceback"]) logger.debug(f"Execution error: {error}") - + elif msg_type == "execute_reply": - logger.debug(f"Execute reply status: {response['content']['status']}") + logger.debug( + f"Execute reply status: {response['content']['status']}" + ) if response["content"]["status"] == "ok": logger.debug("Received execute_reply with status=ok") break elif response["content"]["status"] == "error": - if not error: # Only add if we haven't already processed an error message + if ( + not error + ): # Only add if we haven't already processed an error message error = { "ename": response["content"]["ename"], "evalue": response["content"]["evalue"], - "traceback": response["content"]["traceback"] + "traceback": response["content"]["traceback"], } stderr_content += "\n".join(error["traceback"]) logger.debug("Received execute_reply with status=error") break - + elif msg_type == "status": if response["content"]["execution_state"] == "idle": # We still wait for execute_reply before breaking out logger.debug("Kernel is idle") - + except asyncio.TimeoutError: stderr_content += "\nExecution timed out." 
logger.warning(f"Execution timed out after {self.timeout}s") break - - self.result.stdout = self._prepare_results_with_path_replacement(stdout_content.strip()) - self.result.stderr = self._prepare_results_with_path_replacement(stderr_content.strip()) - self.result.result = self._prepare_results_with_path_replacement("\n".join(results).strip() if results else "") - + + self.result.stdout = self._prepare_results_with_path_replacement( + stdout_content.strip() + ) + self.result.stderr = self._prepare_results_with_path_replacement( + stderr_content.strip() + ) + self.result.result = self._prepare_results_with_path_replacement( + "\n".join(results).strip() if results else "" + ) + logger.debug(f"Final result - stdout: {self.result.stdout}") logger.debug(f"Final result - stderr: {self.result.stderr}") logger.debug(f"Final result - result: {self.result.result}") @@ -1015,14 +1085,15 @@ async def deprecated_execute_code_jupyter( result = await executor.run() return result.model_dump() + async def execute_code_jupyter( - base_url: str, - code: str, - token: str = "", - password: str = "", + base_url: str, + code: str, + token: str = "", + password: str = "", timeout: int = 60, chat_id: str = "", - data_dir: str = "data" + data_dir: str = "data", ) -> dict: async with EnterpriseGatewayCodeExecutor( base_url, code, token, password, timeout, chat_id=chat_id, data_dir=data_dir @@ -1032,18 +1103,18 @@ async def execute_code_jupyter( def generate_dynamic_code_interpreter_prompt( - base_prompt: str, - chat_id: str = "", - attached_files: Optional[List[Dict[str, Any]]] = None + base_prompt: str, + chat_id: str = "", + attached_files: Optional[List[Dict[str, Any]]] = None, ) -> str: """ Generate a dynamic code interpreter prompt that includes information about attached files. 
- + Args: base_prompt: The base code interpreter prompt template chat_id: Chat ID for context attached_files: List of attached file information - + Returns: Enhanced prompt with file information """ @@ -1051,22 +1122,24 @@ def generate_dynamic_code_interpreter_prompt( if chat_id: # Try to get attached files from chat attached_files = get_attached_files_from_chat(chat_id) - + if not attached_files: # No files attached, return base prompt return base_prompt - + # Create file information section file_info_lines = [] file_info_lines.append("\n#### Available Files") - file_info_lines.append("The following files have been attached to this conversation and are available in `/mnt/data/`:") + file_info_lines.append( + "The following files have been attached to this conversation and are available in `/mnt/data/`:" + ) file_info_lines.append("") - + for file_info in attached_files: file_name = file_info.get("name", "unknown_file") file_type = file_info.get("type", "file") file_size = file_info.get("size") - + # Format file size if available size_str = "" if file_size: @@ -1076,47 +1149,65 @@ def generate_dynamic_code_interpreter_prompt( size_str = f" ({file_size / 1024:.1f} KB)" else: size_str = f" ({file_size / (1024 * 1024):.1f} MB)" - - file_info_lines.append(f"- **{file_name}**{size_str} - Available at `/mnt/data/{file_name}`") - + + file_info_lines.append( + f"- **{file_name}**{size_str} - Available at `/mnt/data/{file_name}`" + ) + # Add file type specific suggestions - if file_name.lower().endswith(('.csv', '.tsv')): - file_info_lines.append(f" - Data file - Use `pd.read_csv('/mnt/data/{file_name}')` to load") - elif file_name.lower().endswith(('.xlsx', '.xls')): - file_info_lines.append(f" - Excel file - Use `pd.read_excel('/mnt/data/{file_name}')` to load") - elif file_name.lower().endswith(('.json', '.jsonl')): - file_info_lines.append(f" - JSON file - Use `pd.read_json('/mnt/data/{file_name}')` or `json.load()` to load") - elif file_name.lower().endswith(('.txt', '.md', '.py', '.js', '.html', '.css')): - file_info_lines.append(f" - Text file - Use `open('/mnt/data/{file_name}', 'r').read()` to load") - elif file_name.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')): - file_info_lines.append(f" - Image file - Use `PIL.Image.open('/mnt/data/{file_name}')` or `cv2.imread()` to load") - elif file_name.lower().endswith(('.pdf')): - file_info_lines.append(f" - PDF file - Use `PyPDF2` or `pdfplumber` to extract text/data") - + if file_name.lower().endswith((".csv", ".tsv")): + file_info_lines.append( + f" - Data file - Use `pd.read_csv('/mnt/data/{file_name}')` to load" + ) + elif file_name.lower().endswith((".xlsx", ".xls")): + file_info_lines.append( + f" - Excel file - Use `pd.read_excel('/mnt/data/{file_name}')` to load" + ) + elif file_name.lower().endswith((".json", ".jsonl")): + file_info_lines.append( + f" - JSON file - Use `pd.read_json('/mnt/data/{file_name}')` or `json.load()` to load" + ) + elif file_name.lower().endswith((".txt", ".md", ".py", ".js", ".html", ".css")): + file_info_lines.append( + f" - Text file - Use `open('/mnt/data/{file_name}', 'r').read()` to load" + ) + elif file_name.lower().endswith( + (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff") + ): + file_info_lines.append( + f" - Image file - Use `PIL.Image.open('/mnt/data/{file_name}')` or `cv2.imread()` to load" + ) + elif file_name.lower().endswith((".pdf")): + file_info_lines.append( + f" - PDF file - Use `PyPDF2` or `pdfplumber` to extract text/data" + ) + file_info_lines.append("") - 
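    # Close the file list with a reminder that the paths are live: the
    # executor rewrites /mnt/data to the chat's upload directory on the way in
    # (see _prepare_code_with_path_replacement) and maps it back on output.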
file_info_lines.append("**Important**: These files are immediately ready to use - no upload needed. Reference them directly by their paths above.") - + file_info_lines.append( + "**Important**: These files are immediately ready to use - no upload needed. Reference them directly by their paths above." + ) + # Insert file information after the main code interpreter description but before the final note file_info_section = "\n".join(file_info_lines) - + # Find a good insertion point in the base prompt - prompt_lines = base_prompt.split('\n') - + prompt_lines = base_prompt.split("\n") + # Look for the line about /mnt/data and insert file info after it insertion_point = -1 for i, line in enumerate(prompt_lines): if "drive at '/mnt/data'" in line.lower(): insertion_point = i + 1 break - + if insertion_point > 0: # Insert file information after the /mnt/data line enhanced_lines = ( - prompt_lines[:insertion_point] + - file_info_section.split('\n') + - prompt_lines[insertion_point:] + prompt_lines[:insertion_point] + + file_info_section.split("\n") + + prompt_lines[insertion_point:] ) - return '\n'.join(enhanced_lines) + return "\n".join(enhanced_lines) else: # Fallback: append file information at the end - return base_prompt + "\n" + file_info_section \ No newline at end of file + return base_prompt + "\n" + file_info_section diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 2b147a94a..341e64526 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -81,7 +81,10 @@ from open_webui.utils.filter import ( get_sorted_filter_ids, process_filter_functions, ) -from open_webui.utils.code_interpreter import execute_code_jupyter, generate_dynamic_code_interpreter_prompt +from open_webui.utils.code_interpreter import ( + execute_code_jupyter, + generate_dynamic_code_interpreter_prompt, +) from open_webui.tasks import create_task @@ -846,17 +849,17 @@ async def process_chat_payload(request, form_data, user, metadata, model): if request.app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE != "" else DEFAULT_CODE_INTERPRETER_PROMPT ) - + # Get attached files from metadata attached_files = metadata.get("files", []) - + # Generate enhanced prompt with file information enhanced_prompt = generate_dynamic_code_interpreter_prompt( base_prompt=base_prompt, attached_files=attached_files, - chat_id=metadata.get("chat_id", "") + chat_id=metadata.get("chat_id", ""), ) - + form_data["messages"] = add_or_update_user_message( enhanced_prompt, form_data["messages"], @@ -2278,7 +2281,7 @@ async def process_chat_response( ), request.app.state.config.CODE_INTERPRETER_JUPYTER_TIMEOUT, chat_id=metadata.get("chat_id", ""), - data_dir="data" + data_dir="data", ) else: output = { diff --git a/package-lock.json b/package-lock.json index d17e57180..cba289b7f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "open-webui", - "version": "0.6.15", + "version": "0.6.15c", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "open-webui", - "version": "0.6.15", + "version": "0.6.15c", "dependencies": { "@azure/msal-browser": "^4.5.0", "@codemirror/lang-javascript": "^6.2.2", diff --git a/package.json b/package.json index 7f0d121be..48b10fd28 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "open-webui", - "version": "0.6.15", + "version": "0.6.15c", "private": true, "scripts": { "dev": "npm run pyodide:fetch && vite dev --host",
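Not part of the diff: a minimal usage sketch for the new chat-aware `execute_code_jupyter()` signature introduced above. The gateway URL, token, and chat id are hypothetical placeholders, and a reachable Jupyter Enterprise Gateway is assumed.

```python
import asyncio

from open_webui.utils.code_interpreter import execute_code_jupyter


async def main() -> None:
    # Passing chat_id triggers auto-preparation of the chat's attached files
    # and the transparent /mnt/data -> data/uploads/<chat_id> path rewriting.
    output = await execute_code_jupyter(
        base_url="http://localhost:8888",  # Enterprise Gateway (placeholder)
        code="print(open('/mnt/data/example.csv').readline())",
        token="",  # gateway auth token, if one is configured
        password="",
        timeout=60,
        chat_id="example-chat-id",  # hypothetical chat id
        data_dir="data",
    )
    print(output)  # dict with stdout, stderr, and result fields


asyncio.run(main())
```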