#!/usr/bin/env python3
# Only use stdlib to avoid needing to install dependencies
import ast
import sys
import urllib.request
import os


# AST node types for container literals used as PersistentConfig defaults,
# paired with the type label reported for them.
_CONTAINER_DEFAULTS = (
    (ast.List, "list[dict]"),  # Assumes a list of dicts (DEFAULT_PROMPT_SUGGESTIONS style)
    (ast.Dict, "dict"),
    (ast.Tuple, "tuple"),
    (ast.Set, "set"),
)

# Builtin conversions whose name doubles as the reported type label.
_CONVERTERS = frozenset({"int", "float", "bool", "str"})


def _is_os_environ(node):
    """Return True if *node* is the attribute expression ``os.environ``."""
    return (
        isinstance(node, ast.Attribute)
        and node.attr == "environ"
        and isinstance(node.value, ast.Name)
        and node.value.id == "os"
    )


def _is_env_lookup(node):
    """Return True if *node* is a call to ``os.getenv`` or ``os.environ.get``."""
    if not isinstance(node, ast.Call) or not isinstance(node.func, ast.Attribute):
        return False
    func = node.func
    if func.attr == "getenv":
        return isinstance(func.value, ast.Name) and func.value.id == "os"
    return func.attr == "get" and _is_os_environ(func.value)


def _lookup_default(call):
    """Return the fallback given as second positional argument of an env lookup.

    Constants are returned as their Python value, any other expression as
    unparsed source text, and ``None`` when no fallback is passed.
    """
    if len(call.args) < 2:
        return None
    fallback = call.args[1]
    if isinstance(fallback, ast.Constant):
        return fallback.value
    return ast.unparse(fallback)


def _classify_persistent_default(node):
    """Return ``(default, type_label)`` for a PersistentConfig default expression."""
    if isinstance(node, ast.Constant):
        value = node.value
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(value, bool):
            return value, "bool"
        if isinstance(value, int):
            return value, "int"
        if isinstance(value, float):
            return value, "float"
        return value, "str"

    for node_type, label in _CONTAINER_DEFAULTS:
        if isinstance(node, node_type):
            return ast.unparse(node), label

    if isinstance(node, ast.Name):
        # A variable used as default: report its name.
        return node.id, "str"

    if isinstance(node, ast.Compare):
        # e.g. os.environ.get(...).lower() == "true"
        return ast.unparse(node), "bool"

    if _is_env_lookup(node):
        return _lookup_default(node), "str"

    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id in _CONVERTERS
    ):
        # int(...)/float(...)/bool(...)/str(...): keep the default instead of
        # dropping it. When the wrapped value is an env lookup, report that
        # lookup's fallback; otherwise report the whole expression.
        wrapped = node.args[0] if node.args else None
        if _is_env_lookup(wrapped):
            return _lookup_default(wrapped), node.func.id
        return ast.unparse(node), node.func.id

    # Any other expression is captured verbatim.
    return ast.unparse(node), "str"


class _EnvVarVisitor(ast.NodeVisitor):
    """Collect environment variable names with their default and type label."""

    def __init__(self):
        # name -> {"default": ..., "type": ...}; the first sighting wins.
        self.found = {}

    def _record(self, name, default=None, var_type="str"):
        """Store *name* unless an earlier access already registered it."""
        if name not in self.found:
            self.found[name] = {"default": default, "type": var_type}

    def visit_Subscript(self, node):
        """Handle ``os.environ["NAME"]`` and ``os.environ["A" + "B"]``."""
        if _is_os_environ(node.value):
            key = node.slice
            if isinstance(key, ast.Constant):
                self._record(key.value)
            elif isinstance(key, ast.BinOp):
                # Dynamically built names are reported as source text.
                self._record(ast.unparse(key))
        self.generic_visit(node)

    def visit_Call(self, node):
        """Handle ``os.getenv``, ``os.environ.get`` and ``PersistentConfig`` calls."""
        first = node.args[0] if node.args else None
        # Only calls whose first argument is a literal name are recorded.
        if isinstance(first, ast.Constant):
            if _is_env_lookup(node):
                self._record(first.value, _lookup_default(node))
            elif isinstance(node.func, ast.Name) and node.func.id == "PersistentConfig":
                if len(node.args) > 2:  # Default value is the third argument
                    default, var_type = _classify_persistent_default(node.args[2])
                else:
                    default, var_type = None, "str"
                self._record(first.value, default, var_type)
        # Keep descending so nested lookups of other variables are found too.
        self.generic_visit(node)


def find_env_vars(code, filename):
    """Find environment variables referenced in Python source *code*.

    Recognized accesses are ``os.environ[...]``, ``os.getenv(...)``,
    ``os.environ.get(...)`` and ``PersistentConfig("NAME", path, default)``.
    For each variable the first access in traversal order determines the
    reported default and type.

    Args:
        code: Python source text to analyze.
        filename: Label stored in each result's ``"files"`` set.

    Returns:
        A dict mapping each variable name to
        ``{"files": {filename}, "default": <value or source text or None>,
        "type": <type label>}``.

    Raises:
        SyntaxError: If *code* cannot be parsed by ``ast.parse``.
    """
    visitor = _EnvVarVisitor()
    visitor.visit(ast.parse(code))
    return {
        name: {"files": {filename}, "default": info["default"], "type": info["type"]}
        for name, info in visitor.found.items()
    }


def main():
    """Report Open WebUI environment variables missing from the docs, and vice versa.

    Downloads ``config.py``, ``env.py`` and ``migrations/env.py`` from the
    open-webui repository at the git ref given as the first command-line
    argument, scans them with ``find_env_vars`` and compares the result with
    the ``#### `NAME` `` headings in ``docs/getting-started/env-configuration.md``
    (located relative to this script).

    Exits with status 1 when the git ref argument is missing, a source file
    cannot be downloaded, or the documentation file does not exist.
    """
    # Imported explicitly: the module only imports urllib.request, and using
    # urllib.error through it would rely on an import side effect.
    from urllib.error import URLError

    if len(sys.argv) < 2:
        # A missing argument is a usage error: report on stderr and fail.
        print("Usage: scan-missing-env-vars.py <git_ref>", file=sys.stderr)
        print("Example: scan-missing-env-vars.py main", file=sys.stderr)
        sys.exit(1)

    git_ref = sys.argv[1]
    print(f"Scanning git ref: {git_ref}")

    base_url = f"https://raw.githubusercontent.com/open-webui/open-webui/{git_ref}/backend/open_webui"
    # Paths relative to backend/open_webui; also used as labels in the report.
    filenames = ("config.py", "env.py", "migrations/env.py")

    all_env_vars_with_context = {}

    for filename in filenames:
        url = f"{base_url}/{filename}"
        try:
            with urllib.request.urlopen(url) as response:
                contents = response.read().decode("utf-8")
        except URLError as e:  # HTTPError (e.g. 404 for an unknown ref) is a subclass
            print(f"Failed to open URL: {e}", file=sys.stderr)
            sys.exit(1)

        for env_var, context in find_env_vars(contents, filename).items():
            merged = all_env_vars_with_context.setdefault(
                env_var, {"files": set(), "default": None, "type": "str"}
            )
            merged["files"].update(context["files"])
            # The first file that provides a default / a specific type wins.
            if merged["default"] is None:
                merged["default"] = context["default"]
            if merged["type"] == "str":
                merged["type"] = context["type"]

    # Variables that are intentionally left out of the comparison.
    ignored_env_vars = {
        "FROM_INIT_PY",
        "GLOBAL_LOG_LEVEL",
        "http_proxy",
        "https_proxy",
        "no_proxy",
        "PORT",
        "WEBUI_JWT_SECRET_KEY",
    }

    documented_env_vars = set()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    docs_file = os.path.join(
        script_dir, "..", "docs", "getting-started", "env-configuration.md"
    )

    try:
        with open(docs_file, "r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                # Documented variables appear as headings of the form: #### `NAME`
                if line.startswith("#### `"):
                    documented_env_vars.add(line.split("`")[1])
    except FileNotFoundError as e:
        print(f"Failed to open file: {e}", file=sys.stderr)
        sys.exit(1)

    print("\nEnvironment variables accessed but not documented:")

    # Split undocumented variables by whether they are accessed in config.py.
    persistent_config_vars = {}
    other_undocumented_vars = {}
    for env_var, context in all_env_vars_with_context.items():
        if env_var in documented_env_vars or env_var in ignored_env_vars:
            continue
        if "config.py" in context["files"]:
            persistent_config_vars[env_var] = context
        else:
            other_undocumented_vars[env_var] = context

    def format_default_output(default, var_type):
        """Render the default/type suffix printed after a variable name."""
        if default is None:
            return "(default: None)"
        if var_type in ("list[dict]", "dict", "tuple", "set"):
            # Container defaults are already source text; print them unquoted.
            return f"(default: {default}, type: {var_type})"
        return f"(default: '{default}', type: {var_type})"

    if persistent_config_vars:
        print("\n  PersistentConfig environment variables (accessed in config.py):")
        for env_var in sorted(persistent_config_vars):
            context = persistent_config_vars[env_var]
            default_str = format_default_output(context["default"], context["type"])
            print(f"    - {env_var} {default_str}")

    if other_undocumented_vars:
        print("\n  Other undocumented environment variables:")
        for env_var in sorted(other_undocumented_vars):
            context = other_undocumented_vars[env_var]
            default_str = format_default_output(context["default"], context["type"])
            print(
                f"    - {env_var} {default_str} (in files: {', '.join(sorted(context['files']))})"
            )

    if not persistent_config_vars and not other_undocumented_vars:
        print("  None")

    print("\nEnvironment variables documented but not accessed:")
    diff = documented_env_vars - set(all_env_vars_with_context) - ignored_env_vars
    for env_var in sorted(diff):
        print(env_var)
    if not diff:
        print("None")


# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()