diff --git a/scripts/scan-missing-env-vars.py b/scripts/scan-missing-env-vars.py index af9f88b..7efbf95 100755 --- a/scripts/scan-missing-env-vars.py +++ b/scripts/scan-missing-env-vars.py @@ -6,12 +6,13 @@ import urllib.request import os -def find_env_vars(code): +def find_env_vars(code, filename): tree = ast.parse(code) + env_vars_found = {} # Dictionary to store env vars, filenames, defaults, and types class EnvVarVisitor(ast.NodeVisitor): def __init__(self): - self.env_vars = set() + self.current_env_vars = {} # Store env vars with potential defaults and types def visit_Subscript(self, node): if isinstance(node.value, ast.Attribute): @@ -21,32 +22,158 @@ def find_env_vars(code): and node.value.attr == "environ" ): if isinstance(node.slice, ast.Constant): - self.env_vars.add(node.slice.value) + env_var_name = node.slice.value + if env_var_name not in self.current_env_vars: + self.current_env_vars[env_var_name] = {"default": None, "type": "str"} # Default type str for os.environ elif isinstance(node.slice, ast.BinOp): # Handle dynamically constructed env var names like os.environ["VAR_" + "NAME"] - self.env_vars.add(ast.unparse(node.slice)) + env_var_name = ast.unparse(node.slice) + if env_var_name not in self.current_env_vars: + self.current_env_vars[env_var_name] = {"default": None, "type": "str"} # Default type str for os.environ self.generic_visit(node) def visit_Call(self, node): if isinstance(node.func, ast.Attribute): + # Check for os.getenv("VAR_NAME", "default_value") if ( isinstance(node.func.value, ast.Name) and node.func.value.id == "os" - and node.func.attr in ("getenv", "get") - ) or ( + and node.func.attr == "getenv" + ): + if node.args and isinstance(node.args[0], ast.Constant): + env_var_name = node.args[0].value + default_value = None + var_type = "str" # Default type str for os.getenv + if len(node.args) > 1: + default_node = node.args[1] + if isinstance(default_node, ast.Constant): + default_value = default_node.value + var_type = "str" # Still str if default is constant string + elif isinstance(default_node, ast.Name) and default_node.id == 'None': # Check for None literal + default_value = None + var_type = "str" # Still str even if default is None in getenv + else: # Capture other default expressions as unparsed code + default_value = ast.unparse(default_node) + var_type = "str" # Assume str if complex default in getenv + if env_var_name not in self.current_env_vars: + self.current_env_vars[env_var_name] = {"default": default_value, "type": var_type} + + # Check for os.environ.get("VAR_NAME", "default_value") + elif ( isinstance(node.func.value, ast.Attribute) and isinstance(node.func.value.value, ast.Name) and node.func.value.value.id == "os" and node.func.value.attr == "environ" and node.func.attr == "get" ): - if isinstance(node.args[0], ast.Constant): - self.env_vars.add(node.args[0].value) + if node.args and isinstance(node.args[0], ast.Constant): + env_var_name = node.args[0].value + default_value = None + var_type = "str" # Default type str for os.environ.get + if len(node.args) > 1: + default_node = node.args[1] + if isinstance(default_node, ast.Constant): + default_value = default_node.value + var_type = "str" # Still str if default is constant string + elif isinstance(default_node, ast.Name) and default_node.id == 'None': # Check for None literal + default_value = None + var_type = "str" # Still str even if default is None in get + else: # Capture other default expressions as unparsed code + default_value = ast.unparse(default_node) + var_type = "str" # Assume str if complex default in get + if env_var_name not in self.current_env_vars: + self.current_env_vars[env_var_name] = {"default": default_value, "type": var_type} + + elif isinstance(node.func, ast.Name) and node.func.id == "PersistentConfig": + if node.args and isinstance(node.args[0], ast.Constant): + env_var_name = node.args[0].value + default_value = None + var_type = "str" # Assume str as base type for PersistentConfig, will refine + if len(node.args) > 2: # Default value is the third argument + default_node = node.args[2] + if isinstance(default_node, ast.Constant): + default_value = default_node.value + if isinstance(default_value, bool): + var_type = "bool" + elif isinstance(default_value, int): + var_type = "int" + elif isinstance(default_value, float): + var_type = "float" + else: + var_type = "str" # String constant + elif isinstance(default_node, ast.List): + default_value = ast.unparse(default_node) + var_type = "list[dict]" # Assuming list of dicts for DEFAULT_PROMPT_SUGGESTIONS case, refine if needed + elif isinstance(default_node, ast.Dict): + default_value = ast.unparse(default_node) + var_type = "dict" + elif isinstance(default_node, ast.Tuple): + default_value = ast.unparse(default_node) + var_type = "tuple" + elif isinstance(default_node, ast.Set): + default_value = ast.unparse(default_node) + var_type = "set" + elif isinstance(default_node, ast.Name): # Capture variable name as default + default_value = default_node.id + var_type = "str" # Assume str if variable default + elif isinstance(default_node, ast.Call): # Check if default_node is a Call (function call) + if isinstance(default_node.func, ast.Name) and default_node.func.id == "int": + var_type = "int" + elif isinstance(default_node.func, ast.Name) and default_node.func.id == "float": + var_type = "float" + elif isinstance(default_node.func, ast.Name) and default_node.func.id == "bool": + var_type = "bool" + elif isinstance(default_node.func, ast.Name) and default_node.func.id == "str": + var_type = "str" + elif isinstance(default_node.func, ast.Attribute) and default_node.func.attr == 'getenv' and isinstance(default_node.func.value, ast.Name) and default_node.func.value.id == 'os': + if len(default_node.args) > 1 and isinstance(default_node.args[1], ast.Constant): + default_value = default_node.args[1].value # Extract default from os.getenv within PersistentConfig + var_type = "str" # Still string from getenv + elif len(default_node.args) == 1: + default_value = None # No default in os.getenv + var_type = "str" # Still string from getenv + elif isinstance(default_node.func, ast.Attribute) and default_node.func.attr == 'get' and isinstance(default_node.func.value, ast.Attribute) and default_node.func.value.attr == 'environ' and isinstance(default_node.func.value.value, ast.Name) and default_node.func.value.value.id == 'os': + if len(default_node.args) > 1 and isinstance(default_node.args[1], ast.Constant): + default_value = default_node.args[1].value # Extract default from os.environ.get within PersistentConfig + var_type = "str" # Still string from getenv + elif len(default_node.args) == 1: + default_value = None # No default in os.environ.get + var_type = "str" # Still string from getenv + else: # Capture other function calls as unparsed code + default_value = ast.unparse(default_node) + var_type = "str" # Assume str for complex call + elif isinstance(default_node, ast.Compare): # Handle boolean expressions like 'os.getenv(...) == "true"' + default_value = ast.unparse(default_node) # Capture the whole boolean expression as unparsed code + var_type = "bool" # Likely boolean from comparison + + elif isinstance(default_node, ast.Name) and default_node.id == 'None': # Check for None literal in PersistentConfig + default_value = None + var_type = "str" # Could be anything, but let's say str as base + + elif default_node: # Capture any other default expressions as unparsed code + default_value = ast.unparse(default_node) + var_type = "str" # Assume str for other expressions + + if env_var_name not in self.current_env_vars: + self.current_env_vars[env_var_name] = {"default": default_value, "type": var_type} + self.generic_visit(node) + def finalize_env_vars(self, filename, env_vars_found): + for env_var, context in self.current_env_vars.items(): # context is now a dict with default and type + if env_var not in env_vars_found: + env_vars_found[env_var] = {"files": set(), "default": None, "type": "str"} # Initialize type as str if not found before + env_vars_found[env_var]["files"].add(filename) + if env_vars_found[env_var]["default"] is None: # Only set default if not already set + env_vars_found[env_var]["default"] = context["default"] + if env_vars_found[env_var]["type"] == "str": # Only set type if still default str, otherwise keep more specific type + env_vars_found[env_var]["type"] = context["type"] + + visitor = EnvVarVisitor() visitor.visit(tree) - return visitor.env_vars + visitor.finalize_env_vars(filename, env_vars_found) # Pass filename to finalize + return env_vars_found def main(): @@ -65,15 +192,24 @@ def main(): ] filenames = ["config.py", "env.py", "migrations/env.py"] - all_env_vars = set() + all_env_vars_with_context = {} # Changed to dictionary to store context try: for url, filename in zip(urls, filenames): with urllib.request.urlopen(url) as response: contents = response.read().decode("utf-8") - for env_var in find_env_vars(contents): - all_env_vars.add(env_var) + file_env_vars = find_env_vars(contents, filename) # Pass filename here + for env_var, context in file_env_vars.items(): # context is now a dict + if env_var not in all_env_vars_with_context: + all_env_vars_with_context[env_var] = {"files": set(), "default": None, "type": "str"} # Initialize type as str + all_env_vars_with_context[env_var]["files"].update(context["files"]) # Merge file sets + if all_env_vars_with_context[env_var]["default"] is None: # Only set default if not already set + all_env_vars_with_context[env_var]["default"] = context["default"] + if all_env_vars_with_context[env_var]["type"] == "str": # Only update type if still default str, keep more specific type + all_env_vars_with_context[env_var]["type"] = context["type"] + + except urllib.error.URLError as e: print(f"Failed to open URL: {e}") sys.exit(1) @@ -91,7 +227,7 @@ def main(): documented_env_vars = set() script_dir = os.path.dirname(os.path.abspath(__file__)) docs_file = os.path.join( - script_dir, *[part for part in ["..", "docs", "getting-started", "advanced-topics", "env-configuration.md"]] + script_dir, *[part for part in ["..", "docs", "getting-started", "env-configuration.md"]] ) try: @@ -105,14 +241,48 @@ def main(): sys.exit(1) print("\nEnvironment variables accessed but not documented:") - not_documented_env_vars = all_env_vars - documented_env_vars - ignored_env_vars - for env_var in sorted(not_documented_env_vars): - print(env_var) - if not not_documented_env_vars: - print("None") + not_documented_env_vars_with_context = { + env_var: context + for env_var, context in all_env_vars_with_context.items() + if env_var not in documented_env_vars and env_var not in ignored_env_vars + } + + persistent_config_vars = {} + other_undocumented_vars = {} + + for env_var, context in not_documented_env_vars_with_context.items(): + if "config.py" in context["files"]: # Check if 'config.py' is in the set of files + persistent_config_vars[env_var] = context + else: + other_undocumented_vars[env_var] = context + + def format_default_output(default, var_type): + if default is None: + return "(default: None)" + elif var_type == "list[dict]" or var_type == "dict" or var_type == "tuple" or var_type == "set": + return f"(default: {default}, type: {var_type})" # Show full default for complex types + else: + return f"(default: '{default}', type: {var_type})" # Quote string defaults + + if persistent_config_vars: + print("\n PersistentConfig environment variables (accessed in config.py):") + for env_var in sorted(persistent_config_vars.keys()): + default_str = format_default_output(persistent_config_vars[env_var]['default'], persistent_config_vars[env_var]['type']) + print(f" - {env_var} {default_str}") + + if other_undocumented_vars: + print("\n Other undocumented environment variables:") + for env_var in sorted(other_undocumented_vars.keys()): + default_str = format_default_output(other_undocumented_vars[env_var]['default'], other_undocumented_vars[env_var]['type']) + print( + f" - {env_var} {default_str} (in files: {', '.join(sorted(other_undocumented_vars[env_var]['files']))})" + ) # Show files and defaults and types + + if not persistent_config_vars and not other_undocumented_vars: + print(" None") print("\nEnvironment variables documented but not accessed:") - diff = documented_env_vars - all_env_vars - ignored_env_vars + diff = documented_env_vars - set(all_env_vars_with_context.keys()) - ignored_env_vars # Use keys of the dict for env_var in sorted(diff): print(env_var) if not diff: @@ -120,4 +290,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main()