mirror of
https://github.com/deepseek-ai/DeepSeek-Coder
synced 2025-01-23 19:07:17 +00:00
731 lines
30 KiB
Python
731 lines
30 KiB
Python
import contextlib
|
|
import faulthandler
|
|
import io
|
|
import multiprocessing
|
|
import os
|
|
import platform
|
|
import signal
|
|
import random
|
|
import subprocess
|
|
import tempfile
|
|
import gzip
|
|
import json
|
|
from typing import *
|
|
import traceback
|
|
|
|
java_exec = ""
|
|
node_exec = ""
|
|
tsc_exec = ""
|
|
go_exec = ""
|
|
php_exec = ""
|
|
cs_exec = ""
|
|
|
|
def check_correctness(
|
|
task_id: str,
|
|
sample: dict,
|
|
language_type: str,
|
|
timeout: float = 3.0,
|
|
tmp_dir: str = None,
|
|
completion_id: Optional[int] = None,
|
|
) -> Dict:
|
|
"""
|
|
Evaluates the functional correctness of a completion by running the test
|
|
suite provided in the problem.
|
|
"""
|
|
|
|
def unsafe_execute(tmp_dir):
|
|
random_id = random.randint(1, 100000)
|
|
if "python" in language_type.lower():
|
|
with create_tempdir():
|
|
|
|
# These system calls are needed when cleaning up tempdir.
|
|
import os
|
|
import shutil
|
|
rmtree = shutil.rmtree
|
|
rmdir = os.rmdir
|
|
chdir = os.chdir
|
|
|
|
# Disable functionalities that can make destructive changes to the test.
|
|
reliability_guard()
|
|
|
|
try:
|
|
exec_globals = {}
|
|
with swallow_io():
|
|
with time_limit(timeout):
|
|
# WARNING
|
|
# This program exists to execute untrusted model-generated code. Although
|
|
# it is highly unlikely that model-generated code will do something overtly
|
|
# malicious in response to this test suite, model-generated code may act
|
|
# destructively due to a lack of model capability or alignment.
|
|
# Users are strongly encouraged to sandbox this evaluation suite so that it
|
|
# does not perform destructive actions on their host or network.
|
|
# Once you have read this disclaimer and taken appropriate precautions,
|
|
# uncomment the following line and proceed at your own risk:
|
|
exec(sample["test_code"], exec_globals)
|
|
result.append("passed")
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
except AssertionError as e:
|
|
result.append(f"failed: AssertionError")
|
|
except BaseException as e:
|
|
result.append(f"failed: {e}")
|
|
#print(sample["test_code"])
|
|
#print(result)
|
|
# Needed for cleaning up.
|
|
shutil.rmtree = rmtree
|
|
os.rmdir = rmdir
|
|
os.chdir = chdir
|
|
|
|
elif "go" in language_type.lower():
|
|
assert tmp_dir is not None, "Go should be evaluated in a dir where necessary module files installed."
|
|
|
|
import os
|
|
import shutil
|
|
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
origin_path = os.getcwd()
|
|
os.chdir(tmp_dir)
|
|
open(f"main_test.go", 'w').write(sample["test_code"])
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
# WARNING
|
|
# This program exists to execute untrusted model-generated code. Although
|
|
# it is highly unlikely that model-generated code will do something overtly
|
|
# malicious in response to this test suite, model-generated code may act
|
|
# destructively due to a lack of model capability or alignment.
|
|
# Users are strongly encouraged to sandbox this evaluation suite so that it
|
|
# does not perform destructive actions on their host or network.
|
|
# Once you have read this disclaimer and taken appropriate precautions,
|
|
# uncomment the following line and proceed at your own risk:
|
|
exec_result = subprocess.run([f"{go_exec}go", "test", f"-timeout={timeout}s", "main_test.go"], timeout=timeout, capture_output=True)
|
|
|
|
if exec_result.returncode == 0:
|
|
result.append("passed")
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result.append(f"failed: {err}")
|
|
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "js" in language_type.lower():
|
|
import os
|
|
import shutil
|
|
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
origin_path = os.getcwd()
|
|
os.chdir(tmp_dir)
|
|
open(f"test.js", 'w').write(sample["test_code"])
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
# WARNING
|
|
# This program exists to execute untrusted model-generated code. Although
|
|
# it is highly unlikely that model-generated code will do something overtly
|
|
# malicious in response to this test suite, model-generated code may act
|
|
# destructively due to a lack of model capability or alignment.
|
|
# Users are strongly encouraged to sandbox this evaluation suite so that it
|
|
# does not perform destructive actions on their host or network.
|
|
# Once you have read this disclaimer and taken appropriate precautions,
|
|
# uncomment the following line and proceed at your own risk:
|
|
exec_result = subprocess.run([f"{node_exec}node", "test.js"], timeout=timeout, capture_output=True)
|
|
|
|
if exec_result.stderr.decode():
|
|
err = exec_result.stderr.decode()
|
|
result.append(f"failed: {err}")
|
|
elif exec_result.stdout.decode():
|
|
err = exec_result.stdout.decode()
|
|
result.append(f"failed: {err}")
|
|
else:
|
|
result.append("passed")
|
|
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "cpp" in language_type.lower():
|
|
import os
|
|
import shutil
|
|
origin_path = os.getcwd()
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
|
|
os.chdir(tmp_dir)
|
|
open(f"test.cpp", 'w').write(sample["test_code"])
|
|
if "162" in task_id:
|
|
compilation_result = subprocess.run(["/usr/bin/g++", "-std=c++17", "test.cpp", "-lcrypto", "-lssl"],
|
|
timeout=timeout,
|
|
capture_output=True)
|
|
else:
|
|
compilation_result = subprocess.run(["/usr/bin/g++", "-std=c++17", "test.cpp"], timeout=timeout,
|
|
capture_output=True)
|
|
if compilation_result.returncode != 0:
|
|
if compilation_result.stderr:
|
|
err = compilation_result.stderr.decode()
|
|
else:
|
|
err = compilation_result.stdout.decode()
|
|
result.append(f"failed: compilation error: {err}")
|
|
else:
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
# WARNING
|
|
# This program exists to execute untrusted model-generated code. Although
|
|
# it is highly unlikely that model-generated code will do something overtly
|
|
# malicious in response to this test suite, model-generated code may act
|
|
# destructively due to a lack of model capability or alignment.
|
|
# Users are strongly encouraged to sandbox this evaluation suite so that it
|
|
# does not perform destructive actions on their host or network.
|
|
# Once you have read this disclaimer and taken appropriate precautions,
|
|
# uncomment the following line and proceed at your own risk:
|
|
exec_result = subprocess.run(["./a.out"], timeout=timeout, capture_output=True)
|
|
|
|
if exec_result.returncode == 0:
|
|
result.append("passed")
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result.append(f"failed: {err}")
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
#print(result[-1])
|
|
#print(sample["test_code"])
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "php" in language_type.lower():
|
|
import os
|
|
import shutil
|
|
origin_path = os.getcwd()
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
|
|
os.chdir(tmp_dir)
|
|
open(f"test.php", 'w').write(sample["test_code"])
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
cmd = f"{php_exec}php -f test.php"
|
|
exec_result = subprocess.run(cmd, timeout=timeout, capture_output=True, shell=True)
|
|
|
|
if exec_result.returncode == 0:
|
|
result.append("passed")
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result.append(f"failed: {err}")
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
print(result[-1])
|
|
print(sample["test_code"])
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "sh" in language_type.lower():
|
|
import os
|
|
import shutil
|
|
origin_path = os.getcwd()
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
|
|
os.chdir(tmp_dir)
|
|
open(f"test.sh", 'w').write(sample["test_code"])
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
cmd = "/bin/bash test.sh"
|
|
exec_result = subprocess.run(cmd, timeout=10, capture_output=True, shell=True)
|
|
|
|
if exec_result.returncode == 0:
|
|
result.append("passed")
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result.append(f"failed: {err}")
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
#print(result[-1])
|
|
#print(sample["test_code"])
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "ts" in language_type.lower():
|
|
import os
|
|
import shutil
|
|
origin_path = os.getcwd()
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
|
|
os.chdir(tmp_dir)
|
|
env = {"PATH": f"{node_exec}:" + subprocess.os.environ["PATH"]}
|
|
open(f"test.ts", 'w').write(sample["test_code"])
|
|
cmd = f"{tsc_exec}tsc test.ts --target ES2015 --lib ES2015,DOM"
|
|
compilation_result = subprocess.run(cmd, timeout=timeout, capture_output=True, env=env, shell=True)
|
|
if compilation_result.returncode != 0:
|
|
if compilation_result.stderr:
|
|
err = compilation_result.stderr.decode()
|
|
else:
|
|
err = compilation_result.stdout.decode()
|
|
result.append(f"failed: compilation error: {err}")
|
|
else:
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
exec_result = subprocess.run([f"{node_exec}node", "test.js"], timeout=timeout, capture_output=True)
|
|
|
|
if exec_result.returncode == 0:
|
|
result.append("passed")
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result.append(f"failed: {err}")
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
if result[-1] != "passed":
|
|
env = {"PATH": f"{node_exec}:" + subprocess.os.environ["PATH"]}
|
|
cmd = f"{tsc_exec}tsc test.ts"
|
|
compilation_result = subprocess.run(cmd, timeout=timeout, capture_output=True, env=env, shell=True)
|
|
if compilation_result.returncode != 0:
|
|
if compilation_result.stderr:
|
|
err = compilation_result.stderr.decode()
|
|
else:
|
|
err = compilation_result.stdout.decode()
|
|
result[-1] = f"failed: compilation error: {err}"
|
|
else:
|
|
try:
|
|
exec_result = None
|
|
with time_limit(timeout):
|
|
exec_result = subprocess.run([f"{node_exec}node", "test.js"], timeout=timeout, capture_output=True)
|
|
|
|
if exec_result.returncode == 0:
|
|
result[-1] = "passed"
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result[-1] = f"failed: {err}"
|
|
except TimeoutException:
|
|
result[-1] = "timed out"
|
|
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "cs" in language_type.lower():
|
|
import os
|
|
import shutil
|
|
origin_path = os.getcwd()
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
os.chdir(tmp_dir)
|
|
open(f"Program.cs", 'w').write(sample["test_code"])
|
|
cmd = f"{cs_exec}mcs -d:DEBUG Program.cs"
|
|
compilation_result = subprocess.run(cmd, shell=True, capture_output=True)
|
|
if compilation_result.returncode != 0:
|
|
if compilation_result.stderr:
|
|
err = compilation_result.stderr.decode()
|
|
else:
|
|
err = compilation_result.stdout.decode()
|
|
result.append(f"failed: compilation error: {err}")
|
|
else:
|
|
try:
|
|
exec_result = None
|
|
cmd = f"{cs_exec}mono Program.exe"
|
|
env = dict(MONO_TRACE_LISTENER="Console.Error")
|
|
with time_limit(timeout):
|
|
exec_result = subprocess.run(cmd, timeout=timeout, shell=True, capture_output=True, env=env)
|
|
|
|
if "Fail" not in exec_result.stderr.decode():
|
|
result.append("passed")
|
|
else:
|
|
if exec_result.stderr:
|
|
try:
|
|
err = exec_result.stderr.decode()
|
|
except:
|
|
err = exec_result.stderr
|
|
else:
|
|
try:
|
|
err = exec_result.stdout.decode()
|
|
except:
|
|
err = exec_result.stdout
|
|
result.append(f"failed: {err}")
|
|
except TimeoutException:
|
|
result.append("timed out")
|
|
except Exception as e:
|
|
result.append(f"failed: {e}")
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
elif "rust" in language_type.lower():
|
|
import os
|
|
|
|
WD: str = os.path.dirname(os.path.abspath(__file__))
|
|
RUST_DIR: str = os.path.join(WD, "rust")
|
|
RUST_SRC: str = os.path.join(RUST_DIR, "src")
|
|
RUST_BIN: str = os.path.join(RUST_SRC, "bin")
|
|
RUST_TMP_DIR: str = os.path.join(RUST_DIR, "tmp")
|
|
RUST_LOGS: str = os.path.join(RUST_TMP_DIR, "logs")
|
|
RUST_EXT: str = ".rs"
|
|
|
|
# Create mandatory tmp directories
|
|
os.makedirs(RUST_TMP_DIR, exist_ok=True)
|
|
os.makedirs(RUST_LOGS, exist_ok=True)
|
|
os.makedirs(RUST_SRC, exist_ok=True)
|
|
os.makedirs(RUST_BIN, exist_ok=True)
|
|
|
|
with tempfile.NamedTemporaryFile(dir = RUST_BIN, delete=False) as f:
|
|
#temporal file name
|
|
file_prefix = sample["task_id"].lower().replace("/", "_")
|
|
file_name:str = file_prefix +RUST_EXT
|
|
|
|
os.rename(f.name, os.path.join(RUST_BIN, file_name))
|
|
|
|
# Sample to pure Rust function
|
|
rust_code: str = sample["test_code"]
|
|
|
|
# dump the rust source code in the target temporal file
|
|
f.write(rust_code.encode('utf-8'))
|
|
|
|
# Proceed towards Rust binaries compilation. Therefore move to Rust module root dir.
|
|
os.chdir(RUST_DIR)
|
|
|
|
# Two possible outcomes
|
|
# Pass OR Fail compilation
|
|
log_filename: str = file_prefix + ".jsonl"
|
|
log_path: str = os.path.join(RUST_LOGS, log_filename)
|
|
cargo_check: str = "cargo check --bin " + file_prefix + " --message-format json >> " + log_path
|
|
# Compilation build status
|
|
returned_val_compilation: int
|
|
|
|
# Overwrite file content
|
|
if os.path.exists(log_path):
|
|
if(file_size := os.path.getsize(log_path)) >= 0:
|
|
os.remove(log_path)
|
|
returned_val_compilation = os.system(cargo_check)
|
|
|
|
else:
|
|
returned_val_compilation = os.system(cargo_check)
|
|
|
|
# 0 means success
|
|
if returned_val_compilation == 0:
|
|
|
|
#Execution pipeline
|
|
cargo_test: str = "cargo test --bin " +file_prefix+ " --message-format json >> " + log_path
|
|
returned_val_execution = os.system(cargo_test)
|
|
|
|
if returned_val_execution == 0:
|
|
result.append("passed")
|
|
else:
|
|
result.append(f"failed: execution error")
|
|
|
|
else:
|
|
result.append(f"failed: compilation error")
|
|
|
|
|
|
elif "java" in language_type.lower():
|
|
assert tmp_dir is not None, "Java should be evaluated in a temporary dir."
|
|
|
|
import os
|
|
import shutil
|
|
|
|
if "tmp" not in tmp_dir:
|
|
tmp_dir = os.path.join(tmp_dir, "tmp")
|
|
tmp_dir = os.path.join(tmp_dir, f"{task_id.replace('/', '-')}-{random_id}")
|
|
if not os.path.exists(tmp_dir):
|
|
os.makedirs(tmp_dir)
|
|
open(os.path.join(tmp_dir, "Problem.java"), 'w').write(sample["test_code"])
|
|
origin_path = os.getcwd()
|
|
os.system(f"cp ./javatuples-1.2.jar {tmp_dir}/")
|
|
os.chdir(tmp_dir)
|
|
res = "failed: unknown error"
|
|
compile_returncode = -1
|
|
for _ in range(5):
|
|
try:
|
|
cmd = f"{java_exec}javac -cp javatuples-1.2.jar Problem.java"
|
|
compilation_result = subprocess.run(cmd, timeout=60, capture_output=True, shell=True)
|
|
compile_returncode = compilation_result.returncode
|
|
break
|
|
except subprocess.TimeoutExpired as e:
|
|
continue
|
|
if compile_returncode != 0:
|
|
res = "failed: compilation error"
|
|
else:
|
|
exec_result = None
|
|
try:
|
|
# WARNING
|
|
# This program exists to execute untrusted model-generated code. Although
|
|
# it is highly unlikely that model-generated code will do something overtly
|
|
# malicious in response to this test suite, model-generated code may act
|
|
# destructively due to a lack of model capability or alignment.
|
|
# Users are strongly encouraged to sandbox this evaluation suite so that it
|
|
# does not perform destructive actions on their host or network.
|
|
# Once you have read this disclaimer and taken appropriate precautions,
|
|
# uncomment the following line and proceed at your own risk:
|
|
cmd = f"{java_exec}java -ea -cp .:javatuples-1.2.jar Problem"
|
|
exec_result = subprocess.run(cmd, timeout=timeout, capture_output=True, shell=True)
|
|
if exec_result.returncode == 0:
|
|
res = "passed"
|
|
elif exec_result.returncode == 1:
|
|
if "AssertionError" in exec_result.stderr.decode('unicode-escape'):
|
|
res = "failed: wrong answer"
|
|
else:
|
|
res = f"failed: {exec_result.stderr.decode()}"
|
|
except subprocess.TimeoutExpired as e:
|
|
res = "time out"
|
|
except BaseException as e:
|
|
res = f"failed: {e}"
|
|
|
|
result.append(res)
|
|
os.chdir(origin_path)
|
|
shutil.rmtree(tmp_dir)
|
|
|
|
manager = multiprocessing.Manager()
|
|
result = manager.list()
|
|
|
|
p = multiprocessing.Process(target=unsafe_execute, args=(tmp_dir,))
|
|
p.start()
|
|
p.join(timeout=timeout + 1)
|
|
if p.is_alive():
|
|
p.kill()
|
|
|
|
if not result:
|
|
result.append("timed out")
|
|
|
|
return {
|
|
"task_id" : task_id,
|
|
"completion_id": completion_id,
|
|
"result" : result[0],
|
|
"passed" : result[0] == "passed",
|
|
"finish" : -1 if "finish" not in sample else sample["finish"],
|
|
"code" : sample["test_code"]
|
|
}
|
|
|
|
# Copyright (c) OpenAI (https://openai.com)
|
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
# THE SOFTWARE.
|
|
# ============================================================================
|
|
@contextlib.contextmanager
|
|
def time_limit(seconds: float):
|
|
def signal_handler(signum, frame):
|
|
raise TimeoutException("Timed out!")
|
|
|
|
signal.setitimer(signal.ITIMER_REAL, seconds)
|
|
signal.signal(signal.SIGALRM, signal_handler)
|
|
try:
|
|
yield
|
|
finally:
|
|
signal.setitimer(signal.ITIMER_REAL, 0)
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def swallow_io():
|
|
stream = WriteOnlyStringIO()
|
|
with contextlib.redirect_stdout(stream):
|
|
with contextlib.redirect_stderr(stream):
|
|
with redirect_stdin(stream):
|
|
yield
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def create_tempdir():
|
|
with tempfile.TemporaryDirectory() as dirname:
|
|
with chdir(dirname):
|
|
yield dirname
|
|
|
|
|
|
class TimeoutException(Exception):
|
|
pass
|
|
|
|
|
|
class WriteOnlyStringIO(io.StringIO):
|
|
""" StringIO that throws an exception when it's read from """
|
|
|
|
def read(self, *args, **kwargs):
|
|
raise IOError
|
|
|
|
def readline(self, *args, **kwargs):
|
|
raise IOError
|
|
|
|
def readlines(self, *args, **kwargs):
|
|
raise IOError
|
|
|
|
def readable(self, *args, **kwargs):
|
|
""" Returns True if the IO object can be read. """
|
|
return False
|
|
|
|
|
|
class redirect_stdin(contextlib._RedirectStream): # type: ignore
|
|
_stream = 'stdin'
|
|
|
|
|
|
@contextlib.contextmanager
|
|
def chdir(root):
|
|
if root == ".":
|
|
yield
|
|
return
|
|
cwd = os.getcwd()
|
|
os.chdir(root)
|
|
try:
|
|
yield
|
|
except BaseException as exc:
|
|
raise exc
|
|
finally:
|
|
os.chdir(cwd)
|
|
|
|
|
|
def reliability_guard(maximum_memory_bytes: Optional[int] = None):
|
|
"""
|
|
This disables various destructive functions and prevents the generated code
|
|
from interfering with the test (e.g. fork bomb, killing other processes,
|
|
removing filesystem files, etc.)
|
|
|
|
WARNING
|
|
This function is NOT a security sandbox. Untrusted code, including, model-
|
|
generated code, should not be blindly executed outside of one. See the
|
|
Codex paper for more information about OpenAI's code sandbox, and proceed
|
|
with caution.
|
|
"""
|
|
|
|
if maximum_memory_bytes is not None:
|
|
import resource
|
|
resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
|
|
resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
|
|
if not platform.uname().system == 'Darwin':
|
|
resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
|
|
|
|
faulthandler.disable()
|
|
|
|
import builtins
|
|
builtins.exit = None
|
|
builtins.quit = None
|
|
|
|
import os
|
|
os.environ['OMP_NUM_THREADS'] = '1'
|
|
|
|
os.kill = None
|
|
os.system = None
|
|
os.putenv = None
|
|
os.remove = None
|
|
os.removedirs = None
|
|
os.rmdir = None
|
|
os.fchdir = None
|
|
os.setuid = None
|
|
os.fork = None
|
|
os.forkpty = None
|
|
os.killpg = None
|
|
os.rename = None
|
|
os.renames = None
|
|
os.truncate = None
|
|
os.replace = None
|
|
os.unlink = None
|
|
os.fchmod = None
|
|
os.fchown = None
|
|
os.chmod = None
|
|
os.chown = None
|
|
os.chroot = None
|
|
os.fchdir = None
|
|
os.lchflags = None
|
|
os.lchmod = None
|
|
os.lchown = None
|
|
os.getcwd = None
|
|
os.chdir = None
|
|
|
|
import shutil
|
|
shutil.rmtree = None
|
|
shutil.move = None
|
|
shutil.chown = None
|
|
|
|
import subprocess
|
|
subprocess.Popen = None # type: ignore
|
|
|
|
__builtins__['help'] = None
|
|
|
|
import sys
|
|
sys.modules['ipdb'] = None
|
|
sys.modules['joblib'] = None
|
|
sys.modules['resource'] = None
|
|
sys.modules['psutil'] = None
|
|
sys.modules['tkinter'] = None |