DeepSeek-Prover-V1.5/prover/lean/ast_parser.py

1030 lines
48 KiB
Python
Raw Normal View History

2024-08-16 03:33:21 +00:00
import re
line_break_regex = re.compile(r'(?<=\n)')
def process_lean_file(file_contents, byte_idx_1, byte_idx_2):
def get_line(lines, line_number):
if 1 <= line_number <= len(lines):
return lines[line_number - 1]
else:
raise IndexError("Line number out of range")
def convert_pos(lines, byte_idx):
num_bytes = [len(line.encode('utf-8')) for line in lines]
n = 0
for i, num_bytes_in_line in enumerate(num_bytes, start=1):
n += num_bytes_in_line
if n > byte_idx:
line_byte_idx = byte_idx - (n - num_bytes_in_line)
if line_byte_idx == 0:
return i, 1
line = get_line(lines, i)
m = 0
for j, c in enumerate(line, start=1):
m += len(c.encode("utf-8"))
if m >= line_byte_idx:
return i, j + 1
return len(lines), len(lines[-1])
def extract_string_between_positions(lines, byte_idx_1, byte_idx_2):
line_1, column_1 = convert_pos(lines, byte_idx_1)
line_2, column_2 = convert_pos(lines, byte_idx_2)
extracted_string = []
if line_1 == line_2:
# If both positions are in the same line
substring = lines[line_1 - 1][column_1 - 1:column_2 - 1]
extracted_string.append(substring)
else:
# If positions span multiple lines
extracted_string.append(lines[line_1 - 1][column_1 - 1:])
for line in range(line_1 + 1, line_2):
extracted_string.append(lines[line - 1])
extracted_string.append(lines[line_2 - 1][:column_2 - 1])
return ''.join(extracted_string)
def convert_line_col_to_char_idx(lines, line, col):
char_idx = 0
for i in range(line - 1):
char_idx += len(lines[i])
char_idx += col - 1
return char_idx
lines = line_break_regex.split(file_contents)
line_1, column_1 = convert_pos(lines, byte_idx_1)
line_2, column_2 = convert_pos(lines, byte_idx_2)
extracted_string = extract_string_between_positions(lines, byte_idx_1, byte_idx_2)
char_idx_1 = convert_line_col_to_char_idx(lines, line_1, column_1)
char_idx_2 = convert_line_col_to_char_idx(lines, line_2, column_2)
return extracted_string, line_1, column_1, line_2, column_2, char_idx_1, char_idx_2
def extract_positions(node):
positions = []
if isinstance(node, dict):
if 'info' in node and 'original' in node['info']:
info = node['info']['original']
positions.append((info.get('pos'), info.get('endPos')))
for key, value in node.items():
positions.extend(extract_positions(value))
elif isinstance(node, list):
for item in node:
positions.extend(extract_positions(item))
return positions
def extract_vals(data):
vals = []
if isinstance(data, dict):
if "val" in data:
vals.append(data["val"].strip())
for value in data.values():
vals.extend(extract_vals(value))
elif isinstance(data, list):
for item in data:
vals.extend(extract_vals(item))
return vals
def find_doccomment_vals(data):
vals = []
positions = []
if isinstance(data, dict):
if data.get("kind") == "Lean.Parser.Command.docComment":
args = data.get("args", [])
for arg in args:
if "atom" in arg:
vals.append(arg["atom"]["val"])
positions.append((arg["atom"]["info"]["original"]["pos"], arg["atom"]["info"]["original"]["endPos"]))
for value in data.values():
v, p = find_doccomment_vals(value)
vals.extend(v)
positions.extend(p)
elif isinstance(data, list):
for item in data:
v, p = find_doccomment_vals(item)
vals.extend(v)
positions.extend(p)
return vals, positions
def find_attributes_vals(data):
vals = []
positions = []
if isinstance(data, dict):
if data.get("kind") in ["Lean.Parser.Term.attributes"]:
args = data.get("args", [])
for arg in args:
vals.extend(extract_vals(arg))
positions.extend(extract_positions(arg))
for value in data.values():
v, p = find_attributes_vals(value)
vals.extend(v)
positions.extend(p)
elif isinstance(data, list):
for item in data:
v, p = find_attributes_vals(item)
vals.extend(v)
positions.extend(p)
return vals, positions
def find_pripro_vals(data):
vals = []
positions = []
if isinstance(data, dict):
if data.get("kind") in ["Lean.Parser.Command.private", "Lean.Parser.Command.protected"]:
args = data.get("args", [])
for arg in args:
vals.extend(extract_vals(arg))
positions.extend(extract_positions(arg))
for value in data.values():
v, p = find_pripro_vals(value)
vals.extend(v)
positions.extend(p)
elif isinstance(data, list):
for item in data:
v, p = find_pripro_vals(item)
vals.extend(v)
positions.extend(p)
return vals, positions
def extract_other_vals(data):
vals = []
if isinstance(data, dict):
if "val" in data:
vals.append(data["val"])
for value in data.values():
vals.extend(extract_other_vals(value))
elif isinstance(data, list):
for item in data:
vals.extend(extract_other_vals(item))
return vals
def find_kind_name_theorem_lemma_abbrev_def_instance_inductive(file_content, data):
kind = None
name = None
kind_pos = None
kind_end = None
name_pos = None
name_end = None
if isinstance(data, dict):
if len(data.get("node", {}).get("args", [])) > 1:
second_node=data["node"]["args"][1]["node"]
if second_node["kind"]== "Lean.Parser.Command.instance":
atom = second_node.get("args", [])[1].get("atom", {})
kind = atom.get("val")
kind_pos = atom.get("info", {}).get("original", {}).get("pos")
kind_end = atom.get("info", {}).get("original", {}).get("endPos")
for arg in second_node.get("args", []):
if arg.get("node"):
if arg.get("node")["args"]:
if arg.get("node")["args"][0].get("node",{}).get("kind") == "Lean.Parser.Command.declId":
ident = arg.get("node")["args"][0]["node"]["args"][0]["ident"]
name = ident.get("val")
# print(name)
name_pos = ident.get("info", {}).get("original", {}).get("pos")
# print(name_pos)
name_end = ident.get("info", {}).get("original", {}).get("endPos")
break
else:
atom = second_node.get("args", [])[0].get("atom", {})
kind = atom.get("val")
kind_pos = atom.get("info", {}).get("original", {}).get("pos")
kind_end = atom.get("info", {}).get("original", {}).get("endPos")
for arg in second_node.get("args", []):
if arg.get("node", {}).get("kind") == "Lean.Parser.Command.declId":
ident = arg.get("node", {}).get("args", [])[0].get("ident", {})
name = ident.get("val")
name_pos = ident.get("info", {}).get("original", {}).get("pos")
name_end = ident.get("info", {}).get("original", {}).get("endPos")
break
if kind_pos and kind_end:
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2 = process_lean_file(file_content, kind_pos, kind_end)
else:
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2 = None,None,None, None,None,None,None
if name_pos and name_end:
name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = process_lean_file(file_content, name_pos, name_end)
else:
name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = None,None,None, None,None,None,None
return kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2, name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2
def find_statement_theorem_lemma_abbrev(file_content,data):
explicitBinder_list = []
type_list=[]
second_node=data["node"]["args"][1]["node"]
for arg in second_node.get("args", []):
if arg.get("node", {}).get("kind") in ["Lean.Parser.Command.declSig", "Lean.Parser.Command.optDeclSig"]:
node_declSig = arg.get("node")
statement_positions= extract_positions(node_declSig)
node_1=node_declSig["args"][0]["node"]
node_2=node_declSig["args"][1]["node"] if len(node_declSig["args"]) > 1 else None
for arg in node_1.get("args", []):
if arg.get("node", {}).get("kind") in[ "Lean.Parser.Term.explicitBinder","Lean.Parser.Term.implicitBinder","Lean.Parser.Term.instBinder"]:
node=arg.get("node", {})
val=extract_vals(node)
val=' '.join(val)
positions=extract_positions(node)
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
explicit, explicit_line_1, explicit_column_1, explicit_line_2, explicit_column_2, explicit_char_idx_1, explicit_char_idx_2 =process_lean_file(file_content,start_pos, end_pos)
explicitBinder_list.append({
"explicitBinder":{
"content":explicit,
"start_pos": {
"position" :explicit_char_idx_1,
"line" : explicit_line_1,
"column" : explicit_column_1
},
"end_pos": {
"position" : explicit_char_idx_2,
"line" : explicit_line_2,
"column" : explicit_column_2
}
}
})
if node_2 is not None and second_node["kind"] in ["Lean.Parser.Command.theorem", "group", "Lean.Parser.Command.abbrev"]:
if node_2.get("kind") in ["Lean.Parser.Term.typeSpec"]:
node=node_2
val=extract_vals(node)
val=' '.join(val)
positions=extract_positions(node)
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
explicit, explicit_line_1, explicit_column_1, explicit_line_2, explicit_column_2, explicit_char_idx_1, explicit_char_idx_2 =process_lean_file(file_content,start_pos, end_pos)
type_list.append({
"typeSpec":{
"content":explicit,
"start_pos": {
"position" :explicit_char_idx_1,
"line" : explicit_line_1,
"column" : explicit_column_1
},
"end_pos": {
"position" : explicit_char_idx_2,
"line" : explicit_line_2,
"column" : explicit_column_2
}
}
})
# print(explicitBinder_list,statement_positions)
return explicitBinder_list, type_list, statement_positions
def find_proof(file_content, data):
vals = []
positions = []
if isinstance(data, dict):
second_node = data["node"]["args"][1]["node"]
for arg in second_node.get("args", []):
if arg.get("node", {}).get("kind") in ["Lean.Parser.Command.declValSimple", "Lean.Parser.Command.declValEqns"]:
vals.extend(extract_vals(arg))
positions.extend(extract_positions(arg))
break # 找到后即可退出
if positions:
proof_start_pos = min(pos[0] for pos in positions)
proof_end_pos = max(pos[1] for pos in positions)
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = process_lean_file(file_content, proof_start_pos, proof_end_pos)
else:
proof_start_pos = None
proof_end_pos = None
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = None,None,None, None,None,None, None
return proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2
def process_modifier(file_content, declaration, tactics):
doccomment_vals, doccomment_positions = find_doccomment_vals(declaration)
if doccomment_vals:
comment_start_pos = min(pos[0] for pos in doccomment_positions)
comment_end_pos = max(pos[1] for pos in doccomment_positions)
comment, comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2 = process_lean_file(file_content,comment_start_pos,comment_end_pos)
else:
comment = None
comment_start_pos = None
comment_end_pos = None
comment= None
comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2 = None, None, None, None, None, None
attributes_vals, attributes_positions = find_attributes_vals(declaration)
if attributes_vals:
attributes_start_pos = min(pos[0] for pos in attributes_positions)
attributes_end_pos = max(pos[1] for pos in attributes_positions)
attributes, attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2 = process_lean_file(file_content,attributes_start_pos,attributes_end_pos)
else:
attributes = None
attributes_start_pos = None
attributes_end_pos = None
attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2 = None, None, None, None, None, None
pripro_vals, pripro_positions = find_pripro_vals(declaration)
if pripro_vals:
pripro_start_pos = min(pos[0] for pos in pripro_positions)
pripro_end_pos = max(pos[1] for pos in pripro_positions)
pripro, pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2 = process_lean_file(file_content,pripro_start_pos,pripro_end_pos)
else:
pripro = None
pripro_start_pos = None
pripro_end_pos = None
pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2 = None, None, None, None, None, None
positions = extract_positions(declaration)
positions = [(start, end) for start, end in positions if start is not None and end is not None]
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2 = process_lean_file(file_content, start_pos, end_pos)
tactics_list = []
for tactic in tactics:
if start_pos <= tactic["pos"] <= end_pos:
_, A,B,c,d, tactic["pos"], tactic["endPos"] = process_lean_file(file_content, tactic["pos"], tactic["endPos"])
tactics_list.append(tactic)
return tactics_list, comment, comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2, attributes, attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2, pripro, pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2, whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2
def theorem_lemma_abbrev(file_content, declaration,tactics):
tactics_list, comment, comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2, attributes, attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2, pripro, pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2, whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2= process_modifier(file_content, declaration,tactics)
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2,name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = find_kind_name_theorem_lemma_abbrev_def_instance_inductive(file_content, declaration)
explicitBinder_list, type_list, statement_positions = find_statement_theorem_lemma_abbrev(file_content, declaration)
positions = extract_positions(declaration)
positions = [(start, end) for start, end in positions if start is not None and end is not None]
if not positions:
return {"kind" : kind}
whole_start_pos = min(pos[0] for pos in positions)
whole_end_pos = max(pos[1] for pos in positions)
if statement_positions:
statement_start_pos = whole_start_pos
statement_end_pos = max(pos[1] for pos in statement_positions)
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = process_lean_file(file_content, statement_start_pos, statement_end_pos)
else:
statement_start_pos = None
statement_end_pos = None
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = None, None, None, None, None, None,None
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = find_proof(file_content, declaration)
declaration_info = {
"kind" : kind,
"whole_start_pos" : {
"position" : whole_char_idx_1,
"line" : whole_line_1,
"column" : whole_column_1
},
"whole_end_pos" :{
"position" : whole_char_idx_2,
"line" : whole_line_2,
"column" : whole_column_2
},
"attributes": attributes,
"comment": {
"content": comment,
"start_pos": {
"position" : comment_char_idx_1,
"line" : comment_line_1,
"column" : comment_column_1
},
"end_pos": {
"position" : comment_char_idx_2,
"line" : comment_line_2,
"column" : comment_column_2
}
},
"private_protected": pripro,
"name": {
"content": name,
"start_pos": {
"position" : name_char_idx_1,
"line" : name_line_1,
"column" : name_column_1
},
"end_pos": {
"position" : name_char_idx_2,
"line" : name_line_2,
"column" : name_column_2
}
},
"parameters": explicitBinder_list,
"Type":type_list,
"statement":{
"content": statement_val_str,
"start_pos": {
"position" : statement_char_idx_1,
"line" : statement_line_1,
"column" : statement_column_1
},
"end_pos": {
"position" : statement_char_idx_2,
"line" : statement_line_2,
"column" : statement_column_2
}
},
"proof" :{
"content": proof,
"start_pos": {
"position" : proof_char_idx_1,
"line" : proof_line_1,
"column" : proof_column_1
},
"end_pos": {
"position" : proof_char_idx_2,
"line" : proof_line_2,
"column" : proof_column_2
}
}
}
return declaration_info
def find_statement_def(file_content,data):
explicitBinder_list = []
type_list=[]
second_node=data["node"]["args"][1]["node"]
for arg in second_node.get("args", []):
if arg.get("node", {}).get("kind") in ["Lean.Parser.Command.declSig", "Lean.Parser.Command.optDeclSig"]:
node_declSig = arg.get("node")
statement_positions= extract_positions(node_declSig)
node_1=node_declSig["args"][0]["node"]
node_2=node_declSig["args"][1]["node"]
for arg in node_1.get("args", []):
if arg.get("node", {}).get("kind") in[ "Lean.Parser.Term.explicitBinder","Lean.Parser.Term.implicitBinder","Lean.Parser.Term.instBinder"]:
node=arg.get("node", {})
val=extract_vals(node)
val=' '.join(val)
positions=extract_positions(node)
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
explicit, explicit_line_1, explicit_column_1, explicit_line_2, explicit_column_2, explicit_char_idx_1, explicit_char_idx_2 =process_lean_file(file_content,start_pos, end_pos)
# explicitBinder_list.append({"explicitBinder": [explicit, explicit_char_idx_1, explicit_line_1, explicit_column_1, explicit_char_idx_2, explicit_line_2, explicit_column_2]})
explicitBinder_list.append({
"explicitBinder":{
"content":explicit,
"start_pos": {
"position" :explicit_char_idx_1,
"line" : explicit_line_1,
"column" : explicit_column_1
},
"end_pos": {
"position" : explicit_char_idx_2,
"line" : explicit_line_2,
"column" : explicit_column_2
}
}
})
if second_node["kind"] in[ "Lean.Parser.Command.definition"] :
for arg in node_2.get("args", []):
if arg.get("node", {}).get("kind") in ["Lean.Parser.Term.typeSpec"]:
node=arg.get("node", {})
val=extract_vals(node)
val=' '.join(val)
positions=extract_positions(node)
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
explicit, explicit_line_1, explicit_column_1, explicit_line_2, explicit_column_2, explicit_char_idx_1, explicit_char_idx_2 =process_lean_file(file_content,start_pos, end_pos)
type_list.append({
"typeSpec":{
"content":explicit,
"start_pos": {
"position" :explicit_char_idx_1,
"line" : explicit_line_1,
"column" : explicit_column_1
},
"end_pos": {
"position" : explicit_char_idx_2,
"line" : explicit_line_2,
"column" : explicit_column_2
}
}
})
elif second_node["kind"] in[ "Lean.Parser.Command.instance"]:
val=extract_vals(node_2)
val=' '.join(val)
positions=extract_positions(node_2)
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
explicit, explicit_line_1, explicit_column_1, explicit_line_2, explicit_column_2, explicit_char_idx_1, explicit_char_idx_2 =process_lean_file(file_content,start_pos, end_pos)
type_list.append({
"typeSpec":{
"content":explicit,
"start_pos": {
"position" :explicit_char_idx_1,
"line" : explicit_line_1,
"column" : explicit_column_1
},
"end_pos": {
"position" : explicit_char_idx_2,
"line" : explicit_line_2,
"column" : explicit_column_2
}
}
})
return explicitBinder_list,type_list, statement_positions
def definition_instance(file_content, declaration,tactics):
tactics_list, comment, comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2, attributes, attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2, pripro, pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2, whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2= process_modifier(file_content, declaration,tactics)
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2,name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = find_kind_name_theorem_lemma_abbrev_def_instance_inductive(file_content, declaration)
explicitBinder_list,type_list, statement_positions = find_statement_def(file_content,declaration)
positions = extract_positions(declaration)
positions = [(start, end) for start, end in positions if start is not None and end is not None]
if not positions:
return {"kind" : kind}
whole_start_pos = min(pos[0] for pos in positions)
whole_end_pos = max(pos[1] for pos in positions)
if statement_positions:
statement_start_pos = whole_start_pos
statement_end_pos = max(pos[1] for pos in statement_positions)
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = process_lean_file(file_content, statement_start_pos, statement_end_pos)
else:
statement_start_pos = None
statement_end_pos = None
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = None, None, None, None, None, None,None
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = find_proof(file_content, declaration)
declaration_info = {
"kind" : kind,
"whole_start_pos" : {
"position" : whole_char_idx_1,
"line" : whole_line_1,
"column" : whole_column_1
},
"whole_end_pos" :{
"position" : whole_char_idx_2,
"line" : whole_line_2,
"column" : whole_column_2
},
"attributes": attributes,
"comment": {
"content": comment,
"start_pos": {
"position" : comment_char_idx_1,
"line" : comment_line_1,
"column" : comment_column_1
},
"end_pos": {
"position" : comment_char_idx_2,
"line" : comment_line_2,
"column" : comment_column_2
}
},
"private_protected": pripro,
"name": {
"content": name,
"start_pos": {
"position" : name_char_idx_1,
"line" : name_line_1,
"column" : name_column_1
},
"end_pos": {
"position" : name_char_idx_2,
"line" : name_line_2,
"column" : name_column_2
}
},
"parameters": explicitBinder_list,
"Type":type_list,
"statement":{
"content": statement_val_str,
"start_pos": {
"position" : statement_char_idx_1,
"line" : statement_line_1,
"column" : statement_column_1
},
"end_pos": {
"position" : statement_char_idx_2,
"line" : statement_line_2,
"column" : statement_column_2
}
},
"proof" :{
"content": proof,
"start_pos": {
"position" : proof_char_idx_1,
"line" : proof_line_1,
"column" : proof_column_1
},
"end_pos": {
"position" : proof_char_idx_2,
"line" : proof_line_2,
"column" : proof_column_2
}
}
}
return declaration_info
def find_kind_name_structure(file_content, data):
kind = None
name = None
kind_pos = None
kind_end = None
name_pos = None
name_end = None
if isinstance(data, dict):
if len(data.get("node", {}).get("args", [])) > 1:
second_node=data["node"]["args"][1]["node"]
if second_node["kind"]== "Lean.Parser.Command.structure":
node_structureTk=second_node["args"][0]["node"]
atom = node_structureTk["args"][0]["atom"]
kind = atom.get("val")
kind_pos = atom.get("info", {}).get("original", {}).get("pos")
kind_end = atom.get("info", {}).get("original", {}).get("endPos")
for arg in second_node.get("args", []):
if arg.get("node"):
if arg.get("node").get("kind") == "Lean.Parser.Command.declId":
node_declId = second_node["args"][1]["node"]
val=extract_vals(node_declId)
val=' '.join(val)
positions=extract_positions(node_declId)
name_pos = min(pos[0] for pos in positions)
name_end = max(pos[1] for pos in positions)
if kind_pos and kind_end:
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2 = process_lean_file(file_content, kind_pos, kind_end)
else:
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2 = None,None,None, None,None,None,None
if name_pos and name_end:
name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = process_lean_file(file_content, name_pos, name_end)
else:
name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = None,None,None, None,None,None,None
return kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2, name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2
def find_statement_structure(file_content,data):
explicitBinder_list = []
second_node=data["node"]["args"][1]["node"]
statement_positions=None
for arg in second_node.get("args", []):
if arg.get("node"):
if arg.get("node")["args"]:
if arg.get("node")["args"][0].get("node"):
if arg.get("node")["args"][0].get("node")["kind"] in [ "Lean.Parser.Term.explicitBinder","Lean.Parser.Term.implicitBinder","Lean.Parser.Term.instBinder"]:
node_all= arg.get("node")
statement_positions= extract_positions(node_all)
# print(statement_positions)
for arg in node_all.get("args", []):
if arg.get("node", {}).get("kind") in[ "Lean.Parser.Term.explicitBinder","Lean.Parser.Term.implicitBinder","Lean.Parser.Term.instBinder"]:
node=arg.get("node", {})
val=extract_vals(node)
val=' '.join(val)
positions=extract_positions(node)
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
explicit, explicit_line_1, explicit_column_1, explicit_line_2, explicit_column_2, explicit_char_idx_1, explicit_char_idx_2 =process_lean_file(file_content,start_pos, end_pos)
# explicitBinder_list.append({"explicitBinder": [explicit, explicit_char_idx_1, explicit_line_1, explicit_column_1, explicit_char_idx_2, explicit_line_2, explicit_column_2]})
explicitBinder_list.append({
"explicitBinder":{
"content":explicit,
"start_pos": {
"position" :explicit_char_idx_1,
"line" : explicit_line_1,
"column" : explicit_column_1
},
"end_pos": {
"position" : explicit_char_idx_2,
"line" : explicit_line_2,
"column" : explicit_column_2
}
}
})
return explicitBinder_list, statement_positions
def find_proof_structure(file_content, declaration):
vals = []
positions = []
second_node=declaration["node"]["args"][1]["node"]
for arg in second_node.get("args", []):
if arg.get("node"):
if arg.get("node")["args"]:
if arg.get("node")["args"][0].get("atom"):
if arg.get("node")["args"][0].get("atom").get("val")== "where":
vals.extend(extract_vals(arg))
positions.extend(extract_positions(arg))
break
if positions:
proof_start_pos = min(pos[0] for pos in positions)
proof_end_pos = max(pos[1] for pos in positions)
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = process_lean_file(file_content, proof_start_pos, proof_end_pos)
else:
proof_start_pos = None
proof_end_pos = None
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = None,None,None, None,None,None, None
return proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2
def structure(file_content, declaration,tactics):
tactics_list, comment, comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2, attributes, attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2, pripro, pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2, whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2= process_modifier(file_content, declaration,tactics)
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2,name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = find_kind_name_structure(file_content, declaration)
explicitBinder_list, statement_positions = find_statement_structure(file_content, declaration)
positions = extract_positions(declaration)
positions = [(start, end) for start, end in positions if start is not None and end is not None]
if not positions:
return {"kind" : kind}
whole_start_pos = min(pos[0] for pos in positions)
whole_end_pos = max(pos[1] for pos in positions)
if statement_positions:
statement_start_pos = whole_start_pos
statement_end_pos = max(pos[1] for pos in statement_positions)
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = process_lean_file(file_content, statement_start_pos, statement_end_pos)
else:
statement_start_pos = None
statement_end_pos = None
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = None, None, None, None, None, None,None
if statement_positions:
statement_start_pos = min(pos[0] for pos in statement_positions)
statement_end_pos = max(pos[1] for pos in statement_positions)
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = process_lean_file(file_content, statement_start_pos, statement_end_pos)
else:
statement_start_pos = None
statement_end_pos = None
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = None, None, None, None, None, None,None
parts = [attributes, pripro, kind, name, statement_val_str]
non_none_parts = [part for part in parts if part is not None]
statement = ' '.join(non_none_parts).strip()
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = find_proof_structure(file_content, declaration)
declaration_info = {
"kind" : kind,
"whole_start_pos" : {
"position" : whole_char_idx_1,
"line" : whole_line_1,
"column" : whole_column_1
},
"whole_end_pos" :{
"position" : whole_char_idx_2,
"line" : whole_line_2,
"column" : whole_column_2
},
"attributes": attributes,
"comment": {
"content": comment,
"start_pos": {
"position" : comment_char_idx_1,
"line" : comment_line_1,
"column" : comment_column_1
},
"end_pos": {
"position" : comment_char_idx_2,
"line" : comment_line_2,
"column" : comment_column_2
}
},
"private_protected": pripro,
"name": {
"content": name,
"start_pos": {
"position" : name_char_idx_1,
"line" : name_line_1,
"column" : name_column_1
},
"end_pos": {
"position" : name_char_idx_2,
"line" : name_line_2,
"column" : name_column_2
}
},
"parameters": explicitBinder_list,
"statement":{
"content": statement_val_str,
"start_pos": {
"position" : statement_char_idx_1,
"line" : statement_line_1,
"column" : statement_column_1
},
"end_pos": {
"position" : statement_char_idx_2,
"line" : statement_line_2,
"column" : statement_column_2
}
},
"proof" :{
"content": proof,
"start_pos": {
"position" : proof_char_idx_1,
"line" : proof_line_1,
"column" : proof_column_1
},
"end_pos": {
"position" : proof_char_idx_2,
"line" : proof_line_2,
"column" : proof_column_2
}
}
}
return declaration_info
def find_proof_inductive(file_content, declaration):
vals = []
positions = []
second_node=declaration["node"]["args"][1]["node"]
for arg in second_node.get("args", []):
if arg.get("node"):
if arg.get("node")["args"]:
if arg.get("node")["args"][0].get("node",{}).get("kind",{}) == "Lean.Parser.Command.ctor":
vals.extend(extract_vals(arg))
positions.extend(extract_positions(arg))
break
if positions:
proof_start_pos = min(pos[0] for pos in positions)
proof_end_pos = max(pos[1] for pos in positions)
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = process_lean_file(file_content, proof_start_pos, proof_end_pos)
else:
proof_start_pos = None
proof_end_pos = None
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = None,None,None, None,None,None, None
return proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2
def inductive(file_content, declaration,tactics):
tactics_list, comment, comment_line_1, comment_column_1, comment_line_2, comment_column_2, comment_char_idx_1, comment_char_idx_2, attributes, attributes_line_1, attributes_column_1, attributes_line_2, attributes_column_2, attributes_char_idx_1, attributes_char_idx_2, pripro, pripro_line_1, pripro_column_1, pripro_line_2, pripro_column_2, pripro_char_idx_1, pripro_char_idx_2, whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2= process_modifier(file_content, declaration,tactics)
kind, kind_line_1, kind_column_1, kind_line_2, kind_column_2, kind_char_idx_1, kind_char_idx_2,name, name_line_1, name_column_1, name_line_2, name_column_2, name_char_idx_1, name_char_idx_2 = find_kind_name_theorem_lemma_abbrev_def_instance_inductive(file_content, declaration)
explicitBinder_list,type_list, statement_positions = find_statement_theorem_lemma_abbrev(file_content, declaration)
positions = extract_positions(declaration)
positions = [(start, end) for start, end in positions if start is not None and end is not None]
if not positions:
return {"kind" : kind}
whole_start_pos = min(pos[0] for pos in positions)
whole_end_pos = max(pos[1] for pos in positions)
# statement_val_str = ' '.join(statement_vals)
if statement_positions:
statement_start_pos = whole_start_pos
statement_end_pos = max(pos[1] for pos in statement_positions)
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = process_lean_file(file_content, statement_start_pos, statement_end_pos)
# print(statement_val_str)
else:
statement_start_pos = None
statement_end_pos = None
statement_val_str, statement_line_1, statement_column_1, statement_line_2, statement_column_2, statement_char_idx_1, statement_char_idx_2 = None, None, None, None, None, None,None
parts = [attributes, pripro, kind, name, statement_val_str]
non_none_parts = [part for part in parts if part is not None]
statement = ' '.join(non_none_parts).strip()
proof, proof_line_1, proof_column_1, proof_line_2, proof_column_2, proof_char_idx_1, proof_char_idx_2 = find_proof_inductive(file_content, declaration)
declaration_info = {
"kind" : kind,
"whole_start_pos" : {
"position" : whole_char_idx_1,
"line" : whole_line_1,
"column" : whole_column_1
},
"whole_end_pos" :{
"position" : whole_char_idx_2,
"line" : whole_line_2,
"column" : whole_column_2
},
"attributes": attributes,
"comment": {
"content": comment,
"start_pos": {
"position" : comment_char_idx_1,
"line" : comment_line_1,
"column" : comment_column_1
},
"end_pos": {
"position" : comment_char_idx_2,
"line" : comment_line_2,
"column" : comment_column_2
}
},
"private_protected": pripro,
"name": {
"content": name,
"start_pos": {
"position" : name_char_idx_1,
"line" : name_line_1,
"column" : name_column_1
},
"end_pos": {
"position" : name_char_idx_2,
"line" : name_line_2,
"column" : name_column_2
}
},
"parameters": explicitBinder_list,
"Type":type_list,
"statement":{
"content": statement_val_str,
"start_pos": {
"position" : statement_char_idx_1,
"line" : statement_line_1,
"column" : statement_column_1
},
"end_pos": {
"position" : statement_char_idx_2,
"line" : statement_line_2,
"column" : statement_column_2
}
},
"proof" :{
"content": proof,
"start_pos": {
"position" : proof_char_idx_1,
"line" : proof_line_1,
"column" : proof_column_1
},
"end_pos": {
"position" : proof_char_idx_2,
"line" : proof_line_2,
"column" : proof_column_2
}
}
}
return declaration_info
def lean4_parser(file_content, data):
tactics = data.get("tactics")
premises = data.get("premises")
command_asts = data.get("commandASTs")
declarations_info = []
for i, declaration in enumerate(command_asts):
kind = declaration.get("node", {}).get("kind")
if kind in ["Lean.Parser.Command.declaration", "lemma"]:
if len(declaration.get("node", {}).get("args", [])) > 1 and declaration["node"]["args"][1]["node"]["kind"] in ["Lean.Parser.Command.theorem","Lean.Parser.Command.example", "group", "Lean.Parser.Command.abbrev"]:
declaration_info = theorem_lemma_abbrev(file_content, declaration,tactics)
declarations_info.append(declaration_info)
elif len(declaration.get("node", {}).get("args", [])) > 1 and declaration["node"]["args"][1]["node"]["kind"] in [ "Lean.Parser.Command.definition","Lean.Parser.Command.instance"]:
declaration_info = definition_instance(file_content, declaration,tactics)
declarations_info.append(declaration_info)
elif len(declaration.get("node", {}).get("args", [])) > 1 and declaration["node"]["args"][1]["node"]["kind"] in [ "Lean.Parser.Command.structure"]:
declaration_info = structure(file_content, declaration,tactics)
declarations_info.append(declaration_info)
elif len(declaration.get("node", {}).get("args", [])) > 1 and declaration["node"]["args"][1]["node"]["kind"] in [ "Lean.Parser.Command.inductive"]:
declaration_info = inductive(file_content, declaration,tactics)
declarations_info.append(declaration_info)
else:
positions = extract_positions(declaration)
positions = [(start, end) for start, end in positions if start is not None and end is not None]
if positions:
start_pos = min(pos[0] for pos in positions)
end_pos = max(pos[1] for pos in positions)
whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2 = process_lean_file(file_content, start_pos, end_pos)
else:
start_pos=None
end_pos =None
whole, whole_line_1, whole_column_1, whole_line_2, whole_column_2, whole_char_idx_1, whole_char_idx_2=None,None,None,None,None,None,None
declaration_info={
"kind": kind,
"content":whole,
"whole_start_pos" : {
"position" : whole_char_idx_1,
"line" : whole_line_1,
"column" : whole_column_1
},
"whole_end_pos" :{
"position" : whole_char_idx_2,
"line" : whole_line_2,
"column" : whole_column_2
}
}
declarations_info.append(declaration_info)
return dict(
tactics=tactics,
premises=premises,
declarations=declarations_info,
)