mirror of
https://github.com/deepseek-ai/3FS
synced 2025-06-26 18:16:45 +00:00
2244 lines
88 KiB
P (state machine modeling language; auto-detected label "OpenEdge ABL" was incorrect)
/* User Defined Types */
|
|
|
|
/* Error codes carried by all storage protocol messages and work results.
 * NOTE(review): "VERION" in CHAIN_VERION_MISMATCH is a typo for "VERSION";
 * it is kept as-is because the identifier is referenced throughout the model. */
enum tErrorCode {
    ErrorCode_SUCCESS,                  // operation completed normally
    ErrorCode_ERROR,                    // generic failure
    ErrorCode_CHAIN_VERION_MISMATCH,    // requester's versioned chain id differs from the local view
    ErrorCode_TARGET_OFFLINE,           // presumably: target unreachable — not produced in this chunk
    ErrorCode_TARGET_INACTIVE,          // local target exists but is not in an active serving state
    ErrorCode_TARGET_ALREADY_UPTODATE,  // presumably: sync found nothing to copy — not produced in this chunk
    ErrorCode_CHUNK_NOT_FOUND,          // chunk id not present on the target
    ErrorCode_CHUNK_NOT_CLEAN,          // presumably: chunk has uncommitted state — not produced in this chunk
    ErrorCode_CHUNK_STALE_COMMIT,       // commit refers to an already-superseded update (treated as benign)
    ErrorCode_CHUNK_COMMITTED_UPDATE,   // the update was already committed (duplicate write)
    ErrorCode_CHUNK_STALE_UPDATE,       // update is older than the chunk's current update version
    ErrorCode_CHUNK_MISSING_UPDATE,     // an expected prior update never arrived (asserted unreachable here)
    ErrorCode_CHUNK_NOT_COMMIT,         // presumably: chunk not in committed state — not produced in this chunk
    ErrorCode_CHUNK_IO_ERROR,           // presumably: local storage I/O failure — not produced in this chunk
    ErrorCode_CHUNK_BUSY,               // another write is in flight on the same chunk; request queued
    ErrorCode_DUPLICATE_REQUEST,        // request with the same tag was already handled
    ErrorCode_TIMEOUT                   // presumably: operation timed out — not produced in this chunk
}
|
|
|
|
type tBytes = seq[int];      // chunk payload, modeled as a sequence of ints (doubles as a checksum)
type tChainId = int;         // replication chain identifier
type tChainVer = int;        // version of a chain's membership view
type tChunkId = int;         // chunk identifier
type tChunkVer = int;        // chunk update/commit version
type tTargetId = int;        // storage target identifier
type tNodeId = int;          // storage node identifier
type tRequestId = int;       // per-node monotonically increasing request id

// A chain id paired with the version of its membership view.
type tVersionedChainId = (chainId: tChainId, chainVer: tChainVer);

// Full description of one replication chain: ordered target list plus each
// target's public state, hosting node, and StorageService machine.
type tReplicaChain = (vChainId: tVersionedChainId, targets: seq[tTargetId], states: map[tTargetId, tPublicTargetState], nodes: map[tTargetId, tNodeId], services: map[tTargetId, StorageService]);

// Globally unique chunk key: versioned chain + chunk id.
type tGlobalKey = (vChainId: tVersionedChainId, chunkId: tChunkId);

// Unique message tag: issuing node + that node's request counter.
type tMessageTag = (nodeId: tNodeId, requestId: tRequestId);

// Write request flowing down the replication chain. `fromClient` is true only
// on the head; `fullChunkReplace`/`removeChunk` mark whole-chunk operations;
// updateVer == 0 lets the first target assign the version.
type tWriteReq = (from: machine, retries: int, tag: tMessageTag, key: tGlobalKey,
    updateVer: tChunkVer, commitChainVer: tChainVer, fullChunkReplace: bool, removeChunk: bool, fromClient: bool,
    offset: int, length: int, dataBytes: tBytes);

// Read request served directly by the local target.
type tReadReq = (from: machine, retries: int, tag: tMessageTag, key: tGlobalKey, offset: int, length: int);

type tGenericResp = (tag: tMessageTag, status: tErrorCode);
type tWriteResp = (tag: tMessageTag, key: tGlobalKey, status: tErrorCode, commitVer: tChunkVer);
type tReadResp = (tag: tMessageTag, key: tGlobalKey, status: tErrorCode, chunkMetadata: tChunkMetadata, dataBytes: tBytes);

// Update propagated to a successor; commit acknowledgment sent back upstream.
// currentChunkContent is carried so predecessors can assert content equality.
type tUpdateMsg = (from: machine, tag: tMessageTag, writeReq: tWriteReq);
type tCommitMsg = (retries: int, tag: tMessageTag, key: tGlobalKey, status: tErrorCode, commitVer: tChunkVer, commitChainVer: tChainVer, currentChunkContent: tBytes);

// Chunk-level synchronization messages (sync worker -> successor).
type tSyncChunkReq = (from: machine, tag: tMessageTag, writeReq: tWriteReq);
type tSyncChunkResp = (from: machine, key: tGlobalKey, status: tErrorCode, commitVer: tChunkVer);

// Snapshot of one chunk (metadata + content) and of a whole target's store,
// exchanged during the sync handshake below.
type tChunkSnapshot = (metadata: tChunkMetadata, content: tBytes);
type tTargetSyncInfo = (targetId: tTargetId, targetState: tLocalTargetState, lastServeChainVer: tChainVer,
    chunkStoreSnapshot: map[tChunkId, tChunkSnapshot]);
type tSyncStartReq = (from: machine, vChainId: tVersionedChainId, targetId: tTargetId);
type tSyncStartResp = (status: tErrorCode, syncInfo: tTargetSyncInfo);
type tSyncDoneReq = (from: machine, vChainId: tVersionedChainId, targetId: tTargetId);
type tSyncDoneResp = (status: tErrorCode, vChainId: tVersionedChainId, targetId: tTargetId);
|
|
|
// Render a replication chain as a human-readable string for log output.
fun ReplicaChainToString(replicaChain: tReplicaChain): string {
    var statesText: string;

    statesText = PublicTargetStatesToString(replicaChain.states);
    return format("chain {0}: states: {1}, services: {2}",
        replicaChain.vChainId, statesText, replicaChain.services);
}
|
|
|
|
/* Event Types */
|
|
|
|
// Client-facing read/write protocol.
event eWriteReq : tWriteReq;
event eReadReq : tReadReq;
event eWriteResp : tWriteResp;
event eReadResp : tReadResp;

// Intra-chain replication traffic (successor -> predecessor for commits).
event eUpdateMsg : tUpdateMsg;
event eCommitMsg : tCommitMsg;

// event eSyncChunkMsg : tSyncChunkMsg;
// Target synchronization handshake (sync worker <-> successor service).
event eSyncStartReq : tSyncStartReq;
event eSyncStartResp : tSyncStartResp;
event eSyncDoneReq : tSyncDoneReq;
event eSyncDoneResp : tSyncDoneResp;
|
|
|
|
/* Storage Service */
|
|
|
|
// DONE: group in-memory states by replication chains
|
|
// DONE: rewrite the storage service/target code in sequential style (create a machine for each write request/work)
|
|
// DONE: simplify interaction between StorageTarget and ChunkReplica (move begin/write/finish ops into replica)
|
|
// TODO: create update context and queue for write/update/commit requests (including service, chain, target state etc. for debug)
|
|
|
|
// One node's local view of a replication chain: its own target, its position
// (head/tail), and its immediate neighbors. Populated by lookupLocalReplicaChain.
type tLocalReplicaChainInfo = (status: tErrorCode, vChainId: tVersionedChainId,
    isHeadTarget: bool, isTailTarget: bool,
    successorNodeId: tNodeId, predecessorNodeId: tNodeId,
    localTargetId: tTargetId, publicTargetState: tPublicTargetState,
    successorTargetId: tTargetId, successorTargetState: tPublicTargetState);

// Message tag paired with a chunk update version.
type tMessageTagUpdateVer = (tMessageTag, tChunkVer);

// Snapshot of one in-flight write: the request, its WriteProcess, and outcome.
type tWriteContext = (request: tWriteReq, process: WriteProcess, status: tErrorCode);
// event eWriteReqBlocked: tWriteContext;
// event eWriteReqUpdated: tWriteContext;
// event eWriteReqFailed: tWriteContext;
// WriteProcess -> StorageService: a write finished (successfully or not).
event eWriteReqCompleted: tWriteContext;

// NOTE(review): "Procss" is a typo for "Process"; kept since references
// may exist outside this chunk. Currently only sent from commented-out code.
event eProcssStopped: machine;
|
|
|
|
// Synchronously read [offset, offset+length) of a chunk from a storage target:
// post an eReadWork item and block until the matching eReadWorkDone arrives.
fun ReadLocalChunk(from: machine, storageTarget: StorageTarget,
                   chunkId: tChunkId, offset: int, length: int, readUncommitted: bool): tReadWorkDone
{
    var work: tReadWork;
    var result: tReadWorkDone;

    // build the work item and hand it to the target machine
    work = (from = from, chunkId = chunkId, offset = offset, length = length, readUncommitted = readUncommitted);
    send storageTarget, eReadWork, work;

    // block this machine until the target reports completion
    receive {
        case eReadWorkDone: (workDone: tReadWorkDone) {
            result = workDone;
        }
    }

    return result;
}
|
|
|
|
// Short-lived machine spawned per read request. Validates the requester's
// chain view, reads the committed chunk data from the local target, replies
// with eReadResp, and halts.
machine ReadProcess
{
    var readReq: tReadReq;                  // the request being served
    var chainInfo: tLocalReplicaChainInfo;  // local chain view at spawn time
    var replicaChain: tReplicaChain;        // full chain description (logging only)
    var localTarget: StorageTarget;         // target holding the chunk
    var localService: StorageService;       // owning service (logging only)

    start state Init {
        entry (args: (
            readReq: tReadReq,
            chainInfo: tLocalReplicaChainInfo,
            replicaChain: tReplicaChain,
            localTarget: StorageTarget,
            localService: StorageService))
        {
            readReq = args.readReq;
            chainInfo = args.chainInfo;
            replicaChain = args.replicaChain;
            localTarget = args.localTarget;
            localService = args.localService;
            goto Running;
        }
    }

    state Running {
        entry {
            var readWorkDone: tReadWorkDone;

            print format("{0}: start read process: {1}", this, readReq);
            print format("{0}: replication chain: {1}", this, ReplicaChainToString(replicaChain));
            print format("{0}: local chain info: {1}", this, chainInfo);
            print format("{0}: local target: {1}, local service: {2}", this, localTarget, localService);

            // Reject when the local target is not serving or the client's
            // versioned chain id is stale.
            // NOTE(review): this error path returns without `raise halt`,
            // leaving the machine alive (unlike the success path) — confirm
            // whether that is intentional.
            if (chainInfo.status != ErrorCode_SUCCESS || chainInfo.publicTargetState != PublicTargetState_SERVING || readReq.key.vChainId != chainInfo.vChainId) {
                send readReq.from, eReadResp, (tag = readReq.tag, key = readReq.key, status = ErrorCode_CHAIN_VERION_MISMATCH,
                    chunkMetadata = default(tChunkMetadata), dataBytes = default(tBytes));
                return;
            }

            // committed data only: readUncommitted = false
            readWorkDone = ReadLocalChunk(this, localTarget, readReq.key.chunkId, readReq.offset, readReq.length, false /*readUncommitted*/);

            send readReq.from, eReadResp,
                (tag = readReq.tag, key = readReq.key, status = readWorkDone.status,
                 chunkMetadata = readWorkDone.chunkMetadata, dataBytes = readWorkDone.dataBytes);

            raise halt;
        }
    }
}
|
|
|
|
// Return a copy of `key` whose chain version is raised to at least `chainVer`
// (versions never move backwards). The caller's key is not modified.
fun updateGlobalKey(key: tGlobalKey, chainVer: tChainVer): tGlobalKey {
    var updated: tGlobalKey;

    updated = key;
    updated.vChainId.chainVer = Max(updated.vChainId.chainVer, chainVer);
    return updated;
}
|
|
|
|
// Refreshed chain info pushed to a running WriteProcess so it can re-forward
// its request under the current chain version.
type tRetryWriteReq = (chainInfo: tLocalReplicaChainInfo, replicaChain: tReplicaChain, localTarget: StorageTarget, localService: StorageService, successorService: StorageService);
event eRetryWriteReq : tRetryWriteReq;

// Request/response pair for querying a StorageService's local view of a chain.
type tGetLocalChainInfoResult = (chainInfo: tLocalReplicaChainInfo, replicaChain: tReplicaChain, successorService: StorageService);
event eGetLocalChainInfo : (from: machine, chainId: tChainId);
event eGetLocalChainInfoResult : tGetLocalChainInfoResult;
|
|
|
|
// Synchronously ask `storageService` for its local view of chain `chainId`:
// send eGetLocalChainInfo and block until the matching result event arrives.
fun GetLocalChainInfo(caller: machine, storageService: StorageService, chainId: tChainId): tGetLocalChainInfoResult {
    var reply: tGetLocalChainInfoResult;

    send storageService, eGetLocalChainInfo, (from = caller, chainId = chainId);

    receive {
        case eGetLocalChainInfoResult: (result: tGetLocalChainInfoResult) {
            reply = result;
        }
    }

    return reply;
}
|
|
|
|
// One WriteProcess is spawned per admitted write request and drives the
// chain-replication write protocol for a single chunk update:
//
//   Init -> UpdateLocalChunk -> [ReadLocalFullChunk] -> ForwardWriteRequest
//        -> CommitLocalChunk -> halt
//
// The tail target commits immediately after its local update; non-tail
// targets forward the (possibly rewritten) request to the successor service
// and commit only when the successor's eCommitMsg comes back. A timer drives
// re-forwarding on timeout.
machine WriteProcess
{
    var writeReq: tWriteReq;                // request being processed (mutated as it advances)
    var chainInfo: tLocalReplicaChainInfo;  // local chain view, refreshed before every forward
    var replicaChain: tReplicaChain;        // full chain description (logging only)
    var localTarget: StorageTarget;         // target performing the local write/commit work
    var localService: StorageService;       // owning service; receives eWriteReqCompleted at the end
    var successorService: StorageService;   // next service downstream (unset on the tail)
    var fromSyncWorker: bool;               // true for the dummy (length == 0) write a sync worker issues
    var timer: Timer;                       // fires eTimeOut to retry forwarding
    var forwardRetries: int;                // how many times this request was forwarded downstream
    var currentChunkContent: tBytes; // this serves as checksum

    // Read the entire chunk (including uncommitted data) from the local target.
    fun readFullChunk(chunkId: tChunkId): tReadWorkDone {
        return ReadLocalChunk(this, localTarget, chunkId, 0 /*offset*/, 1024*1024*1024 /*max chunk size*/, true /*readUncommitted*/);
    }

    // After the local commit finished: answer the client (head target) or
    // propagate the commit upstream to the predecessor, then report
    // completion to the owning service. Stale commits and missing chunks on
    // a remove are treated as benign; anything else trips an assert.
    fun postWriteCommitted(commitWorkDone: tCommitWorkDone) {
        var commitMsg: tCommitMsg;
        commitMsg = commitWorkDone.commitMsg;

        if (commitWorkDone.status == ErrorCode_SUCCESS ||
            commitWorkDone.status == ErrorCode_CHUNK_STALE_COMMIT ||
            (commitWorkDone.status == ErrorCode_CHUNK_NOT_FOUND && commitWorkDone.removeChunk)) {
            assert writeReq.removeChunk == commitWorkDone.removeChunk,
                format("{0}: chunk removed in commit but not in write request, commitWorkDone: {1}, writeReq: {2}", this, commitWorkDone, writeReq);
            if (chainInfo.isHeadTarget && writeReq.fromClient) {
                // send write response to client
                send writeReq.from, eWriteResp, (tag = commitMsg.tag, key = commitMsg.key,
                    status = ErrorCode_SUCCESS, commitVer = commitWorkDone.commitVer);
            } else {
                // send commit message to predecessor
                commitMsg.retries = writeReq.retries;
                send writeReq.from, eCommitMsg, commitMsg;
            }
        } else {
            assert false, format("failed commit work: {0}", commitWorkDone);
        }

        // save the actual used commit chain version
        writeReq.commitChainVer = commitMsg.commitChainVer;
        send localService, eWriteReqCompleted, (request = writeReq, process = this, status = commitWorkDone.status);
    }

    // Chain version to stamp on the local write: a sync-supplied
    // commitChainVer wins (full-chunk replace only); otherwise the current
    // chain version from chainInfo.
    fun getChainVer(): tChainVer {
        if (writeReq.commitChainVer != 0) {
            assert writeReq.fullChunkReplace == true;
            return writeReq.commitChainVer;
        } else {
            return chainInfo.vChainId.chainVer;
        }
    }

    start state Init {
        entry (args: (
            writeReq: tWriteReq,
            chainInfo: tLocalReplicaChainInfo,
            replicaChain: tReplicaChain,
            localTarget: StorageTarget,
            localService: StorageService,
            successorService: StorageService))
        {
            writeReq = args.writeReq;
            chainInfo = args.chainInfo;
            replicaChain = args.replicaChain;
            localTarget = args.localTarget;
            localService = args.localService;
            successorService = args.successorService;
            timer = new Timer(this);

            print format("{0}: start write process: {1}", this, writeReq);
            print format("{0}: remove chunk {1}? {2}, dummy write? {3}", this, writeReq.key.chunkId, writeReq.removeChunk, writeReq.length == 0);
            print format("{0}: replication chain: {1}", this, ReplicaChainToString(replicaChain));
            print format("{0}: local chain info: {1}", this, chainInfo);
            print format("{0}: local target: {1}, local service: {2}", this, localTarget, localService);

            // Dispatch: real writes and removals update the local chunk;
            // a zero-length write is the sync worker's dummy write, which
            // replicates the current full chunk instead.
            if (writeReq.removeChunk) {
                goto UpdateLocalChunk;
            } else if (writeReq.length > 0) {
                goto UpdateLocalChunk;
            } else if (writeReq.length == 0) { // dummy write triggered by sync worker
                fromSyncWorker = true;
                goto ReadLocalFullChunk;
            }
        }
    }

    // Apply the update to the local chunk via the storage target, then decide
    // whether to forward as-is or to convert into a full-chunk replace (when
    // the successor is still syncing).
    state UpdateLocalChunk {
        defer eShutDown, eRetryWriteReq;

        entry {
            var writeWork: tWriteWork;
            var writeWorkDone: tWriteWorkDone;
            var commitMsg: tCommitMsg;
            var commitWorkDone: tCommitWorkDone;

            writeWork = (
                from = this, tag = writeReq.tag,
                key = writeReq.key, targetId = chainInfo.localTargetId,
                updateVer = writeReq.updateVer, chainVer = getChainVer(),
                fullChunkReplace = writeReq.fullChunkReplace, removeChunk = writeReq.removeChunk,
                offset = writeReq.offset, length = writeReq.length, dataBytes = writeReq.dataBytes);

            send localTarget, eWriteWork, writeWork;

            // block until the target has applied (or rejected) the update
            receive {
                case eWriteWorkDone: (workDone: tWriteWorkDone) {
                    writeWorkDone = workDone;
                }
            }

            if (writeWorkDone.status == ErrorCode_SUCCESS) {
                currentChunkContent = writeWorkDone.currentChunkContent;
                // updateVer == 0 means "let the first target assign it"
                if (writeReq.updateVer == 0) {
                    writeReq.updateVer = writeWorkDone.updateVer;
                } else {
                    assert writeReq.updateVer == writeWorkDone.updateVer,
                        format("{0}: writeReq.updateVer {1} != writeWorkDone.updateVer {2}, writeWorkDone: {3}",
                            this, writeReq.updateVer, writeWorkDone.updateVer, writeWorkDone);
                }
            } else if (writeWorkDone.status == ErrorCode_CHUNK_MISSING_UPDATE) {
                // since one write request is running over the entire replication chain,
                // there should not exist any out-of-order update request along the chain
                assert false, format("{0}: write request blocked: {1}", this, writeWorkDone);
                return;
            } else if (writeWorkDone.status == ErrorCode_CHUNK_COMMITTED_UPDATE) {
                // duplicate of an already-committed update: short-circuit by
                // synthesizing a stale-commit message and finishing locally
                currentChunkContent = writeWorkDone.currentChunkContent;
                print format("{0}: the update already committed: {1}", this, writeWorkDone);
                commitMsg = (
                    retries = writeReq.retries,
                    tag = writeReq.tag,
                    key = writeReq.key,
                    status = ErrorCode_CHUNK_STALE_COMMIT,
                    commitVer = writeReq.updateVer,
                    commitChainVer = writeWorkDone.chainVer,
                    currentChunkContent = currentChunkContent);
                commitWorkDone = (caller = default(machine), commitMsg = commitMsg,
                    key = commitMsg.key, targetId = chainInfo.localTargetId, force = false, status = ErrorCode_SUCCESS,
                    updateVer = commitMsg.commitVer, commitVer = commitMsg.commitVer, chainVer = commitMsg.commitChainVer, removeChunk = writeReq.removeChunk);
                postWriteCommitted(commitWorkDone);
                raise halt;
            } else if (writeWorkDone.status == ErrorCode_CHUNK_STALE_UPDATE) {
                // stale update: keep going so the chain still converges
                currentChunkContent = writeWorkDone.currentChunkContent;
                print format("{0}: find a stale update: {1}", this, writeReq);
            } else {
                assert false, format("{0}: failed to write to local chunk #{1}: {2}", this, writeReq.key.chunkId, writeWorkDone);
                // send localService, eWriteReqCompleted, (request = writeReq, process = this, status = writeWorkDone.status);
                return;
            }

            // a syncing successor needs the whole chunk, not a partial update
            if (chainInfo.successorTargetState == PublicTargetState_SYNCING) {
                goto ReadLocalFullChunk;
            } else {
                goto ForwardWriteRequest;
            }
        }
    }

    // Rewrite the request into a full-chunk replace by reading the entire
    // local chunk (uncommitted included). A missing chunk turns the request
    // into a removal; a sync-worker dummy write also pins the chunk's
    // original commit chain version.
    state ReadLocalFullChunk {
        entry {
            var readWorkDone: tReadWorkDone;

            // already a full replace: nothing to read
            if (writeReq.fullChunkReplace) {
                goto ForwardWriteRequest;
                return;  // unreachable after goto; kept from original
            }

            readWorkDone = readFullChunk(writeReq.key.chunkId);

            if (readWorkDone.status == ErrorCode_CHUNK_NOT_FOUND) {
                writeReq.removeChunk = true;
            } else if (readWorkDone.status == ErrorCode_SUCCESS) {
                assert writeReq.updateVer == 0 || readWorkDone.chunkMetadata.updateVer == writeReq.updateVer,
                    format("{0}: chunk update version changed, readWorkDone: {1}, writeReq: {2}", this, readWorkDone, writeReq);
                writeReq.updateVer = readWorkDone.chunkMetadata.updateVer;
                if (fromSyncWorker) {
                    print format("{0}: set commit chain version to: {1}, writeReq: {2}", this, readWorkDone.chunkMetadata.chainVer, writeReq);
                    writeReq.commitChainVer = readWorkDone.chunkMetadata.chainVer;
                }
            } else {
                assert false, format("{0}: failed to read chunk: {1}", this, readWorkDone);
            }

            // convert the request into a full-chunk replace carrying the
            // freshly-read content
            writeReq.fullChunkReplace = true;
            writeReq.offset = 0;
            writeReq.length = sizeof(readWorkDone.dataBytes);
            writeReq.dataBytes = readWorkDone.dataBytes;
            currentChunkContent = readWorkDone.dataBytes;

            goto ForwardWriteRequest;
        }
    }

    // Refresh the chain view, then either commit locally (tail) or forward
    // the request to the successor and wait for its commit / a timeout.
    state ForwardWriteRequest {
        entry {
            var commitMsg: tCommitMsg;
            var forwardWrite: tWriteReq;
            var chainInfoResult: tGetLocalChainInfoResult;

            chainInfoResult = GetLocalChainInfo(this, localService, chainInfo.vChainId.chainId);

            if (chainInfoResult.chainInfo.status == ErrorCode_SUCCESS ||
                chainInfoResult.chainInfo.status == ErrorCode_TARGET_INACTIVE) {
                chainInfo = chainInfoResult.chainInfo;
                replicaChain = chainInfoResult.replicaChain;
                successorService = chainInfoResult.successorService;

                assert chainInfo.vChainId.chainVer >= writeReq.key.vChainId.chainVer;
                if (chainInfo.successorTargetState == PublicTargetState_SYNCING && !writeReq.fullChunkReplace) {
                    // successor began syncing since the local update: upgrade
                    // to a full-chunk replace first
                    goto ReadLocalFullChunk;
                    return;
                } else if (writeReq.key.vChainId.chainVer == chainInfo.vChainId.chainVer && forwardRetries > 1) {
                    // same chain version as the last attempt: just re-arm the
                    // timer instead of re-forwarding immediately
                    print format("{0}: chain version is the same, skip retrying #{1}, chain info: {2}", this, forwardRetries, chainInfo.vChainId);
                    StartTimer(timer);
                    return;
                } else {
                    writeReq.key.vChainId.chainVer = chainInfo.vChainId.chainVer;
                }
            } else {
                print format("{0}: cannot get local chain info, error: {1}, stopping", this, chainInfoResult.chainInfo.status);
                raise halt;
            }

            if (chainInfo.isTailTarget) {
                // tail: no successor to wait on — commit locally right away
                print format("{0}: chain {1}: commit write request: {2}", this, writeReq.key.vChainId, writeReq);
                commitMsg = (
                    retries = writeReq.retries,
                    tag = writeReq.tag,
                    key = writeReq.key,
                    status = ErrorCode_SUCCESS,
                    commitVer = writeReq.updateVer,
                    commitChainVer = getChainVer(),
                    currentChunkContent = currentChunkContent);
                goto CommitLocalChunk, commitMsg;
            } else {
                forwardRetries = forwardRetries + 1;
                forwardWrite = writeReq;
                forwardWrite.from = this;
                forwardWrite.fromClient = false;
                forwardWrite.retries = forwardRetries;
                print format("{0}: chain {1}: forward write request #{2} to {3}: {4}", this, writeReq.key.vChainId, forwardRetries, successorService, forwardWrite);
                send successorService, eWriteReq, forwardWrite;
                StartTimer(timer);
            }
        }

        // Commit acknowledgment from the successor. Only accept it when it
        // matches the latest forward attempt and does not carry a newer chain
        // version than we know about; otherwise re-forward.
        on eCommitMsg do (commitMsg: tCommitMsg) {
            if (commitMsg.status == ErrorCode_SUCCESS || commitMsg.status == ErrorCode_CHUNK_STALE_COMMIT) {
                if (commitMsg.retries != forwardRetries) {
                    // ack for an earlier forward attempt: ignore and retry
                    print format("{0}: retries not equal to {1}, commitMsg: {2}", this, forwardRetries, commitMsg);
                    goto ForwardWriteRequest;
                    return;
                } else if (commitMsg.commitChainVer <= chainInfo.vChainId.chainVer) {
                    // content must agree along the chain before committing
                    assert currentChunkContent == commitMsg.currentChunkContent, format("local chunk content {0} not equal to the successor's: {1}", currentChunkContent, commitMsg);
                    goto CommitLocalChunk, commitMsg;
                    return;
                } else {
                    print format("{0}: reject commit message with chain version {1} > {2}",
                        this, commitMsg.key.vChainId, chainInfo.vChainId);
                    goto ForwardWriteRequest;
                    return;
                }
            } else if (commitMsg.status == ErrorCode_CHAIN_VERION_MISMATCH) {
                // successor saw a different chain view: reset our version and
                // let the next forward pick up the fresh one
                print format("{0}: chain version mismatch: {1}", this, commitMsg);
                writeReq.key.vChainId.chainVer = 0;
                goto ForwardWriteRequest;
                return;
            } else {
                assert false, format("{0}: failed commit message: {1}", this, commitMsg);
            }

            send localService, eWriteReqCompleted, (request = writeReq, process = this, status = commitMsg.status);
        }

        // no ack in time: refresh the chain view and forward again
        on eTimeOut goto ForwardWriteRequest;

        on eShutDown do (from: machine) {
            // send from, eProcssStopped, this;
            raise halt;
        }
    }

    // Commit the update on the local target, then finish via
    // postWriteCommitted and halt. Late retries/acks are ignored here.
    state CommitLocalChunk {
        ignore eRetryWriteReq, eCommitMsg, eTimeOut;

        entry (commitMsg: tCommitMsg) {
            send localTarget, eCommitWork, (from = this, commitMsg = commitMsg, key = commitMsg.key,
                targetId = chainInfo.localTargetId, commitVer = commitMsg.commitVer, commitChainVer = commitMsg.commitChainVer, force = false);
            CancelTimer(timer);
        }

        on eCommitWorkDone do (commitWorkDone: tCommitWorkDone) {
            postWriteCommitted(commitWorkDone);
            raise halt;
        }

        on eShutDown do (from: machine) {
            // send from, eProcssStopped, this;
            raise halt;
        }
    }
}
|
|
|
|
machine StorageService
|
|
{
|
|
var nodeId: tNodeId;            // id of the node this service represents
var nextRequestId: tRequestId;  // counter backing newMessageTag()
var timer: Timer;
var mgmtClient: MgmtClient;
// internal state machines
var localTargets: tLocalTargetMap;                      // storage targets hosted on this node
var syncWorkers: map[tVersionedChainId, SyncWorker];    // one worker per (chain, version) while a successor syncs
// configuration info from mgmt service
var routingVer: tRoutingVer;
var replicaChains: tReplicaChainMap; // current serving replication chains
var storageServices: tStorageServiceMap; // this works like a connection pool, which is just for P code
// calculated from localTargets and replicaChains
var predecessorNodeIds: map[tChainId, tNodeId];     // absent key => local target is the chain head
var successorNodeIds: map[tChainId, tNodeId];       // absent key => local target is the chain tail
var successorTargetIds: map[tChainId, tTargetId];
var successorTargetStates: map[tChainId, tPublicTargetState];
var localTargetIds: map[tChainId, tTargetId];
var publicTargetStates: map[tTargetId, tPublicTargetState];
var localTargetStates: map[tTargetId, tLocalTargetState];
var uncommittedWriteRetryCountdown: int;
// read/write processes
var runningReadProcs: set[ReadProcess];
var runningWriteProcs: map[tChunkId, map[tMessageTag, WriteProcess]];   // at most one running write per chunk
// in-memory state
var runningWriteReqs: map[tChunkId, map[tMessageTag, tWriteReq]];
var blockedWriteReqs: map[tChunkId, map[tMessageTag, tWriteReq]]; // pending writes queued on a busy chunk
var committedWriteReqs: map[tMessageTag, (commitVer: tChunkVer, commitChainVer: tChainVer)];
var restarting: bool;
|
|
|
|
// Allocate a fresh, node-unique message tag by bumping the request counter.
fun newMessageTag(): tMessageTag {
    var tag: tMessageTag;

    nextRequestId = nextRequestId + 1;
    tag = (nodeId = nodeId, requestId = nextRequestId);
    return tag;
}
|
|
|
|
// Build this node's local view of replication chain `chainId`.
// Status values:
//   - ErrorCode_CHAIN_VERION_MISMATCH when the chain id is unknown here;
//   - ErrorCode_TARGET_INACTIVE when the local target is not active;
//   - ErrorCode_SUCCESS otherwise, with head/tail flags and neighbor info set.
// Fix: removed the unused local `targetId`.
fun lookupLocalReplicaChain(chainId: tChainId): tLocalReplicaChainInfo {
    var chainInfo: tLocalReplicaChainInfo;
    var replicaChain: tReplicaChain;

    if (!(chainId in replicaChains)) {
        print format("chain id {0} does not exist", chainId);
        chainInfo.status = ErrorCode_CHAIN_VERION_MISMATCH;
        return chainInfo;
    }

    replicaChain = replicaChains[chainId];
    chainInfo.vChainId = replicaChain.vChainId;
    // NOTE(review): these lookups assume every serving chain has entries in
    // localTargetIds/publicTargetStates; the membership check further below
    // re-validates before trusting the state — confirm the maps are always
    // populated together with replicaChains.
    chainInfo.localTargetId = localTargetIds[chainId];
    chainInfo.publicTargetState = publicTargetStates[localTargetIds[chainId]];

    // no predecessor => head of the chain
    if (chainId in predecessorNodeIds) {
        chainInfo.predecessorNodeId = predecessorNodeIds[chainId];
    } else {
        chainInfo.isHeadTarget = true;
    }

    // no successor => tail of the chain
    if (chainId in successorNodeIds) {
        chainInfo.successorNodeId = successorNodeIds[chainId];
        chainInfo.successorTargetId = successorTargetIds[chainId];
        chainInfo.successorTargetState = successorTargetStates[chainId];
    } else {
        chainInfo.isTailTarget = true;
    }

    if (!(chainId in localTargetIds &&
          localTargetIds[chainId] in publicTargetStates &&
          IsActiveTargetState(publicTargetStates[localTargetIds[chainId]])))
    {
        print format("{0}: cannot find an active local target of chain {1}, targets: {2}, localTargetIds: {3}, publicTargetStates: {4}",
            this, replicaChain.vChainId, replicaChain.targets, localTargetIds, PublicTargetStatesToString(publicTargetStates));
        chainInfo.status = ErrorCode_TARGET_INACTIVE;
        return chainInfo;
    }

    chainInfo.status = ErrorCode_SUCCESS;
    return chainInfo;
}
|
|
|
|
// Admit a write request: enforce one running write per chunk (queueing the
// rest as blocked), validate the requester's chain view, and spawn a
// WriteProcess for an accepted request.
// Returns ErrorCode_CHUNK_BUSY (queued), ErrorCode_CHAIN_VERION_MISMATCH
// (rejected, requester notified), or ErrorCode_SUCCESS (process spawned).
fun createWriteProcess(writeReq: tWriteReq): tErrorCode {
    var chainInfo: tLocalReplicaChainInfo;
    var successorService: StorageService;

    // lazily create the per-chunk bookkeeping maps
    if (!(writeReq.key.chunkId in runningWriteProcs)) {
        runningWriteProcs += (writeReq.key.chunkId, default(map[tMessageTag, WriteProcess]));
    }

    if (!(writeReq.key.chunkId in runningWriteReqs)) {
        runningWriteReqs += (writeReq.key.chunkId, default(map[tMessageTag, tWriteReq]));
    }

    // if (writeReq.tag in runningWriteReqs[writeReq.key.chunkId]) {
    //   print format("{0}: duplicate running write request, retries:{1}, tag:{2}, key:{3}, runningWriteProcs: {4}, runningWriteReqs: {5}",
    //     this, writeReq.retries, writeReq.tag, writeReq.key, runningWriteProcs[writeReq.key.chunkId], runningWriteReqs[writeReq.key.chunkId]);
    //   return ErrorCode_DUPLICATE_REQUEST;
    // }

    if (!(writeReq.key.chunkId in blockedWriteReqs)) {
        blockedWriteReqs += (writeReq.key.chunkId, default(map[tMessageTag, tWriteReq]));
    }

    // if (writeReq.tag in blockedWriteReqs[writeReq.key.chunkId]) {
    //   print format("{0}: duplicate blocked write request, retries:{1}, tag:{2}, key:{3}, blockedWriteReqs: {4}, runningWriteProcs: {5}, runningWriteReqs: {6}",
    //     this, writeReq.retries, writeReq.tag, writeReq.key, keys(blockedWriteReqs[writeReq.key.chunkId]), runningWriteProcs[writeReq.key.chunkId], runningWriteReqs[writeReq.key.chunkId]);
    //   return ErrorCode_DUPLICATE_REQUEST;
    // }

    // only one write may run per chunk: queue the request while busy
    if (sizeof(runningWriteProcs[writeReq.key.chunkId]) > 0) {
        print format("{0}: add blocked write request: <tag:{1}, key:{2}, updateVer:{3}>, runningWriteProcs: {4}, runningWriteReqs: {5}, blockedWriteReqs: {6}",
            this, writeReq.tag, writeReq.key, writeReq.updateVer, runningWriteProcs[writeReq.key.chunkId],
            keys(runningWriteReqs[writeReq.key.chunkId]), keys(blockedWriteReqs[writeReq.key.chunkId]));
        blockedWriteReqs[writeReq.key.chunkId][writeReq.tag] = writeReq;
        return ErrorCode_CHUNK_BUSY;
    }

    chainInfo = lookupLocalReplicaChain(writeReq.key.vChainId.chainId);

    // reject when the chain is unknown/inactive or the requester's versioned
    // chain id is stale; the reply shape depends on who sent the request
    if (chainInfo.status != ErrorCode_SUCCESS || writeReq.key.vChainId != chainInfo.vChainId) {
        print format("{0}: chain version mismatch, vChainId: {1}, writeReq: {2}", this, chainInfo.vChainId, writeReq);
        if (writeReq.fromClient) {
            send writeReq.from, eWriteResp, (tag = writeReq.tag, key = writeReq.key, status = ErrorCode_CHAIN_VERION_MISMATCH, commitVer = 0);
        } else {
            send writeReq.from, eCommitMsg, (retries = writeReq.retries, tag = writeReq.tag, key = writeReq.key, status = ErrorCode_CHAIN_VERION_MISMATCH, commitVer = 0, commitChainVer = 0, currentChunkContent = default(tBytes));
        }
        return ErrorCode_CHAIN_VERION_MISMATCH;
    }

    // tail targets have no successor; leave successorService at its default
    if (chainInfo.successorNodeId in storageServices) {
        successorService = storageServices[chainInfo.successorNodeId];
    }

    // if (writeReq.tag in committedWriteReqs) {
    //   print format("{0}: duplicate committed write request, retries:{1}, tag:{2}, key:{3}, commitVer: {4}",
    //     this, writeReq.retries, writeReq.tag, writeReq.key, committedWriteReqs[writeReq.tag]);
    //   if (writeReq.fromClient) {
    //     send writeReq.from, eWriteResp, (tag = writeReq.tag, key = writeReq.key, status = ErrorCode_SUCCESS,
    //       commitVer = committedWriteReqs[writeReq.tag].commitVer);
    //   } else {
    //     send writeReq.from, eCommitMsg, (tag = writeReq.tag, key = writeReq.key, status = ErrorCode_SUCCESS,
    //       commitVer = committedWriteReqs[writeReq.tag].commitVer, commitChainVer = committedWriteReqs[writeReq.tag].commitChainVer, currentChunkContent = default(tBytes));
    //   }
    //   return ErrorCode_DUPLICATE_REQUEST;
    // }

    // record the request and spawn the per-request write state machine
    runningWriteReqs[writeReq.key.chunkId][writeReq.tag] = writeReq;
    runningWriteProcs[writeReq.key.chunkId][writeReq.tag] = new WriteProcess((
        writeReq = writeReq,
        chainInfo = chainInfo,
        replicaChain = replicaChains[chainInfo.vChainId.chainId],
        localTarget = localTargets[chainInfo.localTargetId],
        localService = this,
        successorService = successorService));
    return ErrorCode_SUCCESS;
}
|
|
|
|
// Return true iff some running write request targets the same chain as
// `vChainId` but was issued under a different chain version — i.e. a stale
// in-flight write exists. Fix: removed the unused local `successorService`.
fun hasRunningWriteReqsWithChainIdNotEqualTo(vChainId: tVersionedChainId): bool {
    var chunkId: tChunkId;
    var writeReq: tWriteReq;

    foreach (chunkId in keys(runningWriteReqs)) {
        foreach (writeReq in values(runningWriteReqs[chunkId])) {
            if (writeReq.key.vChainId.chainId == vChainId.chainId && writeReq.key.vChainId.chainVer != vChainId.chainVer) {
                print format("{0}: found running write request {1} with chain id {2} not equal to {3}: {4}",
                    this, runningWriteProcs[chunkId], writeReq.key.vChainId, vChainId, writeReq);
                return true;
            }
        }
    }

    return false;
}
|
|
|
|
// Ensure exactly one SyncWorker exists for the current version of `chainId`:
// halt and drop workers bound to an older chain version, then spawn a new
// worker when the successor target is still syncing.
fun startSyncWorker(chainId: tChainId) {
    var vChainId: tVersionedChainId;
    var chainInfo: tLocalReplicaChainInfo;

    chainInfo = lookupLocalReplicaChain(chainId);

    if (chainInfo.status != ErrorCode_SUCCESS) {
        return;
    }

    // shut down workers created under a stale version of this chain
    foreach (vChainId in keys(syncWorkers)) {
        if (vChainId.chainId == chainInfo.vChainId.chainId && vChainId.chainVer != chainInfo.vChainId.chainVer) {
            print format("shutdown old sync worker {0}: chain {1} local target {2}",
                syncWorkers[vChainId], vChainId, chainInfo.localTargetId);
            send syncWorkers[vChainId], halt;
            syncWorkers -= (vChainId);
        }
    }

    if (chainInfo.successorTargetState == PublicTargetState_SYNCING) {
        if (!(chainInfo.vChainId in syncWorkers)) {
            syncWorkers[chainInfo.vChainId] = new SyncWorker((
                // synthesize a distinct node id for the worker from the
                // chain id and version (10000 assumed > max chain version)
                nodeId = chainInfo.vChainId.chainId * 10000 + chainInfo.vChainId.chainVer,
                vChainId = chainInfo.vChainId,
                localTarget = localTargets[chainInfo.localTargetId],
                localService = this,
                successorTargetId = chainInfo.successorTargetId,
                successorService = storageServices[chainInfo.successorNodeId]));
            print format("create new sync worker {0}: chain {1} local target {2}, successor target {3}",
                syncWorkers[chainInfo.vChainId], chainInfo.vChainId, chainInfo.localTargetId, chainInfo.successorTargetId);
        }
    }
}
|
|
|
|
// Resume writes queued while chunk `chunkId` was busy. Requests are retried
// in queue order; the function stops after the FIRST one that starts
// successfully (the rest stay queued until that write completes).
// NOTE(review): the `updateVer` parameter is currently unused — kept for
// interface compatibility; confirm whether callers expect filtering by it.
fun startBlockedWriteReqs(chunkId: tChunkId, updateVer: tChunkVer) {
    var blockedReq: tWriteReq;

    if (!(chunkId in blockedWriteReqs)) {
        print format("{0}: no blocked write request on chunk {1}", this, chunkId);
        return;
    }

    // log the whole queue for debugging before mutating it
    foreach (blockedReq in values(blockedWriteReqs[chunkId])) {
        print format("{0}: blocked write request: {1}", this, blockedReq);
    }

    foreach (blockedReq in values(blockedWriteReqs[chunkId])) {
        blockedWriteReqs[chunkId] -= (blockedReq.tag);
        if (createWriteProcess(blockedReq) == ErrorCode_SUCCESS) {
            print format("{0}: restart blocked write request: {1}", this, blockedReq);
            return;
        }
    }
}
|
|
|
|
// Push refreshed chain info (via eRetryWriteReq) to every running
// WriteProcess whose request targets `chainId`, so each can re-forward under
// the current chain version. No-op when the chain lookup fails.
// Fix: removed the unused local `currentWriteReqs`.
fun retryRunningWriteReqs(chainId: tChainId) {
    var chunkId: tChunkId;
    var writeReq: tWriteReq;
    var chainInfo: tLocalReplicaChainInfo;
    var retryWrite: tRetryWriteReq;
    var successorService: StorageService;

    chainInfo = lookupLocalReplicaChain(chainId);

    if (chainInfo.status != ErrorCode_SUCCESS) {
        print format("{0}: look up chain error {1}, skip retrying running write requests", this, chainInfo.status);
        return;
    }

    // tail targets have no successor; leave successorService at its default
    if (chainInfo.successorNodeId in storageServices) {
        successorService = storageServices[chainInfo.successorNodeId];
    }

    print format("{0}: retrying running writes on chain {1}", this, ReplicaChainToString(replicaChains[chainInfo.vChainId.chainId]));

    foreach (chunkId in keys(runningWriteReqs)) {
        foreach (writeReq in values(runningWriteReqs[chunkId])) {
            if (writeReq.key.vChainId.chainId == chainId) {
                print format("{0}: retrying running write, process: {1}, request: {2}",
                    this, runningWriteProcs[chunkId][writeReq.tag], runningWriteReqs[chunkId][writeReq.tag]);
                assert writeReq.tag in runningWriteProcs[chunkId],
                    format("{0}: write request {1} not in runningWriteProcs[chunkId] {2}", this, writeReq, runningWriteProcs[chunkId]);

                retryWrite = (
                    chainInfo = chainInfo,
                    replicaChain = replicaChains[chainInfo.vChainId.chainId],
                    localTarget = localTargets[chainInfo.localTargetId],
                    localService = this,
                    successorService = successorService);
                send runningWriteProcs[chunkId][writeReq.tag], eRetryWriteReq, retryWrite;
            }
        }
    }
}
|
|
|
|
// Re-issue retry notifications for every chain this service currently serves.
fun retryAllRunningWriteReqs() {
    var servingChainId: tChainId;

    print format("{0}: retry all running write requests: {1}", this, runningWriteReqs);

    foreach (servingChainId in keys(replicaChains)) {
        retryRunningWriteReqs(servingChainId);
    }
}
|
|
|
|
// During restart: for every local target that the new routing info marks
// LASTSRV (the last serving replica of its chain), force-commit every
// uncommitted chunk so the chain can make progress without the other
// replicas. Blocks on each commit; transitions to Offline if a shutdown
// arrives while waiting for a commit ack.
fun commitChunksOnLastServingTarget(routingInfo: tRoutingInfo) {
  var chainId: tChainId;
  var vChainId: tVersionedChainId;
  var chunkId: tChunkId;
  var targetId: tTargetId;
  var targetSyncInfo: tTargetSyncInfo;
  var chunkMetadata: tChunkMetadata;

  foreach (chainId in keys(routingInfo.replicaChains)) {
    vChainId = routingInfo.replicaChains[chainId].vChainId;

    foreach (targetId in routingInfo.replicaChains[chainId].targets) {
      if (targetId in localTargets && routingInfo.replicaChains[chainId].states[targetId] == PublicTargetState_LASTSRV) {
        print format("{0}: commit chunks on target {1} of chain {2}", this, targetId, vChainId);
        targetSyncInfo = GetTargetSyncInfo(this, localTargets[targetId]);

        foreach (chunkId in keys(targetSyncInfo.chunkStoreSnapshot)) {
          chunkMetadata = targetSyncInfo.chunkStoreSnapshot[chunkId].metadata;

          // Committed chunks must satisfy updateVer == commitVer; skip them.
          // Uncommitted chunks must have diverging versions.
          if (chunkMetadata.chunkState == ChunkState_COMMIT) {
            assert chunkMetadata.updateVer == chunkMetadata.commitVer, format("{0}: invalid committed chunk state: {1}", this, chunkMetadata);
            continue;
          } else {
            assert chunkMetadata.updateVer != chunkMetadata.commitVer, format("{0}: invalid uncommitted chunk state: {1}", this, chunkMetadata);
          }

          if (chunkMetadata.chunkState == ChunkState_DIRTY) {
            print format("{0}: [warning] force commit dirty chunk {1} on last-serving target {2} of {3}: {4}", this, chunkId, targetId, vChainId, chunkMetadata);
          } else {
            print format("{0}: [info] commit chunk {1} on last-serving target {2} of {3}: {4}", this, chunkId, targetId, vChainId, chunkMetadata);
          }

          // Force-commit at the chunk's current update version and the
          // chain version from the new routing info.
          send localTargets[targetId], eCommitWork, (from = this, commitMsg = default(tCommitMsg),
            key = (vChainId = vChainId, chunkId = chunkId),
            targetId = targetId, commitVer = chunkMetadata.updateVer, commitChainVer = vChainId.chainVer, force = true);
          receive {
            case eShutDown: (from: machine) {
              print format("{0}: node {1} is going to shutdown during restart", this, nodeId);
              goto Offline;
            }
            case eCommitWorkDone: (workDone: tCommitWorkDone) {
              print format("{0}: chunk committed: {1}", this, workDone);
            }
          }
        }
      }
    }
  }
}
|
|
|
|
// Map lookup with a fallback: returns nodeIds[chainId] when the key is
// present, otherwise the supplied default value.
fun getNodeIdOrDefaultValue(chainId: tChainId, nodeIds: map[tChainId, tNodeId], defaultVal: tNodeId): tNodeId {
  if (!(chainId in nodeIds)) {
    return defaultVal;
  }
  return nodeIds[chainId];
}
|
|
|
|
// Applies a newer routing table: refreshes the local view of replica
// chains, prunes chains this node no longer serves, and recomputes the
// predecessor/successor node and target bookkeeping for every chain whose
// version changed. Stale or duplicate routing versions are ignored.
fun processRoutingInfo(routingInfo: tRoutingInfo) {
  var newRoutingVer: tRoutingVer;
  var newReplicaChains: tReplicaChainMap;
  var replicaChain: tReplicaChain;
  var localTargetId: tTargetId;
  var localState: tLocalTargetState;
  var publicState: tPublicTargetState;
  var successorTargetId: tTargetId;
  var predecessorTargetId: tTargetId;
  var chainId: tChainId;
  var nodeId: tNodeId;
  var newSuccessorNodeId: tNodeId;
  var oldSuccessorNodeId: tNodeId;
  var targetIdx: int;

  newRoutingVer = routingInfo.routingVer;
  newReplicaChains = routingInfo.replicaChains;

  // Only strictly newer routing versions are applied.
  if (routingVer > newRoutingVer) {
    print format("{0}: error: routingVer {1} > newRoutingVer {2}", this, routingVer, newRoutingVer);
    return;
  } else if (routingVer == newRoutingVer) {
    print format("{0}: ignore: routingVer {1} == newRoutingVer {2}", this, routingVer, newRoutingVer);
    return;
  }

  print format("{0}: updating replica chains from version {1} to {2}", this, routingVer, newRoutingVer);
  routingVer = newRoutingVer;
  storageServices = routingInfo.storageServices;

  // Drop bookkeeping for chains absent from the new routing info.
  foreach (chainId in keys(replicaChains)) {
    if (!(chainId in newReplicaChains)) {
      replicaChains -= (chainId);
      predecessorNodeIds -= (chainId);
      successorNodeIds -= (chainId);
      successorTargetIds -= (chainId);
      successorTargetStates -= (chainId);
      if (chainId in localTargetIds) {
        publicTargetStates -= (localTargetIds[chainId]);
        localTargetIds -= (chainId);
      }
    }
  }

  foreach (chainId in keys(newReplicaChains)) {
    if (chainId in replicaChains) {
      // Chain versions never move backwards.
      assert replicaChains[chainId].vChainId.chainVer <= newReplicaChains[chainId].vChainId.chainVer,
        format("error: local replica chain version {0} > {1}",
          replicaChains[chainId].vChainId.chainVer, newReplicaChains[chainId].vChainId.chainVer);

      if (newReplicaChains[chainId].vChainId == replicaChains[chainId].vChainId) {
        print format("{0}: chain version not changed, skipping {1}", this, replicaChains[chainId].vChainId);
        continue;
      }
    }

    replicaChain = newReplicaChains[chainId];
    replicaChains[chainId] = replicaChain;

    print format("{0}: new replica chain: {1}", this, ReplicaChainToString(replicaChain));

    // Locate our local target's position on the chain and refresh the
    // predecessor/successor links around it.
    targetIdx = 0;
    while (targetIdx < sizeof(replicaChain.targets)) {
      localTargetId = replicaChain.targets[targetIdx];

      if (localTargetId in localTargets)
      {
        localState = localTargetStates[localTargetId];
        publicState = replicaChain.states[localTargetId];
        localTargetIds[chainId] = localTargetId;
        publicTargetStates[localTargetId] = publicState;

        print format("{0}: local target #{1} {2} with local state {3} & public state {4} on chain {5}",
          this, targetIdx, localTargetId, LocalTargetStateToString(localState), PublicTargetStateToString(publicState to int), replicaChain.vChainId);

        if (IsActiveTargetState(publicState)) {
          if (targetIdx > 0) {
            predecessorTargetId = replicaChain.targets[targetIdx - 1];
            nodeId = replicaChain.nodes[predecessorTargetId];
            print format("chain {0} predecessor target #{1} {2} node {3}", chainId, targetIdx - 1, predecessorTargetId, nodeId);
            predecessorNodeIds[chainId] = nodeId;
          } else {
            // Head of chain: no predecessor.
            predecessorNodeIds -= (chainId);
          }

          newSuccessorNodeId = -1;
          oldSuccessorNodeId = getNodeIdOrDefaultValue(chainId, successorNodeIds, -1);

          // A successor only counts when it exists and is active.
          if (targetIdx + 1 < sizeof(replicaChain.targets) &&
            IsActiveTargetState(replicaChain.states[replicaChain.targets[targetIdx + 1]]))
          {
            successorTargetId = replicaChain.targets[targetIdx + 1];
            nodeId = replicaChain.nodes[successorTargetId];
            print format("chain {0} successor target #{1} {2} node {3}", chainId, targetIdx + 1, successorTargetId, nodeId);
            successorNodeIds[chainId] = nodeId;
            successorTargetIds[chainId] = successorTargetId;
            successorTargetStates[chainId] = replicaChain.states[successorTargetId];
            newSuccessorNodeId = nodeId;
          } else {
            // Tail of chain or inactive successor: clear successor links.
            successorNodeIds -= (chainId);
            successorTargetIds -= (chainId);
            successorTargetStates -= (chainId);
          }

          if (newSuccessorNodeId != oldSuccessorNodeId) {
            print format("{0}: chain {1} target #{2} {3} 's successor node is changed from {4} to {5}, public state: {6}, sizeof(runningWriteReqs): {7}",
              this, replicaChain.vChainId, targetIdx, localTargetId, oldSuccessorNodeId, newSuccessorNodeId, PublicTargetStateToString(publicState to int), sizeof(runningWriteReqs));

            // retryRunningWriteReqs(chainId);
          }
        }
      }

      targetIdx = targetIdx + 1;
    }
  }
}
|
|
|
|
// Reconciles each local target's state with its public (routing) state:
// - an ONLINE target (not restarting) that routing marks OFFLINE/LASTSRV,
//   or an UPTODATE target marked OFFLINE/LASTSRV/WAITING, forces the
//   whole node to the Offline state;
// - an ONLINE target marked SERVING is promoted to UPTODATE.
fun updateLocalTargetStates() {
  var targetId: tTargetId;
  var localState: tLocalTargetState;
  var publicState: tPublicTargetState;

  foreach (targetId in keys(localTargets)) {
    localState = localTargetStates[targetId];

    // NOTE(review): when targetId has no entry in publicTargetStates,
    // publicState keeps its value from the previous loop iteration —
    // confirm this carry-over is intended.
    if (targetId in publicTargetStates) {
      publicState = publicTargetStates[targetId];
    }

    if ((localState == LocalTargetState_ONLINE && !restarting &&
         (publicState == PublicTargetState_OFFLINE ||
          publicState == PublicTargetState_LASTSRV)) ||
        (localState == LocalTargetState_UPTODATE &&
         (publicState == PublicTargetState_OFFLINE ||
          publicState == PublicTargetState_LASTSRV ||
          publicState == PublicTargetState_WAITING)))
    {
      print format("{0}: move to offline state (shutdown), local target: {1}, local state: {2}, public state: {3}",
        this, targetId, LocalTargetStateToString(localState), PublicTargetStateToString(publicState to int));
      // goto leaves the current action immediately; the unreachable
      // `return` that used to follow it has been removed.
      goto Offline;
    }
    else if (localState == LocalTargetState_ONLINE && publicState == PublicTargetState_SERVING)
    {
      print format("{0}: move to up-to-date state, local target: {1}, local state: {2}, public state: {3}",
        this, targetId, LocalTargetStateToString(localState), PublicTargetStateToString(publicState to int));
      localTargetStates[targetId] = LocalTargetState_UPTODATE;
    }
  }
}
|
|
|
|
// Heartbeat handler: refreshes local target states, then reports them to
// the management service together with this node's routing version.
fun reportTargetStates(heartbeatConns: tHeartbeatConns) {
  var updateTargetStateMsg: tUpdateTargetStateMsg;

  // May transition to Offline as a side effect (goto inside the helper).
  updateLocalTargetStates();

  updateTargetStateMsg = (
    from = heartbeatConns.mgmtClient,
    tag = newMessageTag(),
    routingVer = routingVer,
    nodeId = nodeId,
    targetStates = localTargetStates,
    localTargets = localTargets,
    storageService = this);

  send heartbeatConns.mgmtService, eUpdateTargetStateMsg, updateTargetStateMsg;
}
|
|
|
|
// Initial state: records node identity and local targets, starts the
// timer and management client, and defers all data-path traffic until
// the first routing info arrives.
start state Init {
  // Hold client/replication/control traffic until routing is known.
  defer eReadReq, eWriteReq, eUpdateMsg, eCommitMsg, eShutDown, eRestart;

  entry (args: (
    nodeId: tNodeId,
    localTargets: tLocalTargetMap,
    mgmtService: MgmtService)) {
    var targetId: tTargetId;

    nodeId = args.nodeId;
    localTargets = args.localTargets;

    // Fresh targets start in the up-to-date state.
    foreach (targetId in keys(localTargets)) {
      localTargetStates += (targetId, LocalTargetState_UPTODATE);
    }

    timer = new Timer(this);
    mgmtClient = new MgmtClient((nodeId = nodeId, clientHost = this, mgmtService = args.mgmtService, sendHeartbeats = true));
  }

  on eSendHeartbeat do reportTargetStates;

  // The first routing info moves the service into normal operation.
  on eNewRoutingInfo goto WaitForRequests with processRoutingInfo;
}
|
|
|
|
// Normal serving state: handles reads/writes, the sync protocol,
// routing updates and heartbeat reporting.
state WaitForRequests {
  ignore eRestart;

  entry {
    // Reaching this state means any restart has completed.
    restarting = false;
  }

  on eSendHeartbeat do reportTargetStates;

  on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
    var chainId: tChainId;

    processRoutingInfo(routingInfo);

    updateLocalTargetStates();

    // Re-evaluate sync workers for every chain under the new routing.
    foreach (chainId in keys(replicaChains)) {
      startSyncWorker(chainId);
    }
  }

  on eShutDown goto Offline with (from: machine) {
    print format("{0}: node {1} is going to shutdown", this, nodeId);
  }

  // A predecessor asks for our chunk snapshot to begin syncing us.
  on eSyncStartReq do (syncStartReq: tSyncStartReq) {
    var chainInfo: tLocalReplicaChainInfo;
    var syncInfo: tTargetSyncInfo;

    chainInfo = lookupLocalReplicaChain(syncStartReq.vChainId.chainId);

    if (chainInfo.status != ErrorCode_SUCCESS || chainInfo.vChainId != syncStartReq.vChainId) {
      send syncStartReq.from, eSyncStartResp, (status = ErrorCode_CHAIN_VERION_MISMATCH, syncInfo = default(tTargetSyncInfo));
      return;
    }

    syncInfo = GetTargetSyncInfo(this, localTargets[syncStartReq.targetId]);
    send syncStartReq.from, eSyncStartResp, (status = ErrorCode_SUCCESS, syncInfo = syncInfo);
  }

  // The predecessor finished syncing us: mark the local target up-to-date.
  on eSyncDoneReq do (syncDoneReq: tSyncDoneReq) {
    var chainInfo: tLocalReplicaChainInfo;

    chainInfo = lookupLocalReplicaChain(syncDoneReq.vChainId.chainId);

    if (chainInfo.status != ErrorCode_SUCCESS || chainInfo.vChainId != syncDoneReq.vChainId) {
      send syncDoneReq.from, eSyncDoneResp, (status = ErrorCode_CHAIN_VERION_MISMATCH, vChainId = syncDoneReq.vChainId, targetId = syncDoneReq.targetId);
      return;
    }

    print format("{0}: done syncing: {1}", this, syncDoneReq);

    localTargetStates[chainInfo.localTargetId] = LocalTargetState_UPTODATE;
    send syncDoneReq.from, eSyncDoneResp, (status = ErrorCode_SUCCESS, vChainId = syncDoneReq.vChainId, targetId = syncDoneReq.targetId);
  }

  // Reports this node's view of a chain (used by write processes).
  on eGetLocalChainInfo do (args: (from: machine, chainId: tChainId)) {
    var chainInfo: tLocalReplicaChainInfo;
    var replicaChain: tReplicaChain;
    var successorService: StorageService;

    chainInfo = lookupLocalReplicaChain(args.chainId);

    if (chainInfo.vChainId.chainId in replicaChains) {
      replicaChain = replicaChains[chainInfo.vChainId.chainId];
    }

    if (chainInfo.successorNodeId in storageServices) {
      successorService = storageServices[chainInfo.successorNodeId];
    }

    send args.from, eGetLocalChainInfoResult, (chainInfo = chainInfo, replicaChain = replicaChain, successorService = successorService);
  }

  // Spawns a ReadProcess per read request; rejects on chain mismatch.
  on eReadReq do (readReq: tReadReq) {
    var chainInfo: tLocalReplicaChainInfo;

    chainInfo = lookupLocalReplicaChain(readReq.key.vChainId.chainId);

    if (chainInfo.status != ErrorCode_SUCCESS) {
      send readReq.from, eReadResp, (tag = readReq.tag, key = readReq.key, status = ErrorCode_CHAIN_VERION_MISMATCH,
        chunkMetadata = default(tChunkMetadata), dataBytes = default(tBytes));
      return;
    }

    runningReadProcs += (new ReadProcess((
      readReq = readReq,
      chainInfo = chainInfo,
      replicaChain = replicaChains[chainInfo.vChainId.chainId],
      localTarget = localTargets[chainInfo.localTargetId],
      localService = this)));
  }

  on eWriteReq do createWriteProcess;

  // A write process reports completion: record the commit, drop it from
  // the running set and unblock writes queued on the same chunk.
  on eWriteReqCompleted do (context: tWriteContext) {
    var writeReq: tWriteReq;
    var writeProcess: WriteProcess;
    var status : tErrorCode;

    writeReq = context.request;
    writeProcess = context.process;
    status = context.status;

    // Ignore stale completions from processes already removed/replaced.
    if (!(writeReq.key.chunkId in runningWriteProcs) ||
      !(writeReq.tag in runningWriteProcs[writeReq.key.chunkId]) ||
      writeProcess != runningWriteProcs[writeReq.key.chunkId][writeReq.tag])
    {
      print format("{0}: write process {1} has been removed from the running set",
        this, writeProcess);
      return;
    }

    if (status == ErrorCode_SUCCESS) {
      if (writeReq.tag in committedWriteReqs)
        print format("{0}: found write request in committedWriteReqs[tag:{1}] {2}", this, writeReq.tag, committedWriteReqs[writeReq.tag]);
      else
        committedWriteReqs += (writeReq.tag, (commitVer = writeReq.updateVer, commitChainVer = writeReq.commitChainVer));
    }

    print format("{0}: write completed, process: {1}, request: {2}", this, writeProcess, writeReq);

    runningWriteProcs[writeReq.key.chunkId] -= (writeReq.tag);
    runningWriteReqs[writeReq.key.chunkId] -= (writeReq.tag);

    startBlockedWriteReqs(writeReq.key.chunkId, writeReq.updateVer);
  }
}
|
|
|
|
// Offline state: halts all worker machines, clears volatile in-memory
// bookkeeping, and marks every local target OFFLINE. Left via eRestart.
state Offline {
  // Sync/routing traffic is deferred so a later restart can observe it.
  defer eSyncStartReq, eSyncDoneReq, eNewRoutingInfo;
  ignore eTimeOut, eSendHeartbeat, eShutDown,
    eReadReq, eWriteReq, eUpdateMsg,
    eWriteReqCompleted, eGetLocalChainInfo,
    eReadWorkDone, eWriteWorkDone, eCommitWorkDone;

  entry {
    var chunkId: tChunkId;
    var targetId: tTargetId;
    var syncWorker: SyncWorker;
    var readProcess: ReadProcess;
    var writeProcess: WriteProcess;

    restarting = false;
    CancelTimer(timer);

    // Halt every in-flight read/write process and sync worker.
    foreach (readProcess in runningReadProcs) {
      send readProcess, halt;
    }

    foreach (chunkId in keys(runningWriteProcs)) {
      foreach (writeProcess in values(runningWriteProcs[chunkId])) {
        send writeProcess, halt;
      }
    }

    foreach (syncWorker in values(syncWorkers)) {
      send syncWorker, halt;
    }

    // Shut down the local targets and the management client.
    foreach (targetId in keys(localTargets)) {
      send localTargets[targetId], eShutDown, this;
    }

    send mgmtClient, eShutDown, this;

    // Reset all volatile state; chunk data lives on the targets and
    // survives this reset.
    syncWorkers = default(map[tVersionedChainId, SyncWorker]);
    runningReadProcs = default(set[ReadProcess]);
    runningWriteProcs = default(map[tChunkId, map[tMessageTag, WriteProcess]]);

    runningWriteReqs = default(map[tChunkId, map[tMessageTag, tWriteReq]]);
    blockedWriteReqs = default(map[tChunkId, map[tMessageTag, tWriteReq]]);
    committedWriteReqs = default(map[tMessageTag, (commitVer: tChunkVer, commitChainVer: tChainVer)]);

    routingVer = default(tRoutingVer);
    replicaChains = default(tReplicaChainMap);
    storageServices = default(tStorageServiceMap);

    predecessorNodeIds = default(map[tChainId, tNodeId]);
    successorNodeIds = default(map[tChainId, tNodeId]);
    successorTargetIds = default(map[tChainId, tTargetId]);
    successorTargetStates = default(map[tChainId, tPublicTargetState]);
    localTargetIds = default(map[tChainId, tTargetId]);
    publicTargetStates = default(map[tTargetId, tPublicTargetState]);

    foreach (targetId in keys(localTargets)) {
      localTargetStates[targetId] = LocalTargetState_OFFLINE;
    }

    print format("{0} #{1} is offline", this, nodeId);
  }

  // Restart: bring each local target back up (waiting for its ack),
  // restart the management client, then wait for routing to confirm this
  // node is seen as offline before rejoining.
  on eRestart goto WaitOfflineState with (from: machine) {
    var targetId: tTargetId;

    foreach (targetId in keys(localTargets)) {
      send localTargets[targetId], eRestart, this;
      receive {
        case eStartUp: (target: machine) {
          assert target == localTargets[targetId];
        }
      }
    }

    send mgmtClient, eRestart, this;
    // send from, eStarted, routingVer;

    print format("{0} #{1} is restarted by {2}", this, nodeId, from);
  }
}
|
|
|
|
// Restart, phase 1: wait until the routing info lists this node as
// offline, so we rejoin from a view that is consistent with the cluster's.
state WaitOfflineState {
  defer eSyncStartReq, eSyncDoneReq;
  ignore eTimeOut, eSendHeartbeat, eRestart,
    eReadReq, eWriteReq, eUpdateMsg,
    eWriteReqCompleted, eGetLocalChainInfo,
    eReadWorkDone, eWriteWorkDone;

  on eShutDown goto Offline with (from: machine) {
    print format("{0}: node {1} is going to shutdown during restart", this, nodeId);
  }

  on eCommitWorkDone do (workDone: tCommitWorkDone) {
    print format("{0}: chunk committed: {1}", this, workDone);
  }

  on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
    if (nodeId in routingInfo.offlineServices) {
      // Routing sees us offline: apply it, force-commit chunks on any
      // last-serving local targets, then wait to be marked online again.
      processRoutingInfo(routingInfo);
      commitChunksOnLastServingTarget(routingInfo);
      goto WaitOnlineState;
    } else {
      // Routing hasn't caught up yet; keep waiting.
      print format("{0}: current node {1} not offline in routing info: {2}", this, nodeId, routingInfo);
      goto WaitOfflineState;
    }
  }
}
|
|
|
|
// Restart, phase 2: local targets are ONLINE again; keep heartbeating
// until the routing info no longer lists this node as offline, then
// return to normal serving.
state WaitOnlineState {
  defer eSyncStartReq, eSyncDoneReq,
    eReadReq, eWriteReq, eUpdateMsg;
  ignore eTimeOut, eRestart,
    eWriteReqCompleted, eGetLocalChainInfo,
    eReadWorkDone, eWriteWorkDone, eCommitWorkDone;

  entry {
    var targetId: tTargetId;
    // ONLINE = waiting/syncing; heartbeats report this to management.
    foreach (targetId in keys(localTargets)) {
      localTargetStates[targetId] = LocalTargetState_ONLINE;
    }
    restarting = true;
  }

  on eShutDown goto Offline with (from: machine) {
    print format("{0}: node {1} is going to shutdown during restart", this, nodeId);
  }

  on eSendHeartbeat do reportTargetStates;

  on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
    processRoutingInfo(routingInfo);
    if (nodeId in routingInfo.offlineServices) {
      print format("{0}: current node {1} still offline in routing info: {2}", this, nodeId, routingInfo);
      goto WaitOnlineState;
    } else {
      goto WaitForRequests;
    }
  }
}
|
|
}
|
|
|
|
// One-shot worker that brings a successor target's chunk store in line
// with the local target's: it fetches both sides' snapshots (with a
// simulated countdown delay between them), pushes every divergent chunk
// via a dummy write through the local service, then reports sync-done to
// the successor's service and halts.
machine SyncWorker {
  var nodeId: tNodeId;
  var vChainId: tVersionedChainId;   // chain (id + version) being synced
  var localTarget: StorageTarget;    // source of truth for chunk data
  var localService: StorageService;  // service the dummy writes go through
  var successorTargetId: tTargetId;  // target being brought up to date
  var successorService: StorageService;
  var localSyncInfo: tTargetSyncInfo;
  var remoteSyncInfo: tTargetSyncInfo;
  var nextRequestId: tRequestId;
  var timer: Timer;
  var syncInfoDelay: int; // simulate a temporal delay between remote and local sync infos

  // Allocates a fresh (nodeId, requestId) tag for outgoing requests.
  fun newMessageTag(): tMessageTag {
    nextRequestId = nextRequestId + 1;
    return (nodeId = nodeId, requestId = nextRequestId);
  }

  // Pushes one chunk to the successor by issuing a zero-length write
  // through the local service; blocks for the commit message and
  // returns its status.
  fun syncChunkData(chunkId: tChunkId): tErrorCode {
    var dummyWrite: tWriteReq;
    var status: tErrorCode;

    // create a dummy write to trigger a sync update for the chunk
    dummyWrite = (from = this,
      retries = 0,
      tag = newMessageTag(),
      key = (vChainId = vChainId, chunkId = chunkId),
      updateVer = 0,
      commitChainVer = 0,
      fullChunkReplace = false, /* a full chunk replace could be triggered in the write process */
      removeChunk = false,
      fromClient = false,
      offset = 0,
      length = 0,
      dataBytes = default(tBytes));
    send localService, eWriteReq, dummyWrite;

    receive {
      case eCommitMsg: (resp: tCommitMsg) {
        if (resp.status != ErrorCode_SUCCESS) {
          print format("error in sync chunk resp: {0}", resp);
        }

        assert resp.retries == 0, format("{0}: retries not equal to zero, commitMsg: {1}", this, resp);
        status = resp.status;
      }
    }

    return status;
  }

  start state Init {
    entry (args: (nodeId: tNodeId, vChainId: tVersionedChainId,
      localTarget: StorageTarget, localService: StorageService,
      successorTargetId: tTargetId, successorService: StorageService))
    {
      nodeId = args.nodeId;
      vChainId = args.vChainId;
      localTarget = args.localTarget;
      localService = args.localService;
      successorTargetId = args.successorTargetId;
      successorService = args.successorService;
      timer = new Timer(this);
      syncInfoDelay = 7;
      goto GetRemoteSyncInfo;
    }
  }

  // Polls the successor for its sync snapshot once the countdown hits
  // zero; re-enters itself on timeout or on a failed start response.
  state GetRemoteSyncInfo {
    entry {
      if (syncInfoDelay == 0) {
        send successorService, eSyncStartReq, (from = this, vChainId = vChainId, targetId = successorTargetId);
        receive {
          case eSyncStartResp: (syncStartResp: tSyncStartResp) {
            if (syncStartResp.status == ErrorCode_SUCCESS) {
              print format("{0}: start sync, chain {1}, localTarget {2}, localService {3}, successorService {4}",
                this, vChainId, localTarget, localService, successorService);
              remoteSyncInfo = syncStartResp.syncInfo;
              CancelTimer(timer);
              syncInfoDelay = 7; // restart count down
              goto GetLocalSyncInfo;
            } else {
              print format("{0}: failed to start sync, chain {1}, localTarget {2}, localService {3}, successorService {4}",
                this, vChainId, localTarget, localService, successorService);
            }
          }
        }
      } else {
        syncInfoDelay = syncInfoDelay - 1;
      }

      StartTimer(timer);
    }

    on eTimeOut goto GetRemoteSyncInfo;
  }

  // Takes the local snapshot after a second countdown, then compares.
  state GetLocalSyncInfo {
    ignore eSyncStartResp;

    entry {
      if (syncInfoDelay == 0) {
        localSyncInfo = GetTargetSyncInfo(this, localTarget);
        goto SyncChunks;
      } else {
        syncInfoDelay = syncInfoDelay - 1;
        StartTimer(timer);
      }
    }

    on eTimeOut goto GetLocalSyncInfo;
  }

  // Diffs the local snapshot against the remote one, collects every
  // divergent chunk, then pushes them one by one. Halts on any failure.
  state SyncChunks {
    ignore eTimeOut;

    entry {
      var chunkId: tChunkId;
      var status: tErrorCode;
      var localChunk: tChunkSnapshot;
      var remoteChunk: tChunkSnapshot;
      var localMetadata: tChunkMetadata;
      var remoteMetadata: tChunkMetadata;
      var chunksNeedSync: set[tChunkId];

      foreach (chunkId in keys(remoteSyncInfo.chunkStoreSnapshot)) {
        remoteChunk = remoteSyncInfo.chunkStoreSnapshot[chunkId];
        remoteMetadata = remoteChunk.metadata;

        // Chunk exists remotely but not locally: sync so it is removed.
        if (!(chunkId in keys(localSyncInfo.chunkStoreSnapshot))) {
          print format("{0}: try to remove chunk {1} on peer target {2}", this, remoteMetadata, successorTargetId);
          chunksNeedSync += (chunkId);
          continue;
        }

        localChunk = localSyncInfo.chunkStoreSnapshot[chunkId];
        localMetadata = localChunk.metadata;

        if (localMetadata.chainVer > remoteMetadata.chainVer) {
          // Local copy is from a newer chain version: push it.
          print format("{0}: sync local chunk {1} to peer target {2}, remote chunk: {3}",
            this, localMetadata, successorTargetId, remoteMetadata);
          chunksNeedSync += (chunkId);
        } else if (remoteMetadata.updateVer != remoteMetadata.commitVer) {
          // Remote copy left uncommitted: push to resolve it.
          assert remoteMetadata.chunkState != ChunkState_COMMIT, format("unexpected chunk state: {0}", remoteMetadata);
          print format("{0}: sync local chunk {1} to peer target {2}, uncommitted remote chunk: {3}",
            this, localMetadata, successorTargetId, remoteMetadata);
          chunksNeedSync += (chunkId);
        } else if (localMetadata.chainVer < remoteMetadata.chainVer) {
          if (localMetadata.chunkState == ChunkState_COMMIT)
            assert false, format("chainVer of localMetadata {0} less than chainVer of remoteMetadata {1}", localMetadata, remoteMetadata);
          else
            print format("{0}: local chunk being updated on stale chain version: {1}, localMetadata: {2}, remoteMetadata: {3}", this, vChainId, localMetadata, remoteMetadata);
        } else if (localMetadata.updateVer != remoteMetadata.commitVer) { // localMetadata.chainVer == remoteMetadata.chainVer
          if (localMetadata.chainVer != vChainId.chainVer && localMetadata.chunkState == ChunkState_COMMIT)
            assert false, format("updateVer of localMetadata {0} not equal to commitVer of remoteMetadata {1}", localMetadata, remoteMetadata);
          else
            print format("{0}: local chunk being updated on chain version: {1}, localMetadata: {2}, remoteMetadata: {3}", this, vChainId, localMetadata, remoteMetadata);
        } else if (localChunk.content != remoteChunk.content) {
          if (localMetadata.chainVer != vChainId.chainVer || remoteMetadata.chainVer != vChainId.chainVer)
            assert false, format("local chunk content not equal to remote content, localChunk: {0}, remoteChunk: {1}", localChunk, remoteChunk);
          else
            print format("{0}: local chunk being removed and updated on chain version: {1}, localChunk: {2}, remoteChunk: {3}", this, vChainId, localChunk, remoteChunk);
        } else {
          print format("skip syncing chunk {0}, localMetadata: {1}, remoteMetadata: {2}", chunkId, localMetadata, remoteMetadata);
        }
      }

      // Chunks that exist only locally must also be pushed.
      foreach (chunkId in keys(localSyncInfo.chunkStoreSnapshot)) {
        if (!(chunkId in remoteSyncInfo.chunkStoreSnapshot)) {
          localMetadata = localSyncInfo.chunkStoreSnapshot[chunkId].metadata;
          print format("{0}: sync local chunk {1} to peer target {2}, remote chunk: not found",
            this, localMetadata, successorTargetId);
          chunksNeedSync += (chunkId);
        }
      }

      foreach (chunkId in chunksNeedSync) {
        status = syncChunkData(chunkId);
        if (status != ErrorCode_SUCCESS) {
          print format("{0}: stop syncing, error: {1}", this, status);
          raise halt;
        }
      }

      goto SyncDone;
    }
  }

  // Notifies the successor's service that syncing finished, then halts.
  state SyncDone {
    ignore eSyncStartResp;

    entry {
      print format("{0} sync loop stopped: chain {1} local target {2} successor {3}", this, vChainId, localTarget, successorService);
      send successorService, eSyncDoneReq, (from = this, vChainId = vChainId, targetId = successorTargetId);
      receive {
        case eSyncDoneResp: (syncDoneResp: tSyncDoneResp) {
          if (syncDoneResp.status == ErrorCode_SUCCESS) {
            print format("{0}: sync done: {1}", this, syncDoneResp);
          } else {
            print format("{0}: sync failed: {1}", this, syncDoneResp);
          }
          raise halt;
        }
      }
    }
  }
}
|
|
|
|
// public interfaces of storage engine
|
|
|
|
// Target state as seen by the storage node itself (distinct from the
// public target state published in routing info).
enum tLocalTargetState {
  LocalTargetState_INVALID = 0,  // invalid state
  LocalTargetState_UPTODATE = 1, // ready to serve client requests
  LocalTargetState_ONLINE = 2,   // waiting or syncing
  LocalTargetState_OFFLINE = 4   // crashed or stopped
}
|
|
|
|
// Human-readable name for a tLocalTargetState value; unrecognized values
// render as "UNKNOWN <value>".
fun LocalTargetStateToString(x: tLocalTargetState): string {
  var name: string;

  name = format("UNKNOWN {0}", x);
  if (x == LocalTargetState_INVALID)  name = "INVALID";
  if (x == LocalTargetState_UPTODATE) name = "UPTODATE";
  if (x == LocalTargetState_ONLINE)   name = "ONLINE";
  if (x == LocalTargetState_OFFLINE)  name = "OFFLINE";

  return name;
}
|
|
|
|
// Renders a target-state map as "<id->STATE>, <id->STATE>, ...".
// Returns the empty string for an empty map.
fun LocalTargetStatesToString(targetStates: map[tTargetId, tLocalTargetState]): string {
  var currentId: tTargetId;
  var result: string;

  foreach (currentId in keys(targetStates)) {
    if (result != "") {
      result = result + ", ";
    }
    result = result + format("<{0}->{1}>", currentId, LocalTargetStateToString(targetStates[currentId]));
  }

  return result;
}
|
|
|
|
// Map of per-target local states, reported to management in heartbeats.
type tLocalTargetStateMap = map[tTargetId, tLocalTargetState];

// Read a byte range of a chunk from a storage target.
type tReadWork = (from: machine, chunkId: tChunkId, offset: int, length: int, readUncommitted: bool);
event eReadWork : tReadWork;

type tReadWorkDone = (status: tErrorCode, targetId: tTargetId, chunkMetadata: tChunkMetadata, dataBytes: tBytes);
event eReadWorkDone : tReadWorkDone;

// Apply an update (partial write, full replace, or removal) to a chunk.
type tWriteWork = (from: machine, tag: tMessageTag, key: tGlobalKey, targetId: tTargetId, updateVer: tChunkVer, chainVer: tChainVer,
  fullChunkReplace: bool, removeChunk: bool,
  offset: int, length: int, dataBytes: tBytes);
event eWriteWork : tWriteWork;

type tWriteWorkDone = (caller: machine, tag: tMessageTag, key: tGlobalKey, targetId: tTargetId,
  status: tErrorCode, updateVer: tChunkVer, commitVer: tChunkVer, chainVer: tChainVer, currentChunkContent: tBytes);
event eWriteWorkDone : tWriteWorkDone;

// Commit a chunk at a given version; force = true commits even dirty chunks.
type tCommitWork = (from: machine, commitMsg: tCommitMsg, key: tGlobalKey, targetId: tTargetId, commitVer: tChunkVer, commitChainVer: tChainVer, force: bool);
event eCommitWork : tCommitWork;

type tCommitWorkDone = (caller: machine, commitMsg: tCommitMsg, key: tGlobalKey, targetId: tTargetId, force: bool,
  status: tErrorCode, updateVer: tChunkVer, commitVer: tChunkVer, chainVer: tChainVer, removeChunk: bool);
event eCommitWorkDone : tCommitWorkDone;

// Query / set a storage target's local state.
event eGetTargetState : machine;
event eGetTargetStateResult : tLocalTargetState;
type tSetTargetState = (from: machine, targetState: tLocalTargetState);
event eSetTargetState : tSetTargetState;
event eSetTargetStateResult : tLocalTargetState;

// Snapshot of a target's chunk store, used by the sync protocol.
event eGetTargetSyncInfo : machine;
event eGetTargetSyncInfoResult : tTargetSyncInfo;
|
|
|
|
// Models one storage target (replica host) in the chain-replication protocol.
// Owns one ChunkReplica machine per stored chunk and forwards read/write/commit
// works to them; answers state queries and the sync-info snapshot request.
machine StorageTarget {
  // identity of this target within the chain
  var targetId: tTargetId;
  // fixed size of every chunk on this target
  var chunkSize: int;
  // highest chain version under which this target accepted a write while UPTODATE
  var lastServeChainVer: tChainVer;
  // this target's locally-observed state
  var targetState: tLocalTargetState;
  // chunk id -> the ChunkReplica machine storing that chunk
  var chunkStore: map[tChunkId, ChunkReplica];
  // the machine that restarted this target; notified with eStartUp in StartUp
  var localService: machine;

  // Looks up the ChunkReplica for chunkId, optionally creating it on miss.
  // Returns default(ChunkReplica) when the chunk is absent and creation is
  // not requested.
  fun getOrCreateChunkReplica(chunkId: tChunkId, createIfNotExists: bool): ChunkReplica {
    var chunkReplica: ChunkReplica;

    if (!(chunkId in chunkStore)) {
      if (!createIfNotExists) {
        return default(ChunkReplica);
      }

      chunkStore += (chunkId,
        new ChunkReplica((chunkId = chunkId, chunkSize = chunkSize)));
      print format("{0}: created a new chunk {1}: {2}", this, chunkId, chunkStore[chunkId]);
    }

    chunkReplica = chunkStore[chunkId];
    return chunkReplica;
  }

  // Removes chunkId from the store (if present) and halts its ChunkReplica.
  fun removeAndHaltChunkReplica(chunkId: tChunkId) {
    var chunkReplica: ChunkReplica;

    chunkReplica = getOrCreateChunkReplica(chunkId, false);
    print format("{0}: remove chunk #{1} {2}", this, chunkId, chunkReplica);
    if (chunkReplica != default(ChunkReplica)) {
      chunkStore -= (chunkId);
      send chunkReplica, halt;
    }
  }

  // Relays a ChunkReplica write-finish result to the work's requester as
  // eWriteWorkDone (same payload for success and failure, only status differs).
  fun processWriteFinishResult(writeFinishRes: tWriteOpFinishResult) {
    var writeWork: tWriteWork;
    var i: int;  // NOTE(review): unused local, kept as-is

    writeWork = writeFinishRes.writeWork;

    if (writeFinishRes.status != ErrorCode_SUCCESS) {
      send writeWork.from, eWriteWorkDone, (
        caller = writeWork.from, tag = writeWork.tag, key = writeWork.key, targetId = writeWork.targetId, status = writeFinishRes.status,
        updateVer = writeFinishRes.updateVer, commitVer = writeFinishRes.commitVer, chainVer = writeFinishRes.chainVer, currentChunkContent = writeFinishRes.currentChunkContent);
      return;
    }

    send writeWork.from, eWriteWorkDone, (
      caller = writeWork.from, tag = writeWork.tag, key = writeWork.key, targetId = writeWork.targetId, status = ErrorCode_SUCCESS,
      updateVer = writeFinishRes.updateVer, commitVer = writeFinishRes.commitVer, chainVer = writeFinishRes.chainVer, currentChunkContent = writeFinishRes.currentChunkContent);
  }

  // Relays a ChunkReplica commit result to the work's requester; when the
  // commit finalized a removed chunk (success or stale commit), also deletes
  // the local replica before replying.
  fun processCommitOpResult(commitOpResult: tCommitOpResult) {
    var commitWork: tCommitWork;
    var chunkReplica: ChunkReplica;  // NOTE(review): unused local, kept as-is

    commitWork = commitOpResult.commitWork;

    if (commitOpResult.removeChunk &&
      (commitOpResult.status == ErrorCode_SUCCESS ||
       commitOpResult.status == ErrorCode_CHUNK_STALE_COMMIT))
    {
      removeAndHaltChunkReplica(commitWork.key.chunkId);
    }

    send commitWork.from, eCommitWorkDone, (caller = commitWork.from, commitMsg = commitWork.commitMsg,
      key = commitWork.key, targetId = commitWork.targetId, force = commitWork.force, status = commitOpResult.status,
      updateVer = commitOpResult.updateVer, commitVer = commitOpResult.commitVer, chainVer = commitOpResult.chainVer, removeChunk = commitOpResult.removeChunk);
  }

  // Initial state: record configuration and begin serving as UPTODATE.
  start state Init {
    entry (args: (targetId: tTargetId, chunkSize: int)) {
      targetId = args.targetId;
      chunkSize = args.chunkSize;
      targetState = LocalTargetState_UPTODATE;
      goto WaitForWorks;
    }
  }

  // Entered on restart after Offline: come back as ONLINE (not UPTODATE —
  // a sync is needed first), notify the restarting service, resume serving.
  state StartUp {
    entry {
      targetState = LocalTargetState_ONLINE;

      if (localService != default(machine))
        send localService, eStartUp, this;

      goto WaitForWorks;
    }
  }

  // Normal serving state.
  state WaitForWorks {
    on eShutDown goto Offline with (from: machine) {
      print format("{0} with target id {1} is going to offline", this, targetId);
    }

    on eGetTargetState do (from: machine) {
      send from, eGetTargetStateResult, targetState;
    }

    on eSetTargetState do (setTargetState: tSetTargetState) {
      targetState = setTargetState.targetState;
      send setTargetState.from, eSetTargetStateResult, targetState;
    }

    // Collects a snapshot of every stored chunk (synchronously, one receive
    // per chunk) and replies with the target's full sync info.
    on eGetTargetSyncInfo do (from: machine) {
      var chunkId: tChunkId;
      var getChunkSnapshotRes: tGetChunkSnapshotResult;
      var chunkSnapshot: tChunkSnapshot;
      var chunkMetadata: tChunkMetadata;
      var chunkStoreSnapshot: map[tChunkId, tChunkSnapshot];

      foreach (chunkId in keys(chunkStore)) {
        send chunkStore[chunkId], eGetChunkSnapshot, (from = this, chunkId = chunkId);

        receive {
          case eGetChunkSnapshotResult: (result: tGetChunkSnapshotResult) {
            getChunkSnapshotRes = result;
          }
        }

        // chunks in RemoveChunkCommitted answer CHUNK_NOT_FOUND and are skipped
        if (getChunkSnapshotRes.status == ErrorCode_SUCCESS) {
          chunkSnapshot = getChunkSnapshotRes.chunkSnapshot;
          chunkMetadata = chunkSnapshot.metadata;
          chunkStoreSnapshot += (chunkId, chunkSnapshot);

          if (chunkMetadata.updateVer != chunkMetadata.commitVer) {
            print format("found an uncommitted chunk {0}, metadata: {1}", chunkId, chunkMetadata);
            // if a normal write updates and commits a chunk during syncing, its chain version could be greater than the lastServeChainVer
            // so the following assertion is not applicable
            // assert lastServeChainVer >= chunkMetadata.chainVer, format("lastServeChainVer {0} < uncommitted chunk chainVer {1}", lastServeChainVer, chunkMetadata.chainVer);
          }
        }
      }

      send from, eGetTargetSyncInfoResult, (targetId = targetId, targetState = targetState, lastServeChainVer = lastServeChainVer,
        chunkStoreSnapshot = chunkStoreSnapshot);
    }

    // Forwards a read to the owning ChunkReplica; CHUNK_NOT_FOUND if absent.
    on eReadWork do (readWork: tReadWork) {
      var chunkReplica: ChunkReplica;

      chunkReplica = getOrCreateChunkReplica(readWork.chunkId, false);

      if (chunkReplica == default(ChunkReplica)) {
        send readWork.from, eReadWorkDone, (status = ErrorCode_CHUNK_NOT_FOUND, targetId = targetId,
          chunkMetadata = default(tChunkMetadata), dataBytes = default(tBytes));
        return;
      }

      send chunkReplica, eReadOp, (from = this, readWork = readWork,
        chunkId = readWork.chunkId, offset = readWork.offset, length = readWork.length);
    }

    on eReadOpResult do (readOpRes: tReadOpResult) {
      send readOpRes.readWork.from, eReadWorkDone, (status = readOpRes.status, targetId = targetId,
        chunkMetadata = readOpRes.chunkMetadata, dataBytes = readOpRes.dataBytes);
    }

    // Begins a write on the chunk (creating it if needed). Removing an
    // already-absent chunk succeeds immediately without creating it.
    on eWriteWork do (writeWork: tWriteWork) {
      var chunkReplica: ChunkReplica;

      if (targetState == LocalTargetState_UPTODATE) {
        // track the newest chain version this up-to-date target has served
        if (lastServeChainVer < writeWork.key.vChainId.chainVer)
          lastServeChainVer = writeWork.key.vChainId.chainVer;
      }

      if (writeWork.removeChunk) {
        chunkReplica = getOrCreateChunkReplica(writeWork.key.chunkId, false);
        if (chunkReplica == default(ChunkReplica)) {
          send writeWork.from, eWriteWorkDone, (
            caller = writeWork.from, tag = writeWork.tag, key = writeWork.key, targetId = writeWork.targetId, status = ErrorCode_SUCCESS,
            updateVer = writeWork.updateVer, commitVer = 0, chainVer = writeWork.key.vChainId.chainVer, currentChunkContent = default(tBytes));
          return;
        }
      }

      chunkReplica = getOrCreateChunkReplica(writeWork.key.chunkId, true);
      assert chunkReplica != default(ChunkReplica);

      send chunkReplica, eWriteOpBegin, (from = this, writeWork = writeWork, tag = writeWork.tag, key = writeWork.key,
        updateVer = writeWork.updateVer, chainVer = writeWork.chainVer,
        fullChunkReplace = writeWork.fullChunkReplace, removeChunk = writeWork.removeChunk);
    }

    // Forwards a commit to the ChunkReplica; an absent chunk is reported as
    // CHUNK_NOT_FOUND with removeChunk = true (treated as already removed).
    on eCommitWork do (commitWork: tCommitWork) {
      var chunkReplica: ChunkReplica;

      chunkReplica = getOrCreateChunkReplica(commitWork.key.chunkId, false);

      if (chunkReplica == default(ChunkReplica)) {
        send commitWork.from, eCommitWorkDone, (caller = commitWork.from, commitMsg = commitWork.commitMsg,
          key = commitWork.key, targetId = commitWork.targetId, force = commitWork.force, status = ErrorCode_CHUNK_NOT_FOUND,
          updateVer = commitWork.commitVer, commitVer = commitWork.commitVer, chainVer = commitWork.key.vChainId.chainVer, removeChunk = true);
        return;
      }

      send chunkReplica, eCommitOp, (from = this, commitWork = commitWork, key = commitWork.key, commitVer = commitWork.commitVer, commitChainVer = commitWork.commitChainVer, force = commitWork.force);
    }

    on eWriteOpFinishResult do processWriteFinishResult;

    on eCommitOpResult do processCommitOpResult;
  }

  // Crashed/offline state: new works fail with TARGET_OFFLINE, while results
  // of ops already in flight at shutdown time are still relayed normally.
  state Offline {
    ignore eShutDown, eReadOpResult, eSetTargetState, eGetTargetSyncInfo;

    entry {
      var chunkId: tChunkId;

      print format("{0} with target id {1} is offline", this, targetId);
      targetState = LocalTargetState_OFFLINE;

      // take every chunk replica offline as well
      foreach (chunkId in keys(chunkStore)) {
        send chunkStore[chunkId], eShutDown, this;
      }
    }

    on eWriteOpFinishResult do processWriteFinishResult;

    on eCommitOpResult do processCommitOpResult;

    on eGetTargetState do (from: machine) {
      send from, eGetTargetStateResult, LocalTargetState_OFFLINE;
    }

    on eReadWork do (readWork: tReadWork) {
      send readWork.from, eReadWorkDone, (status = ErrorCode_TARGET_OFFLINE, targetId = targetId,
        chunkMetadata = default(tChunkMetadata), dataBytes = default(tBytes));
    }

    on eWriteWork do (writeWork: tWriteWork) {
      send writeWork.from, eWriteWorkDone, (
        caller = writeWork.from, tag = writeWork.tag, key = writeWork.key, targetId = writeWork.targetId,
        status = ErrorCode_TARGET_OFFLINE, updateVer = 0, commitVer = 0, chainVer = 0, currentChunkContent = default(tBytes));
    }

    on eCommitWork do (commitWork: tCommitWork) {
      send commitWork.from, eCommitWorkDone, (caller = commitWork.from, commitMsg = commitWork.commitMsg,
        key = commitWork.key, targetId = commitWork.targetId, force = commitWork.force,
        status = ErrorCode_TARGET_OFFLINE, updateVer = 0, commitVer = 0, chainVer = 0, removeChunk = false);
    }

    // Restart: wake every chunk replica, remember who restarted us, then StartUp.
    on eRestart goto StartUp with (from: machine) {
      var chunkId: tChunkId;
      var chunkReplica: ChunkReplica;

      foreach (chunkId in keys(chunkStore)) {
        chunkReplica = chunkStore[chunkId];
        send chunkReplica, eRestart, this;
      }

      localService = from;
      print format("{0} #{1} is restarted by {2}", this, targetId, from);
    }
  }
}
|
|
|
|
// Synchronously sets the local state of a storage target and returns the
// state the target acknowledges (the state now in effect on the target).
fun SetTargetState(caller: machine, storageTarget: StorageTarget, targetState: tLocalTargetState)
  : tLocalTargetState
{
  var ackedState: tLocalTargetState;

  send storageTarget, eSetTargetState, (from = caller, targetState = targetState);
  receive {
    case eSetTargetStateResult: (newState: tLocalTargetState) { ackedState = newState; }
  }

  return ackedState;
}
|
|
|
|
// Synchronously queries a storage target for its current local state.
fun GetTargetState(caller: machine, storageTarget: StorageTarget)
  : tLocalTargetState
{
  var reportedState: tLocalTargetState;

  send storageTarget, eGetTargetState, caller;
  receive {
    case eGetTargetStateResult: (state: tLocalTargetState) { reportedState = state; }
  }

  return reportedState;
}
|
|
|
|
// Synchronously fetches a storage target's sync info: its state, last served
// chain version, and a snapshot of every stored chunk.
fun GetTargetSyncInfo(caller: machine, storageTarget: StorageTarget): tTargetSyncInfo {
  var info: tTargetSyncInfo;

  send storageTarget, eGetTargetSyncInfo, caller;
  receive {
    case eGetTargetSyncInfoResult: (syncInfoRes: tTargetSyncInfo) { info = syncInfoRes; }
  }

  return info;
}
|
|
|
|
// read op between a StorageTarget and one of its ChunkReplica machines
type tReadOp = (from: machine, readWork: tReadWork, chunkId: tChunkId, offset: int, length: int);
type tReadOpResult = (readWork: tReadWork, status: tErrorCode, chunkMetadata: tChunkMetadata, dataBytes: tBytes);
event eReadOp : tReadOp;
event eReadOpResult : tReadOpResult;

// snapshot of a chunk (metadata + content), used to build target sync info
type tGetChunkSnapshot = (from: machine, chunkId: tChunkId);
type tGetChunkSnapshotResult = (status: tErrorCode, chunkSnapshot: tChunkSnapshot);
event eGetChunkSnapshot : tGetChunkSnapshot;
event eGetChunkSnapshotResult : tGetChunkSnapshotResult;

// two-phase write on a ChunkReplica: eWriteOpBegin validates versions and
// marks the chunk dirty; eWriteOpFinishResult reports the final outcome
type tWriteOp = (from: machine, writeWork: tWriteWork, key: tGlobalKey, offset: int, length: int, dataBytes: tBytes, fullChunkReplace: bool, removeChunk: bool);
type tWriteOpBegin = (from: machine, writeWork: tWriteWork, tag: tMessageTag, key: tGlobalKey, updateVer: tChunkVer, chainVer: tChainVer, fullChunkReplace: bool, removeChunk: bool);
event eWriteOpBegin : tWriteOpBegin;

type tWriteOpFinish = (from: machine, writeWork: tWriteWork, key: tGlobalKey);
type tWriteOpFinishResult = (writeWork: tWriteWork, status: tErrorCode, updateVer: tChunkVer, commitVer: tChunkVer, chainVer: tChainVer, currentChunkContent: tBytes);
event eWriteOpFinishResult : tWriteOpFinishResult;

// commit op on a ChunkReplica; force bypasses the DIRTY/stale-version checks
type tCommitOp = (from: machine, commitWork: tCommitWork, key: tGlobalKey, commitVer: tChunkVer, commitChainVer: tChainVer, force: bool);
type tCommitOpResult = (commitWork: tCommitWork, status: tErrorCode, updateVer: tChunkVer, commitVer: tChunkVer, chainVer: tChainVer, removeChunk: bool);
event eCommitOp : tCommitOp;
event eCommitOpResult : tCommitOpResult;
|
|
|
|
// Chunk write lifecycle: DIRTY (a write is being applied) -> CLEAN (the write
// is finished but not fully committed) -> COMMIT (commitVer == updateVer).
enum tChunkState {
  ChunkState_DIRTY,
  ChunkState_CLEAN,
  ChunkState_COMMIT
}

// tag of the last write request applied to the chunk (debugging/tracing)
type tChunkDebugInfo = (lastWriteReqTag: tMessageTag);

// persistent per-chunk metadata
type tChunkMetadata = (chunkId: tChunkId, chunkSize: int,
  chainVer: tChainVer, // the chain version on which the chunk is committed
  commitVer: tChunkVer, // highest committed update version
  updateVer: tChunkVer, // highest applied (possibly uncommitted) update version
  chunkState: tChunkState,
  debugInfo: tChunkDebugInfo);
|
|
|
|
// Builds the initial metadata for a brand-new chunk: all versions zero,
// state CLEAN, empty debug info.
fun CreateChunkMetadata(chunkId: tChunkId, chunkSize: int) : tChunkMetadata {
  var freshMetadata: tChunkMetadata;

  freshMetadata = (chunkId = chunkId, chunkSize = chunkSize,
    chainVer = 0, commitVer = 0, updateVer = 0,
    chunkState = ChunkState_CLEAN,
    debugInfo = default(tChunkDebugInfo));
  return freshMetadata;
}
|
|
|
|
// Models the on-disk state of one chunk on one storage target.
// Write path: WaitForOps --eWriteOpBegin--> DoWrite --> FinishWrite --> WaitForOps,
// moving the chunk DIRTY -> CLEAN; a later eCommitOp moves it to COMMIT once
// commitVer catches up with updateVer. A committed removal ends in the
// terminal RemoveChunkCommitted state.
machine ChunkReplica {
  // on-disk chunk state
  var content: tBytes; // it's a removed but not committed chunk if content equals to default(tBytes)
  var metadata: tChunkMetadata;

  // Applies a commit: advances commitVer (or forces it into [commitVer,
  // updateVer]), flips the chunk to COMMIT when commitVer reaches updateVer,
  // and jumps to RemoveChunkCommitted when the content was deleted earlier
  // by a remove-chunk write.
  fun processCommitOp(commitOp: tCommitOp) {
    var commitRes: tErrorCode;
    var removeChunk: bool;

    assert commitOp.force || metadata.updateVer >= commitOp.commitVer,
      format("metadata: {0}, commitOp: {1}", metadata, commitOp);
    print format("{0}: commit chunk: metadata {1}, content {2}", this, metadata, content);

    if (commitOp.force) {
      // forced commit: the requested version must lie within [commitVer, updateVer]
      assert metadata.commitVer <= commitOp.commitVer && commitOp.commitVer <= metadata.updateVer;
      metadata.chunkState = ChunkState_CLEAN;
      metadata.commitVer = commitOp.commitVer;
      commitRes = ErrorCode_SUCCESS;
    } else if (metadata.chunkState == ChunkState_DIRTY) {
      // a write is still in flight; refuse to commit
      commitRes = ErrorCode_CHUNK_NOT_CLEAN;
    } else if (metadata.commitVer < commitOp.commitVer) {
      metadata.commitVer = commitOp.commitVer;
      commitRes = ErrorCode_SUCCESS;
    } else {
      // commitVer already at or beyond the requested version
      commitRes = ErrorCode_CHUNK_STALE_COMMIT;
    }

    // fully committed: record the chain version of the final commit
    if (metadata.commitVer == metadata.updateVer) {
      metadata.chunkState = ChunkState_COMMIT;
      metadata.chainVer = commitOp.commitChainVer;
    }

    if (content == default(tBytes))
      removeChunk = true;

    // since the client may expect commitVer to be monotonically increasing without gaps,
    // send commitOp.commitVer instead of metadata.commitVer as result,
    send commitOp.from, eCommitOpResult, (commitWork = commitOp.commitWork, status = commitRes,
      updateVer = metadata.updateVer, commitVer = commitOp.commitVer, chainVer = commitOp.key.vChainId.chainVer, removeChunk = removeChunk);

    if (removeChunk) {
      assert commitOp.commitVer == metadata.updateVer && metadata.chunkState == ChunkState_COMMIT,
        format("{0}: try to commit removed chunk but found uncommitted chunk state: {1}", this, metadata);
      print format("{0}: remove chunk committed: {1}", this, metadata);
      goto RemoveChunkCommitted;
    }
  }

  // Initial state: zero-fill the content and create fresh CLEAN metadata.
  start state Init {
    entry (args: (chunkId: tChunkId, chunkSize: int)) {
      var i: int;

      while (i < args.chunkSize) {
        content += (i, 0);
        i = i + 1;
      }

      metadata = CreateChunkMetadata(args.chunkId, args.chunkSize);

      goto WaitForOps;
    }
  }

  // Normal serving state: handles reads, snapshots, commits and write begins.
  state WaitForOps {

    on eShutDown goto Offline with (from: machine) {
      print format("{0} with chunk id {1} is going to offline", this, metadata.chunkId);
    }

    on eCommitOp do processCommitOp;

    // Read: committed-only reads fail while an update is pending; otherwise
    // copy [offset, offset+length) clamped to the current content size.
    on eReadOp do (readOp: tReadOp) {
      var dataBytes: tBytes;
      var i: int;

      if (!readOp.readWork.readUncommitted && metadata.updateVer != metadata.commitVer) {
        send readOp.from, eReadOpResult, (readWork = readOp.readWork, status = ErrorCode_CHUNK_NOT_COMMIT,
          chunkMetadata = metadata, dataBytes = default(tBytes));
        return;
      }

      i = 0;
      while (i < readOp.length && readOp.offset + i < sizeof(content)) {
        dataBytes += (i, content[readOp.offset + i]);
        i = i + 1;
      }

      send readOp.from, eReadOpResult,
        (readWork = readOp.readWork, status = ErrorCode_SUCCESS, chunkMetadata = metadata, dataBytes = dataBytes);
    }

    on eGetChunkSnapshot do (getSnapshot: tGetChunkSnapshot) {
      var chunkSnapshot: tChunkSnapshot;
      chunkSnapshot = (metadata = metadata, content = content);
      send getSnapshot.from, eGetChunkSnapshotResult, (status = ErrorCode_SUCCESS, chunkSnapshot = chunkSnapshot);
    }

    // Write begin: validate chain/update versions, advance updateVer, mark
    // the chunk DIRTY, then hand the payload to DoWrite.
    on eWriteOpBegin do (writeBegin: tWriteOpBegin) {
      var writeWork: tWriteWork;
      var writeOp: tWriteOp;

      writeWork = writeBegin.writeWork;

      print format("{0}: write begin: metadata {1}, content {2}", this, metadata, content);

      // a dirty chunk only accepts a full-chunk replacement
      if (metadata.chunkState == ChunkState_DIRTY && !writeBegin.fullChunkReplace) {
        send writeBegin.from, eWriteOpFinishResult, (writeWork = writeWork, status = ErrorCode_CHUNK_NOT_CLEAN,
          updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = content);
        return;
      }

      // reject writes issued under a chain version older than the chunk's
      // NOTE(review): "VERION" typo comes from the enum declaration itself
      if (writeBegin.key.vChainId.chainVer < metadata.chainVer) {
        send writeBegin.from, eWriteOpFinishResult, (writeWork = writeWork, status = ErrorCode_CHAIN_VERION_MISMATCH,
          updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = content);
        return;
      }

      if (writeBegin.updateVer > 0 || writeBegin.fullChunkReplace) {
        if (writeBegin.fullChunkReplace) {
          // full chunk update during syncing
          assert writeBegin.key.vChainId.chainVer >= metadata.chainVer;
          metadata.commitVer = Max(0, writeBegin.updateVer - 1);
        } else if (writeBegin.updateVer <= metadata.commitVer) {
          // this version was already committed here
          send writeBegin.from, eWriteOpFinishResult, (writeWork = writeWork, status = ErrorCode_CHUNK_COMMITTED_UPDATE,
            updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = content);
          return;
        } else if (writeBegin.updateVer <= metadata.updateVer) {
          // this version was already applied but not yet committed
          send writeBegin.from, eWriteOpFinishResult, (writeWork = writeWork, status = ErrorCode_CHUNK_STALE_UPDATE,
            updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = content);
          return;
        } else if (writeBegin.updateVer == metadata.updateVer + 1) {
          // normal update
        } else {
          // writeBegin.updateVer > metadata.updateVer + 1
          send writeBegin.from, eWriteOpFinishResult, (writeWork = writeWork, status = ErrorCode_CHUNK_MISSING_UPDATE,
            updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = content);
          return;
        }

        metadata.updateVer = writeBegin.updateVer;
      } else {
        // updateVer == 0: assign the next update version locally
        assert writeBegin.updateVer == 0;
        metadata.updateVer = metadata.updateVer + 1;
      }

      metadata.chunkState = ChunkState_DIRTY;
      metadata.chainVer = writeBegin.chainVer;
      metadata.debugInfo.lastWriteReqTag = writeBegin.tag;

      writeOp = (from = writeBegin.from, writeWork = writeWork, key = writeWork.key,
        offset = writeWork.offset, length = writeWork.length, dataBytes = writeWork.dataBytes,
        fullChunkReplace = writeWork.fullChunkReplace, removeChunk = writeWork.removeChunk);
      goto DoWrite, writeOp;
    }
  }

  // Applies the content mutation while the chunk is DIRTY; reads/snapshots
  // are deferred until the write settles, commits and new writes are ignored.
  state DoWrite {
    defer eReadOp, eGetChunkSnapshot;
    ignore eCommitOp, eWriteOpBegin;

    entry (writeOp: tWriteOp) {
      var i: int;
      var writeFinish: tWriteOpFinish;

      assert metadata.chunkState == ChunkState_DIRTY, format("chunk not in dirty state: {0}", metadata);
      assert writeOp.offset + writeOp.length <= metadata.chunkSize,
        format("write range condition failed: {0}", writeOp);

      print format("{0}: before write: metadata {1}, content {2}", this, metadata, content);

      // TODO: compare checksum of the chunk before changing the chunk content to ensure the update is not applied out-of-order

      if (writeOp.removeChunk) {
        // removal is modeled as empty content until the commit finalizes it
        content = default(tBytes);
      } else if (writeOp.fullChunkReplace) {
        // either a whole-chunk payload or an empty payload (replace-with-removed)
        assert (writeOp.offset == 0 && writeOp.length == metadata.chunkSize) || (writeOp.offset == 0 && writeOp.length == 0 && writeOp.dataBytes == default(tBytes)),
          format("full chunk replace condition failed: {0}", writeOp);
        content = writeOp.dataBytes;
      } else {
        assert writeOp.offset + writeOp.length <= sizeof(content),
          format("{0}: writeOp.offset:{1} + writeOp.length:{2} < sizeof(content):{3}", this, writeOp.offset, writeOp.length, sizeof(content));
        i = 0;
        while (i < writeOp.length) {
          content[writeOp.offset + i] = writeOp.dataBytes[i];
          i = i + 1;
        }
      }

      print format("{0}: after write: metadata {1}, content {2}", this, metadata, content);

      writeFinish = (from = writeOp.from, writeWork = writeOp.writeWork, key = writeOp.writeWork.key);
      goto FinishWrite, writeFinish;
    }

    on eShutDown goto Offline with (from: machine) {
      print format("{0} with chunk id {1} is going to offline", this, metadata.chunkId);
    }
  }

  // Completes the write: DIRTY -> CLEAN, report success, resume serving.
  state FinishWrite {
    defer eReadOp, eGetChunkSnapshot;
    ignore eCommitOp, eWriteOpBegin;

    entry (writeFinish: tWriteOpFinish) {
      assert metadata.chunkState == ChunkState_DIRTY;
      print format("{0}: write finish: metadata {1}, content {2}", this, metadata, content);

      metadata.chunkState = ChunkState_CLEAN;

      send writeFinish.from, eWriteOpFinishResult, (writeWork = writeFinish.writeWork, status = ErrorCode_SUCCESS,
        updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = content);
      goto WaitForOps;
    }

    on eShutDown goto Offline with (from: machine) {
      print format("{0} with chunk id {1} is going to offline", this, metadata.chunkId);
    }
  }

  // Crashed state: drop all ops until restarted.
  state Offline {
    ignore eReadOp, eGetChunkSnapshot, eCommitOp, eWriteOpBegin, eShutDown;

    entry {
      print format("{0} with chunk id {1} is offline", this, metadata.chunkId);
    }

    on eRestart goto WaitForOps;
  }

  // Terminal state after a removal is committed: every op answers CHUNK_NOT_FOUND.
  state RemoveChunkCommitted {
    ignore eShutDown, eRestart;

    on eCommitOp do (commitOp: tCommitOp) {
      send commitOp.from, eCommitOpResult, (commitWork = commitOp.commitWork, status = ErrorCode_CHUNK_NOT_FOUND,
        updateVer = metadata.updateVer, commitVer = commitOp.commitVer, chainVer = commitOp.key.vChainId.chainVer, removeChunk = true);
    }

    on eReadOp do (readOp: tReadOp) {
      send readOp.from, eReadOpResult, (readWork = readOp.readWork, status = ErrorCode_CHUNK_NOT_FOUND,
        chunkMetadata = metadata, dataBytes = default(tBytes));
    }

    on eGetChunkSnapshot do (getSnapshot: tGetChunkSnapshot) {
      var chunkSnapshot: tChunkSnapshot;
      chunkSnapshot = (metadata = default(tChunkMetadata), content = default(tBytes));
      send getSnapshot.from, eGetChunkSnapshotResult, (status = ErrorCode_CHUNK_NOT_FOUND, chunkSnapshot = chunkSnapshot);
    }

    on eWriteOpBegin do (writeBegin: tWriteOpBegin) {
      send writeBegin.from, eWriteOpFinishResult, (writeWork = writeBegin.writeWork, status = ErrorCode_CHUNK_NOT_FOUND,
        updateVer = metadata.updateVer, commitVer = metadata.commitVer, chainVer = metadata.chainVer, currentChunkContent = default(tBytes));
    }
  }
}
|