Initial commit

This commit is contained in:
dev
2025-02-27 21:53:53 +08:00
commit 815e55e4c0
1291 changed files with 185445 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
<!-- Builds a console executable (OutputType=Exe) into POutput/ and references the Microsoft.Coyote and PCSharpRuntime packages. -->
<!-- NOTE(review): netcoreapp3.1 is past Microsoft's end of support; confirm whether these package versions allow a newer target framework. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<ApplicationIcon />
<OutputType>Exe</OutputType>
<StartupObject />
<LangVersion>latest</LangVersion>
<OutputPath>POutput/</OutputPath>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Coyote" Version="1.0.5"/>
<PackageReference Include="PCSharpRuntime" Version="1.1.15"/>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
<!-- P project file for data storage -->
<Project>
<ProjectName>DataStorage</ProjectName>
<!-- Directories handed to the P compiler as inputs (presumably sources, specifications and tests, judging by the names; verify against the tree) -->
<InputFiles>
<PFile>./PSrc/</PFile>
<PFile>./PSpec/</PFile>
<PFile>./PTst/</PFile>
</InputFiles>
<!-- Directory that receives the generated output -->
<OutputDir>./PGenerated/</OutputDir>
<!-- Add external dependencies -->
<IncludeProject>../Timer/Timer.pproj</IncludeProject>
</Project>

View File

@@ -0,0 +1,492 @@
// Liveness monitor for client writes.
// It stays in the hot state PendingWrites while at least one tracked client write request
// has not yet been answered by a successful eWriteResp, and returns to the cold state
// NoPendingWrites once every tracked request has been answered successfully.
spec WriteComplete observes eWriteReq, eWriteResp {
  // tag of every tracked client write request -> true once a successful response was observed
  var completedWriteReqTags: map[tMessageTag, bool];
  // number of tracked requests whose entry in completedWriteReqTags is still false
  var numPendingWriteReqs: int;

  // Starts tracking a write request the first time a client-issued request with its tag is seen.
  fun OnWriteReq(writeReq: tWriteReq) {
    // requests not issued by a client, and tags that are already tracked, are not counted
    if (!(writeReq.fromClient) || (writeReq.tag in completedWriteReqTags)) {
      return;
    }

    completedWriteReqTags += (writeReq.tag, false);
    numPendingWriteReqs = numPendingWriteReqs + 1;

    if (numPendingWriteReqs > 0) {
      goto PendingWrites;
    }
  }

  // Marks a tracked request as completed on its first successful response.
  fun OnWriteResp(writeResp: tWriteResp) {
    // every response must belong to a request that was tracked by OnWriteReq
    assert writeResp.tag in completedWriteReqTags;

    // failed responses and repeated successful responses leave the counters untouched
    if (writeResp.status == ErrorCode_SUCCESS && !completedWriteReqTags[writeResp.tag]) {
      completedWriteReqTags[writeResp.tag] = true;
      numPendingWriteReqs = numPendingWriteReqs - 1;

      if (numPendingWriteReqs == 0) {
        goto NoPendingWrites;
      }
    }
  }

  start cold state NoPendingWrites {
    entry {
      print format("numPendingWriteReqs: {0}, completedWriteReqTags: {1}", numPendingWriteReqs, completedWriteReqTags);
      assert numPendingWriteReqs == 0, format("{0} pending writes not equal to zero", numPendingWriteReqs);
    }

    on eWriteReq do OnWriteReq;
    on eWriteResp do OnWriteResp;
  }

  hot state PendingWrites {
    entry {
      print format("numPendingWriteReqs: {0}, completedWriteReqTags: {1}", numPendingWriteReqs, completedWriteReqTags);
    }

    on eWriteReq do OnWriteReq;
    on eWriteResp do OnWriteResp;
  }
}
// Carries the system configuration; observed by the AllWriteItersProcessed spec.
event eSystemConfig: (config: tSystemConfig);
// Carries the storage system handle; observed by the AllWriteItersProcessed spec.
event eStorageSystem: (system: tStorageSystem);
// Liveness monitor: after the system configuration is announced it stays in the hot state
// SendingWriteReqs until config.numClients distinct clients have each received
// config.numIters distinct successful write responses, then moves to the cold state Done.
spec AllWriteItersProcessed observes eWriteWork, eWriteReq, eWriteResp, eSystemConfig, eStorageSystem {
  // configuration received with eSystemConfig (numIters and numClients are read below)
  var config: tSystemConfig;
  // copied from eStorageSystem; not read anywhere else inside this spec
  var mgmtService: MgmtService;
  var storageServices: tStorageServiceMap;
  // tags of observed write requests / successful write responses, grouped by tag.nodeId
  var seenWriteRequestTags: map[tNodeId, set[tMessageTag]];
  var seenWriteResponseTags: map[tNodeId, set[tMessageTag]];
  // request tag -> target -> update version -> machines that reported write work for it
  var seenWriteProcs: map[tMessageTag, map[tTargetId, map[tChunkVer, set[machine]]]];
  // node ids whose number of distinct successful responses reached config.numIters
  var clientDone: set[tNodeId];

  start state Init {
    on eSystemConfig goto SendingWriteReqs with (args: (config: tSystemConfig)) {
      config = args.config;
    }

    on eStorageSystem do (args: (system: tStorageSystem)) {
      mgmtService = args.system.mgmt;
      storageServices = args.system.storages;
    }
  }

  hot state SendingWriteReqs {
    // Dumps the bookkeeping; runs on every (re-)entry, including the eWriteWork self-transition.
    entry {
      var tag: tMessageTag;

      foreach (tag in keys(seenWriteProcs)) {
        print format("write request tag: {0}, seenWriteProcs: {1}", tag, seenWriteProcs[tag]);
      }

      print format("seenWriteRequestTags: {0}", seenWriteRequestTags);
      print format("seenWriteResponseTags: {0}", seenWriteResponseTags);
    }

    // Records which machine processed which (tag, target, version); missing map levels are created on demand.
    on eWriteWork goto SendingWriteReqs with (writeWork: tWriteWork) {
      if (!(writeWork.tag in seenWriteProcs)) {
        seenWriteProcs += (writeWork.tag, default(map[tTargetId, map[tChunkVer, set[machine]]]));
      }

      if (!(writeWork.targetId in seenWriteProcs[writeWork.tag])) {
        seenWriteProcs[writeWork.tag] += (writeWork.targetId, default(map[tChunkVer, set[machine]]));
      }

      if (!(writeWork.updateVer in seenWriteProcs[writeWork.tag][writeWork.targetId])) {
        seenWriteProcs[writeWork.tag][writeWork.targetId] += (writeWork.updateVer, default(set[machine]));
      }

      seenWriteProcs[writeWork.tag][writeWork.targetId][writeWork.updateVer] += (writeWork.from);
    }

    on eStorageSystem do (args: (system: tStorageSystem)) {
      mgmtService = args.system.mgmt;
      storageServices = args.system.storages;
    }

    on eWriteReq do (writeReq: tWriteReq) {
      if (!(writeReq.tag.nodeId in seenWriteRequestTags)) {
        seenWriteRequestTags += (writeReq.tag.nodeId, default(set[tMessageTag]));
      }

      seenWriteRequestTags[writeReq.tag.nodeId] += (writeReq.tag);
    }

    on eWriteResp do (writeResp: tWriteResp) {
      // only successful responses count towards the iteration total
      if (writeResp.status != ErrorCode_SUCCESS) {
        return;
      }

      // Check the node id first: indexing seenWriteRequestTags with an unknown key would abort
      // with a map lookup error instead of reporting the actual violation.
      assert (writeResp.tag.nodeId in seenWriteRequestTags) &&
             (writeResp.tag in seenWriteRequestTags[writeResp.tag.nodeId]),
        format("write response with tag {0} has no matching write request, seenWriteRequestTags: {1}",
          writeResp.tag, seenWriteRequestTags);

      if (!(writeResp.tag.nodeId in seenWriteResponseTags)) {
        seenWriteResponseTags += (writeResp.tag.nodeId, default(set[tMessageTag]));
      }

      seenWriteResponseTags[writeResp.tag.nodeId] += (writeResp.tag);

      // a client is done once it has numIters distinct successful responses
      if (sizeof(seenWriteResponseTags[writeResp.tag.nodeId]) == config.numIters) {
        clientDone += (writeResp.tag.nodeId);

        if (sizeof(clientDone) == config.numClients) {
          goto Done;
        }
      }
    }
  }

  cold state Done {
    ignore eWriteWork, eWriteReq, eWriteResp;

    entry {
      print format("all iterations processed {0}", clientDone);
    }
  }
}
// Safety monitor: for every (chunk, target) replica, each newly committed version must be
// strictly greater than every version previously committed on that replica.
// The history of a replica is discarded when the chunk is fully replaced or removed.
spec MonotoneIncreasingVersionNumber observes eWriteOpFinishResult, eCommitOpResult {
  // (chunk id, target id) -> committed version -> commit work that produced it
  var chunkReplicaCommits: map[(tChunkId, tTargetId), map[tChunkVer, tCommitWork]];

  start state WaitForResponses {
    on eWriteOpFinishResult do (writeFinishRes: tWriteOpFinishResult) {
      var finishedWrite: tWriteWork;
      var replicaKey: (tChunkId, tTargetId);

      if (writeFinishRes.status == ErrorCode_SUCCESS) {
        finishedWrite = writeFinishRes.writeWork;
        replicaKey = (finishedWrite.key.chunkId, finishedWrite.targetId);

        // a successful full-chunk replace restarts the version history of the replica
        if (finishedWrite.fullChunkReplace) {
          chunkReplicaCommits -= (replicaKey);
        }
      }
    }

    on eCommitOpResult do (commitOpResult: tCommitOpResult) {
      var work: tCommitWork;
      var newVer: tChunkVer;
      var oldVer: tChunkVer;
      var replicaKey: (tChunkId, tTargetId);

      if (commitOpResult.status != ErrorCode_SUCCESS) {
        return;
      }

      work = commitOpResult.commitWork;
      newVer = commitOpResult.commitVer;
      replicaKey = (work.key.chunkId, work.targetId);

      // make sure the replica has an entry before it is printed or extended below
      if (!(replicaKey in chunkReplicaCommits)) {
        chunkReplicaCommits += (replicaKey, default(map[tChunkVer, tCommitWork]));
      }

      // a committed removal wipes the history of the replica
      if (commitOpResult.removeChunk) {
        print format("remove request {0} committed, clear chunkReplicaCommits[{1}]: {2}",
          work.commitMsg.tag, replicaKey, chunkReplicaCommits[replicaKey]);
        chunkReplicaCommits -= (replicaKey);
        return;
      }

      // a version that was already recorded is accepted without further checks
      if (newVer in chunkReplicaCommits[replicaKey]) {
        return;
      }

      // all existing commits should have smaller version
      foreach (oldVer in keys(chunkReplicaCommits[replicaKey])) {
        assert newVer > oldVer,
          format("current commit version {0} <= previous version {1} found in chunkReplicaCommits[chunkId:{2}]: {3}, commit result: {4}",
            newVer, oldVer, replicaKey, chunkReplicaCommits[replicaKey], commitOpResult);
      }

      chunkReplicaCommits[replicaKey] += (newVer, work);
    }
  }
}
// DONE: check chunk content after each update
// Monitor that records, per chunk and target, the versions, contents and write results
// reported through eWriteWorkDone, and prints that bookkeeping whenever a commit is observed.
// The consistency assertions at the end of the eCommitWorkDone handler are commented out,
// so in its current form this spec contains no active assert statement.
spec AllReplicasOnChainUpdated observes eReadWorkDone, eWriteWorkDone, eCommitWorkDone, eNewRoutingInfo {
// routing versions that were already merged into replicaChains
var seenRoutingVers: set[tRoutingVer];
// versioned chain id -> replica chain, filled from eNewRoutingInfo
var replicaChains: map[tVersionedChainId, tReplicaChain];
// chunk -> target -> highest update version successfully written on that target
var chunkVersionOnTarget: map[tChunkId, map[tTargetId, tChunkVer]];
// chunk -> update version -> target -> chunk content reported after that update
var chunkContentOnTarget: map[tChunkId, map[tChunkVer, map[tTargetId, tBytes]]];
// chunk -> update version -> target -> successful write results, in observation order
var updatesOfChunkReplica: map[tChunkId, map[tChunkVer, map[tTargetId, seq[tWriteWorkDone]]]];
// Raises the recorded version of (chunkId, targetId) to updateVer; the version is never lowered.
fun updateChunkVersionOnTarget(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer) {
if (!(chunkId in chunkVersionOnTarget)) {
chunkVersionOnTarget += (chunkId, default(map[tTargetId, tChunkVer]));
}
if (!(targetId in chunkVersionOnTarget[chunkId])) {
chunkVersionOnTarget[chunkId] += (targetId, 0);
}
if (chunkVersionOnTarget[chunkId][targetId] < updateVer) {
chunkVersionOnTarget[chunkId][targetId] = updateVer;
}
}
// Stores chunkContent for (chunkId, updateVer, targetId). A differing content that was already
// recorded for the same key is only printed, then overwritten.
fun updateChunkContentOnTarget(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer, chunkContent: tBytes) {
if (!(chunkId in chunkContentOnTarget)) {
chunkContentOnTarget += (chunkId, default(map[tChunkVer, map[tTargetId, tBytes]]));
}
if (!(updateVer in chunkContentOnTarget[chunkId])) {
chunkContentOnTarget[chunkId] += (updateVer, default(map[tTargetId, tBytes]));
}
if (targetId in chunkContentOnTarget[chunkId][updateVer]) {
if (chunkContentOnTarget[chunkId][updateVer][targetId] != chunkContent) {
print format("find different chunk content {0} than chunkContentOnTarget[chunkId:{1}][updateVer:{2}] {3}",
chunkContent, chunkId, updateVer, chunkContentOnTarget[chunkId][updateVer]);
}
}
chunkContentOnTarget[chunkId][updateVer][targetId] = chunkContent;
}
// Appends writeWorkDone to the list kept for (chunkId, updateVer, targetId), creating missing map levels.
fun addUpdateOfChunkReplica(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer, writeWorkDone: tWriteWorkDone) {
if (!(chunkId in updatesOfChunkReplica)) {
updatesOfChunkReplica += (chunkId, default(map[tChunkVer, map[tTargetId, seq[tWriteWorkDone]]]));
}
if (!(updateVer in updatesOfChunkReplica[chunkId])) {
updatesOfChunkReplica[chunkId] += (updateVer, default(map[tTargetId, seq[tWriteWorkDone]]));
}
if (!(targetId in updatesOfChunkReplica[chunkId][updateVer])) {
updatesOfChunkReplica[chunkId][updateVer] += (targetId, default(seq[tWriteWorkDone]));
}
// insert at index sizeof(...), i.e. append at the end of the sequence
updatesOfChunkReplica[chunkId][updateVer][targetId] += (sizeof(updatesOfChunkReplica[chunkId][updateVer][targetId]), writeWorkDone);
}
start state WaitForUpdates {
// Read results currently change no state: the two update calls below are commented out.
on eReadWorkDone do (readWorkDone: tReadWorkDone) {
var chunkId: tChunkId;
var targetId: tTargetId;
var updateVer: tChunkVer;
chunkId = readWorkDone.chunkMetadata.chunkId;
targetId = readWorkDone.targetId;
updateVer = readWorkDone.chunkMetadata.updateVer;
if (readWorkDone.status == ErrorCode_SUCCESS && sizeof(readWorkDone.dataBytes) == readWorkDone.chunkMetadata.chunkSize) {
// updateChunkVersionOnTarget(chunkId, targetId, updateVer);
// updateChunkContentOnTarget(chunkId, targetId, updateVer, readWorkDone.dataBytes);
}
}
// Records version, content and the write result itself, but only for status SUCCESS.
on eWriteWorkDone do (writeWorkDone: tWriteWorkDone) {
var chunkId: tChunkId;
var targetId: tTargetId;
var updateVer: tChunkVer;
// CHUNK_COMMITTED_UPDATE and CHUNK_STALE_UPDATE pass this filter, yet nothing is recorded for them below
if (writeWorkDone.status != ErrorCode_SUCCESS &&
writeWorkDone.status != ErrorCode_CHUNK_COMMITTED_UPDATE &&
writeWorkDone.status != ErrorCode_CHUNK_STALE_UPDATE)
return;
chunkId = writeWorkDone.key.chunkId;
targetId = writeWorkDone.targetId;
updateVer = writeWorkDone.updateVer;
if (writeWorkDone.status == ErrorCode_SUCCESS) {
updateChunkVersionOnTarget(chunkId, targetId, updateVer);
updateChunkContentOnTarget(chunkId, targetId, updateVer, writeWorkDone.currentChunkContent);
addUpdateOfChunkReplica(chunkId, targetId, updateVer, writeWorkDone);
}
}
// Prints the bookkeeping of the committed chunk; for a chunk removal it also drops the
// entries of the committing target.
on eCommitWorkDone do (commitWorkDone: tCommitWorkDone) {
var chunkId: tChunkId;
var targetId: tTargetId;
var targetIdx: tTargetId;
var commitVer: tChunkVer;
var updateVer: tChunkVer;
var chunkVer: tChunkVer;
var chunkContent: tBytes;
var replicaChain: tReplicaChain;
var commitMsg: tCommitMsg;
var writeWorkDone: tWriteWorkDone;
var writeWorkIdx: int;
if (commitWorkDone.status != ErrorCode_SUCCESS &&
commitWorkDone.status != ErrorCode_CHUNK_STALE_COMMIT)
return;
targetId = commitWorkDone.targetId;
chunkId = commitWorkDone.key.chunkId;
commitVer = commitWorkDone.commitVer;
// NOTE(review): unguarded lookup; it relies on the chain version having been delivered via
// eNewRoutingInfo before the commit. replicaChain is only read by the commented-out checks below.
replicaChain = replicaChains[commitWorkDone.key.vChainId];
commitMsg = commitWorkDone.commitMsg;
// this is a special commit to remove an old chunk from a returning target
if (commitWorkDone.removeChunk && commitWorkDone.commitVer == 0)
return;
if (chunkId in updatesOfChunkReplica) {
// print all write works on the chunk
foreach (chunkVer in keys(updatesOfChunkReplica[chunkId])) {
foreach (targetIdx in keys(updatesOfChunkReplica[chunkId][chunkVer])) {
writeWorkIdx = 0;
while (writeWorkIdx < sizeof(updatesOfChunkReplica[chunkId][chunkVer][targetIdx])) {
writeWorkDone = updatesOfChunkReplica[chunkId][chunkVer][targetIdx][writeWorkIdx];
print format("updatesOfChunkReplica[chunkId:{0}][updateVer:{1}][targetIdx:{2}][#{3}][chainVer:{4}][remove:{5}][commit:{6}]: {7}",
chunkId, chunkVer, targetIdx, writeWorkIdx, writeWorkDone.chainVer, writeWorkDone.currentChunkContent == default(tBytes), chunkVer <= commitVer, writeWorkDone);
writeWorkIdx = writeWorkIdx + 1;
}
}
}
}
if (chunkId in chunkVersionOnTarget) {
// print all versions of the chunk
print format("chunkVersionOnTarget[chunkId:{0}]: {1}", chunkId, chunkVersionOnTarget[chunkId]);
}
if (chunkId in chunkContentOnTarget) {
// print all contents of the chunk
print format("chunkContentOnTarget[chunkId:{0}]: {1}", chunkId, chunkContentOnTarget[chunkId]);
}
// a committed removal forgets the version and every recorded content of this target
if (commitWorkDone.removeChunk) {
if (chunkId in chunkVersionOnTarget && targetId in chunkVersionOnTarget[chunkId]) {
print format("remove versions of chunk {0}: {1}", chunkId, chunkVersionOnTarget[chunkId]);
chunkVersionOnTarget[chunkId] -= (targetId);
}
if (chunkId in chunkContentOnTarget) {
print format("remove contents of chunk {0}: {1}", chunkId, chunkContentOnTarget[chunkId]);
foreach (chunkVer in keys(chunkContentOnTarget[chunkId])) {
chunkContentOnTarget[chunkId][chunkVer] -= (targetId);
}
}
return;
}
// Disabled checks: every serving target has a version >= commitVer and identical content at commitVer.
// foreach (targetId in replicaChain.targets) {
// if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
// assert chunkId in chunkVersionOnTarget && targetId in chunkVersionOnTarget[chunkId] && chunkVersionOnTarget[chunkId][targetId] >= commitVer,
// format("missing update, tag:{0}, chunkId:{1}, targetId:{2}, commitVer:{3}, chunkVersionOnTarget: {4}, replica chain: {5}",
// commitMsg.tag, chunkId, targetId, commitVer, chunkVersionOnTarget[chunkId], replicaChain);
// }
// }
// foreach (targetId in replicaChain.targets) {
// if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
// if (sizeof(chunkContent) == 0) {
// assert chunkId in chunkContentOnTarget && commitVer in chunkContentOnTarget[chunkId] && targetId in chunkContentOnTarget[chunkId][commitVer],
// format("missing chunk content, chunkId:{0}, commitVer:{1}, targetId:{2}, chunkContentOnTarget: {3}",
// chunkId, commitVer, targetId, chunkContentOnTarget);
// chunkContent = chunkContentOnTarget[chunkId][commitVer][targetId];
// } else {
// assert chunkContentOnTarget[chunkId][commitVer][targetId] == chunkContent,
// format("inconsistent replica, chunkContentOnTarget[chunkId:{0}][commitVer:{1}] {2}",
// chunkId, commitVer, chunkContentOnTarget[chunkId][commitVer]);
// }
// }
// }
}
// Merges the chains of a routing version that has not been seen before into replicaChains.
on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
var replicaChain: tReplicaChain;
if (routingInfo.routingVer in seenRoutingVers) {
return;
} else {
seenRoutingVers += (routingInfo.routingVer);
}
foreach (replicaChain in values(routingInfo.replicaChains)) {
replicaChains[replicaChain.vChainId] = replicaChain;
}
}
}
}
// Observed by the AllReplicasInServingState spec, which moves to its Done state when it sees this event.
event eStopMonitorTargetStates;
// Liveness monitor: stays in the hot state SomeTargetsUnavailable while any target of a known
// replica chain is in a public state other than SERVING or LASTSRV, and in the cold state
// AllTargetsAvailable otherwise. eStopMonitorTargetStates ends the monitoring (state Done).
spec AllReplicasInServingState observes eNewRoutingInfo, eSyncStartReq, eSyncDoneResp, eStopMonitorTargetStates {
  // chain id -> most recent version of the chain observed so far
  var knownReplicaChains: tReplicaChainMap;
  // targets that are neither SERVING nor LASTSRV in the latest known version of their chain
  var unavailableTargets: map[tTargetId, tPublicTargetState];
  // target id -> machines that sent eSyncStartReq for it (only printed)
  var syncWorkers: map[tTargetId, set[machine]];

  // Adds every target of replicaChain that is neither SERVING nor LASTSRV to unavailableTargets.
  fun collectUnavailableTargets(replicaChain: tReplicaChain) {
    var targetId: tTargetId;

    foreach (targetId in replicaChain.targets) {
      if (replicaChain.states[targetId] != PublicTargetState_SERVING &&
          replicaChain.states[targetId] != PublicTargetState_LASTSRV)
      {
        unavailableTargets[targetId] = replicaChain.states[targetId];
      }
    }
  }

  // Merges routingInfo into knownReplicaChains, recomputes unavailableTargets and moves to the
  // matching state.
  fun checkForUnavailableTargets(routingInfo: tRoutingInfo) {
    var replicaChain: tReplicaChain;

    unavailableTargets = default(map[tTargetId, tPublicTargetState]);

    // adopt chains that are new or carry a higher chain version than the known one
    foreach (replicaChain in values(routingInfo.replicaChains)) {
      if (!(replicaChain.vChainId.chainId in knownReplicaChains) ||
          replicaChain.vChainId.chainVer > knownReplicaChains[replicaChain.vChainId.chainId].vChainId.chainVer)
      {
        collectUnavailableTargets(replicaChain);
        knownReplicaChains[replicaChain.vChainId.chainId] = replicaChain;
        print format("added a new chain: {0}, unavailableTargets: {1}", replicaChain, unavailableTargets);
      }
    }

    // Also account for chains that did not change with this routing info. Without this pass a
    // repeated or stale routing info would leave unavailableTargets empty and the monitor would
    // enter the cold state although targets are still unavailable.
    foreach (replicaChain in values(knownReplicaChains)) {
      collectUnavailableTargets(replicaChain);
    }

    if (sizeof(unavailableTargets) > 0) {
      goto SomeTargetsUnavailable;
    } else {
      goto AllTargetsAvailable;
    }
  }

  // Currently a no-op; the membership check is disabled.
  fun onSyncDone(syncDoneResp: tSyncDoneResp) {
    // assert syncDoneResp.targetId in unavailableTargets,
    // format("sync target {0} not found in unavailableTargets: {1}", syncDoneResp, unavailableTargets);
  }

  // Remembers which machine started a sync for the target.
  fun onSyncStart(syncStartReq: tSyncStartReq) {
    if (!(syncStartReq.targetId in syncWorkers)) {
      syncWorkers += (syncStartReq.targetId, default(set[machine]));
    }

    syncWorkers[syncStartReq.targetId] += (syncStartReq.from);
  }

  start cold state AllTargetsAvailable {
    on eNewRoutingInfo do checkForUnavailableTargets;
    on eSyncDoneResp do onSyncDone;
    on eSyncStartReq do onSyncStart;
    on eStopMonitorTargetStates goto Done;
  }

  hot state SomeTargetsUnavailable {
    entry {
      var replicaChain: tReplicaChain;

      print format("unavailable targets: {0}, sync workers: {1}", unavailableTargets, syncWorkers);
      // foreach (replicaChain in values(knownReplicaChains)) {
      // print format("known replica chain: {0}", replicaChain);
      // }
    }

    on eNewRoutingInfo do checkForUnavailableTargets;
    on eSyncDoneResp do onSyncDone;
    on eSyncStartReq do onSyncStart;
    on eStopMonitorTargetStates goto Done;
  }

  cold state Done {
    ignore eNewRoutingInfo, eSyncStartReq, eSyncDoneResp;
  }
}

View File

@@ -0,0 +1,30 @@
// Returns the smaller of x and y (y when both are equal).
fun Min(x: int, y: int): int {
  var smallest: int;

  smallest = y;
  if (x < y) {
    smallest = x;
  }
  return smallest;
}
// Returns the larger of x and y (y when both are equal).
fun Max(x: int, y: int): int {
  var largest: int;

  largest = y;
  if (x > y) {
    largest = x;
  }
  return largest;
}
// Computes the bitwise AND of two integers by walking their binary digits from the lowest bit.
// The loop only runs while both operands are positive, so the result is 0 as soon as either
// argument is zero or negative.
fun BitwiseAnd(x: int, y: int): int {
  var left: int;
  var right: int;
  var bitValue: int;
  var result: int;

  left = x;
  right = y;
  bitValue = 1;
  result = 0;

  while (left > 0 && right > 0) {
    // the current bit is part of the result only when it is set in both operands
    if (left % 2 > 0 && right % 2 > 0) {
      result = result + bitValue;
    }
    left = left / 2;
    right = right / 2;
    bitValue = bitValue * 2;
  }

  return result;
}

View File

@@ -0,0 +1,75 @@
// Payload of eSendHeartbeat: a MgmtClient together with a MgmtService.
type tHeartbeatConns = (mgmtClient: MgmtClient, mgmtService: MgmtService);
// Sent by MgmtClient to its client host when heartbeats are enabled.
event eSendHeartbeat: tHeartbeatConns;
// Sent by MgmtClient to its client host when it adopts routing info with a higher version.
event eNewRoutingInfo: tRoutingInfo;
// Client-side agent of the management service. On every timer tick it optionally asks its
// host to send a heartbeat, requests the routing info from the management service, and
// forwards newer routing info to the host. It can be shut down (Offline) and restarted.
machine MgmtClient {
  var nodeId: tNodeId;
  // machine that receives eSendHeartbeat and eNewRoutingInfo from this client
  var clientHost: machine;
  var mgmtService: MgmtService;
  // when false, only routing info is polled and no heartbeat is requested
  var sendHeartbeats: bool;
  var timer: Timer;
  // request id used for the most recently created message tag
  var nextRequestId: tRequestId;
  // latest routing info adopted by this client
  var routingInfo: tRoutingInfo;

  // Creates a fresh message tag by advancing the request counter of this node.
  fun newMessageTag(): tMessageTag {
    var tag: tMessageTag;

    nextRequestId = nextRequestId + 1;
    tag = (nodeId = nodeId, requestId = nextRequestId);
    return tag;
  }

  start state Init {
    entry (args: (nodeId: tNodeId, clientHost: machine, mgmtService: MgmtService, sendHeartbeats: bool)) {
      print format("{0} init: {1}", this, args);

      nodeId = args.nodeId;
      clientHost = args.clientHost;
      mgmtService = args.mgmtService;
      sendHeartbeats = args.sendHeartbeats;
      timer = CreateTimer(this);

      goto SendHeartbeats;
    }
  }

  state SendHeartbeats {
    // One round: optional heartbeat, routing info request, then re-arm the timer.
    entry {
      var requestTag: tMessageTag;

      if (sendHeartbeats) {
        print format("{0} of {1} sends heartbeat to {2}", this, clientHost, mgmtService);
        send clientHost, eSendHeartbeat, (mgmtClient = this, mgmtService = mgmtService);
      }

      requestTag = newMessageTag();
      send mgmtService, eGetRoutingInfoReq, (from = this, tag = requestTag, routingVer = routingInfo.routingVer);
      StartTimer(timer);
    }

    // every timeout re-enters this state and therefore starts the next round
    on eTimeOut goto SendHeartbeats;

    on eShutDown goto Offline with (from: machine) {
      print format("{0} of node {1} is going to shutdown", this, nodeId);
      CancelTimer(timer);
    }

    on eGetRoutingInfoResp do (getRoutingInfoResp: tGetRoutingInfoResp) {
      // failed responses are dropped
      if (getRoutingInfoResp.status != ErrorCode_SUCCESS) {
        return;
      }

      // routing info that is not strictly newer than the current one is dropped as well
      if (routingInfo.routingVer >= getRoutingInfoResp.routingInfo.routingVer) {
        return;
      }

      print format("{0}: routing info version {1} is greater than: {2}", this, getRoutingInfoResp.routingInfo.routingVer, routingInfo.routingVer);
      routingInfo = getRoutingInfoResp.routingInfo;
      send clientHost, eNewRoutingInfo, routingInfo;
    }
  }

  state Offline {
    ignore eTimeOut, eShutDown, eGetRoutingInfoResp;

    entry {
      print format("{0} #{1} is offline, client host: {2}", this, nodeId, clientHost);
      // forget the routing info so that it is fetched again after a restart
      routingInfo = default(tRoutingInfo);
    }

    on eRestart goto SendHeartbeats with (from: machine) {
      print format("{0} #{1} is restarted by {2}", this, nodeId, from);
    }
  }
}

View File

@@ -0,0 +1,623 @@
// Public state of a storage target within a replica chain.
// Every valid state has a distinct power-of-two value, which lets callers add several states
// together into an int mask and test membership with BitwiseAnd.
enum tPublicTargetState {
PublicTargetState_INVALID = 0, // invalid state
PublicTargetState_SERVING = 1, // online and serving client requests
PublicTargetState_LASTSRV = 2, // offline but it was the last serving target
PublicTargetState_SYNCING = 4, // online and syncing updates
PublicTargetState_WAITING = 8, // online and waiting to join the chain
PublicTargetState_OFFLINE = 16 // crashed or stopped
}
// Tells whether targetState is one of the two active states, SERVING or SYNCING.
fun IsActiveTargetState(targetState: tPublicTargetState): bool {
  if (targetState == PublicTargetState_SERVING) {
    return true;
  }
  return targetState == PublicTargetState_SYNCING;
}
// Builds the table of all valid public target states and their display names.
// PublicTargetState_INVALID is intentionally not part of the table.
fun AllPublicTargetStates(): map[tPublicTargetState, string] {
  var names: map[tPublicTargetState, string];

  // entries are inserted in the same order as before: SERVING, LASTSRV, SYNCING, WAITING, OFFLINE
  names[PublicTargetState_SERVING] = "SERVING";
  names[PublicTargetState_LASTSRV] = "LASTSRV";
  names[PublicTargetState_SYNCING] = "SYNCING";
  names[PublicTargetState_WAITING] = "WAITING";
  names[PublicTargetState_OFFLINE] = "OFFLINE";

  return names;
}
// Renders a public target state, or a sum of several states used as a bit mask, as text.
// The names of all states contained in x are joined with "+", followed by the numeric value
// in parentheses, e.g. "SERVING+LASTSRV(3)". A value of 0 or less yields just "(x)".
// Bits that belong to no known state are reported as "UNKNOWN[bits]"; previously such a
// value was never reduced and the loop did not terminate.
fun PublicTargetStateToString(x: int): string {
  var states: map[tPublicTargetState, string];
  var s: tPublicTargetState;
  var y: int;
  var str: string;
  var matched: bool;

  states = AllPublicTargetStates();
  y = x;

  while (y > 0) {
    if (str != "") {
      str = str + "+";
    }

    // strip one known state per round
    matched = false;
    foreach (s in keys(states)) {
      if (BitwiseAnd(y, (s to int)) == (s to int)) {
        str = str + states[s];
        y = y - (s to int);
        matched = true;
        break;
      }
    }

    // none of the remaining bits is a known state: report them once and stop
    if (!matched) {
      str = str + format("UNKNOWN[{0}]", y);
      break;
    }
  }

  return str + format("({0})", x);
}
// Renders a target-id -> public-state map as "<id->STATE(n)>, <id->STATE(n)>, ...".
// An empty map yields the empty string.
fun PublicTargetStatesToString(targetStates: map[tTargetId, tPublicTargetState]): string {
  var id: tTargetId;
  var rendered: string;
  var separator: string;

  // the separator is empty for the first entry and ", " for all following ones
  separator = "";
  foreach (id in keys(targetStates)) {
    rendered = rendered + separator + format("<{0}->{1}>", id, PublicTargetStateToString(targetStates[id] to int));
    separator = ", ";
  }

  return rendered;
}
// Map aliases shared by the management service and the specs.
// target id -> storage target machine
type tLocalTargetMap = map[tTargetId, StorageTarget];
// node id -> targets of that node
type tGlobalTargetMap = map[tNodeId, tLocalTargetMap];
// chain id -> replica chain
type tReplicaChainMap = map[tChainId, tReplicaChain];
// node id -> storage client machine
type tStorageClientMap = map[tNodeId, StorageClient];
// node id -> storage service machine
type tStorageServiceMap = map[tNodeId, StorageService];
// Version number of the routing info; compared with < by MgmtClient to detect newer info.
type tRoutingVer = int;
// Routing info: its version, the replica chains, the storage services and the set of offline services.
type tRoutingInfo = (routingVer: tRoutingVer, replicaChains: tReplicaChainMap, storageServices: tStorageServiceMap, offlineServices: set[tNodeId]);
// Request for routing info; routingVer is the version the requester currently holds.
type tGetRoutingInfoReq = (from: machine, tag: tMessageTag, routingVer: tRoutingVer);
// Response to tGetRoutingInfoReq; MgmtClient only uses routingInfo when status is ErrorCode_SUCCESS.
type tGetRoutingInfoResp = (tag: tMessageTag, status: tErrorCode, routingInfo: tRoutingInfo);
event eGetRoutingInfoReq : tGetRoutingInfoReq;
event eGetRoutingInfoResp : tGetRoutingInfoResp;
// Report of a node's local target states together with its target and service machines.
type tUpdateTargetStateMsg = (from: machine, tag: tMessageTag, routingVer: tRoutingVer, nodeId: tNodeId, targetStates: tLocalTargetStateMap, localTargets: tLocalTargetMap, storageService: StorageService);
event eUpdateTargetStateMsg : tUpdateTargetStateMsg;
// Registration of a storage client at the management service.
type tRegisterClientMsg = (from: machine, nodeId: tNodeId, storageClient: StorageClient);
event eRegisterClientMsg : tRegisterClientMsg;
// NOTE(review): the meaning of the int payload is not visible in this part of the file.
event eStopFindNewFailures : int;
event eStartNextHeartbeatRound;
// DONE: remove failed storage targets from replication chains
// DONE: re-send pending write requests to successor
// DONE: let failed targets resync and return
// TODO: allow targets moved from one node to another
// TODO: leader election among multiple mgmt services
// TODO: create C++ interfaces from the spec
machine MgmtService {
var nodeId: tNodeId;
var nextRequestId: tRequestId;
var routingVer: tRoutingVer;
var numStorageServices: int;
// var mgmtClients: set[machine];
var fullReplicaChains: tReplicaChainMap;
// var knownStorageClients: tStorageClientMap;
var knownStorageServices: tStorageServiceMap;
var nodeTargetStates: map[tNodeId, tLocalTargetStateMap];
var storageTargets: map[tTargetId, StorageTarget]; // for debug only
var delayedRoutingReqs: map[(machine, tRoutingVer), tGetRoutingInfoReq];
// num of ping attempts made
var numAttempts: int;
var maxAttempts: int;
var stopFindNewFailures: int;
// set of offline storage services
var offlineStorageServices: set[tNodeId];
// nodes that have responded in the current round
var aliveStorageServices: set[tNodeId];
// timer to wait for responses from nodes
var timer: Timer;
// Creates a fresh message tag by advancing the request counter of this node.
fun newMessageTag(): tMessageTag {
  var tag: tMessageTag;

  nextRequestId = nextRequestId + 1;
  tag = (nodeId = nodeId, requestId = nextRequestId);
  return tag;
}
// fun registerClient(registerClientMsg: tRegisterClientMsg) {
// var nodeId: tNodeId;
// var storageClient: StorageClient;
// nodeId = registerClientMsg.nodeId;
// storageClient = registerClientMsg.storageClient;
// assert !(nodeId in knownStorageClients && knownStorageClients[nodeId] != storageClient);
// knownStorageClients[nodeId] = storageClient;
// mgmtClients += (registerClientMsg.from);
// print format("added client {0}", nodeId);
// }
// Replaces the recorded local target states of node nodeId with localTargetStates and
// remembers the machine of every reported target in storageTargets.
fun updateLocalTargetState(nodeId: tNodeId, localTargetStates: tLocalTargetStateMap, localTargets: tLocalTargetMap) {
  var targetId: tTargetId;

  if (!(nodeId in nodeTargetStates)) {
    nodeTargetStates += (nodeId, default(tLocalTargetStateMap));
  }

  // forget targets that the node no longer reports
  foreach (targetId in keys(nodeTargetStates[nodeId])) {
    if (targetId in localTargetStates) {
      continue;
    }
    nodeTargetStates[nodeId] -= (targetId);
  }

  // take over the reported state and machine of every reported target
  foreach (targetId in keys(localTargetStates)) {
    nodeTargetStates[nodeId][targetId] = localTargetStates[targetId];
    storageTargets[targetId] = localTargets[targetId];
  }
}
// Sets every target currently recorded for node nodeId to targetState.
// A node that is not known yet gets an empty entry, so nothing else changes for it.
fun setLocalTargetState(nodeId: tNodeId, targetState: tLocalTargetState) {
  var id: tTargetId;

  if (!(nodeId in nodeTargetStates)) {
    nodeTargetStates += (nodeId, default(tLocalTargetStateMap));
  }

  foreach (id in keys(nodeTargetStates[nodeId])) {
    nodeTargetStates[nodeId][id] = targetState;
  }
}
// Applies a target state report: the states and target machines it carries replace what is
// recorded for the reporting node.
fun processUpdateTargetStateMsg(updateTargetStateMsg: tUpdateTargetStateMsg) {
  var reportedStates: tLocalTargetStateMap;
  var reportedTargets: tLocalTargetMap;

  // mgmtClients += (updateTargetStateMsg.from);
  reportedStates = updateTargetStateMsg.targetStates;
  reportedTargets = updateTargetStateMsg.localTargets;
  updateLocalTargetState(updateTargetStateMsg.nodeId, reportedStates, reportedTargets);
}
// Returns replicaChain with targetId appended at the end, together with its public state,
// node and storage service. A target that is already on the chain leaves it unchanged.
fun appendTargetToChain(replicaChain: tReplicaChain, targetId: tTargetId, nodeId: tNodeId, targetState: tPublicTargetState): tReplicaChain {
  if (!(targetId in replicaChain.targets)) {
    // insert at index sizeof(targets), i.e. at the end of the sequence
    replicaChain.targets += (sizeof(replicaChain.targets), targetId);
    replicaChain.states += (targetId, targetState);
    replicaChain.nodes += (targetId, nodeId);
    replicaChain.services += (targetId, knownStorageServices[nodeId]);
  }

  return replicaChain;
}
// Returns chain extended by every target of other that it does not contain yet,
// keeping the order in which the targets appear in other.
fun extendChain(chain: tReplicaChain, other: tReplicaChain): tReplicaChain {
  var candidate: tTargetId;

  foreach (candidate in other.targets) {
    if (!(candidate in chain.targets)) {
      chain = appendTargetToChain(chain, candidate, other.nodes[candidate], other.states[candidate]);
    }
  }

  return chain;
}
// Applies one transition rule to a single target.
// The rule fires when the target's reported local state equals expectedLocalState and its
// current public state (taken from fullReplicaChains) is contained in the bit mask
// fromPublicState. In that case the target is appended to replicaChain with toPublicState;
// if this changes its public state, the chain version of replicaChain and the routing
// version are both incremented. Returns the possibly extended replicaChain.
fun updatePublicTargetState(
  replicaChain: tReplicaChain,
  chainId: tChainId,
  targetId: tTargetId,
  expectedLocalState: tLocalTargetState,
  fromPublicState: int,
  toPublicState: tPublicTargetState): tReplicaChain
{
  var nodeId: tNodeId;
  var currentLocalState: tLocalTargetState;
  var currentPublicState: tPublicTargetState;

  nodeId = fullReplicaChains[chainId].nodes[targetId];
  currentLocalState = nodeTargetStates[nodeId][targetId];
  currentPublicState = fullReplicaChains[chainId].states[targetId];

  // the rule does not apply to targets in a different local state
  if (currentLocalState != expectedLocalState) {
    return replicaChain;
  }

  // the rule does not apply when the current public state is not part of the mask
  if (BitwiseAnd(currentPublicState to int, fromPublicState to int) <= 0) {
    return replicaChain;
  }

  replicaChain = appendTargetToChain(replicaChain, targetId, nodeId, toPublicState);

  if (currentPublicState != toPublicState) {
    // a real state change bumps both the chain version and the routing version
    replicaChain.vChainId.chainVer = replicaChain.vChainId.chainVer + 1;
    routingVer = routingVer + 1;
    print format("chain {0}, {1} #{2}: public state updated {3} ==> {4}, local state: {5}, routing version: {6}",
      replicaChain.vChainId, storageTargets[targetId], targetId,
      PublicTargetStateToString(fromPublicState to int),
      PublicTargetStateToString(toPublicState to int),
      LocalTargetStateToString(currentLocalState),
      routingVer);
  } else {
    print format("chain {0}, {1} #{2}: public state untouched, from state {3}, to state {4}, local state: {5}",
      chainId, storageTargets[targetId], targetId,
      PublicTargetStateToString(fromPublicState to int),
      PublicTargetStateToString(toPublicState to int),
      LocalTargetStateToString(expectedLocalState));
  }

  return replicaChain;
}
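/* transitions of public target states
   one table per local target state (<up-to-date>, <online>, <offline>)
   rows: current public state, columns: resulting public state
   y: transition always taken, c: transition taken only under a condition checked in updateOneReplicaChain
<up-to-date>
          serving  syncing  waiting  lastsrv  offline
serving      y
syncing      y
waiting                        y
lastsrv      y
offline                        y
<online>
          serving  syncing  waiting  lastsrv  offline
serving      y
syncing               c        c
waiting               c        c
lastsrv      y
offline                        y
<offline>
          serving  syncing  waiting  lastsrv  offline
serving                                 c        c
syncing                                          y
waiting                                          y
lastsrv                                 y
offline                                          y
*/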
// Recomputes the public state of every target of chain chainId from the local states recorded
// in nodeTargetStates and returns the resulting chain.
// Targets are first collected into one temporary chain per resulting public state, then the
// groups are concatenated in the iteration order of keys(AllPublicTargetStates())
// (presumably the insertion order SERVING, LASTSRV, SYNCING, WAITING, OFFLINE; confirm).
// The returned chain version is the current version plus the number of state changes that
// updatePublicTargetState counted in the groups; routingVer is advanced as a side effect of
// those calls. The order of the rule groups below matters: later rules test the sizes and
// members of groups filled by earlier rules.
fun updateOneReplicaChain(chainId: tChainId): tReplicaChain {
var states: map[tPublicTargetState, string];
var targetsGroupbyState: map[tPublicTargetState, tReplicaChain];
var updatedReplicaChain: tReplicaChain;
var targetId: tTargetId;
var targetState: tPublicTargetState;
states = AllPublicTargetStates();
// start with an empty group chain (chain version 0) for every public state
foreach (targetState in keys(states)) {
targetsGroupbyState[targetState] = default(tReplicaChain);
targetsGroupbyState[targetState].vChainId.chainId = chainId;
}
// state transitions to serving
// up-to-date targets that are serving, syncing or lastsrv become serving
foreach (targetId in fullReplicaChains[chainId].targets) {
targetsGroupbyState[PublicTargetState_SERVING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SERVING],
chainId, targetId,
LocalTargetState_UPTODATE,
(PublicTargetState_SERVING to int) + (PublicTargetState_SYNCING to int) + (PublicTargetState_LASTSRV to int),
PublicTargetState_SERVING);
}
// online targets that are serving or lastsrv become serving
foreach (targetId in fullReplicaChains[chainId].targets) {
targetsGroupbyState[PublicTargetState_SERVING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SERVING],
chainId, targetId,
LocalTargetState_ONLINE,
(PublicTargetState_SERVING to int) + (PublicTargetState_LASTSRV to int),
PublicTargetState_SERVING);
}
// state transitions to lastsrv
foreach (targetId in fullReplicaChains[chainId].targets) {
// an offline serving target becomes lastsrv only while no target is serving and none is lastsrv yet
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) == 0 &&
sizeof(targetsGroupbyState[PublicTargetState_LASTSRV].targets) == 0) {
targetsGroupbyState[PublicTargetState_LASTSRV] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_LASTSRV],
chainId, targetId,
LocalTargetState_OFFLINE,
PublicTargetState_SERVING to int,
PublicTargetState_LASTSRV);
}
// an offline lastsrv target stays lastsrv
targetsGroupbyState[PublicTargetState_LASTSRV] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_LASTSRV],
chainId, targetId,
LocalTargetState_OFFLINE,
PublicTargetState_LASTSRV to int,
PublicTargetState_LASTSRV);
}
// state transitions to syncing
// an online syncing target keeps syncing while at least one target is serving
foreach (targetId in fullReplicaChains[chainId].targets) {
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) > 0) {
targetsGroupbyState[PublicTargetState_SYNCING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SYNCING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_SYNCING to int,
PublicTargetState_SYNCING);
}
}
// an online waiting target starts syncing when a target is serving and the syncing group is still empty
foreach (targetId in fullReplicaChains[chainId].targets) {
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) > 0 &&
sizeof(targetsGroupbyState[PublicTargetState_SYNCING].targets) == 0) {
targetsGroupbyState[PublicTargetState_SYNCING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SYNCING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_WAITING to int,
PublicTargetState_SYNCING);
}
}
// state transitions to waiting
foreach (targetId in fullReplicaChains[chainId].targets) {
// an online syncing target falls back to waiting when no target is serving
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) == 0) {
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_SYNCING to int,
PublicTargetState_WAITING);
}
// an online waiting target keeps waiting unless it was just moved to the syncing group
if (!(targetId in targetsGroupbyState[PublicTargetState_SYNCING].targets)) {
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_WAITING to int,
PublicTargetState_WAITING);
}
// an up-to-date target that is offline or waiting becomes waiting
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_UPTODATE,
(PublicTargetState_OFFLINE to int) + (PublicTargetState_WAITING to int),
PublicTargetState_WAITING);
// an online target that is publicly offline becomes waiting
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_OFFLINE to int,
PublicTargetState_WAITING);
}
// state transitions to offline
foreach (targetId in fullReplicaChains[chainId].targets) {
// an offline serving target becomes offline unless it was chosen as lastsrv above
if (!(targetId in targetsGroupbyState[PublicTargetState_LASTSRV].targets)) {
targetsGroupbyState[PublicTargetState_OFFLINE] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_OFFLINE],
chainId, targetId,
LocalTargetState_OFFLINE,
PublicTargetState_SERVING to int,
PublicTargetState_OFFLINE);
}
// an offline target that is syncing, waiting or offline becomes (or stays) offline
targetsGroupbyState[PublicTargetState_OFFLINE] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_OFFLINE],
chainId, targetId,
LocalTargetState_OFFLINE,
(PublicTargetState_SYNCING to int) + (PublicTargetState_WAITING to int) + (PublicTargetState_OFFLINE to int),
PublicTargetState_OFFLINE);
}
// print format("chain {0}, targets group by state: {1}", chainId, targetsGroupbyState);
// concatenate the groups and add up the state changes counted in their chain versions
updatedReplicaChain.vChainId = fullReplicaChains[chainId].vChainId;
foreach (targetState in keys(states)) {
updatedReplicaChain = extendChain(updatedReplicaChain, targetsGroupbyState[targetState]);
updatedReplicaChain.vChainId.chainVer = updatedReplicaChain.vChainId.chainVer + targetsGroupbyState[targetState].vChainId.chainVer;
}
return updatedReplicaChain;
}
// Rebuilds every replica chain via updateOneReplicaChain and stores the result
// back into fullReplicaChains. When routingVer differs afterwards from its
// value on entry, the routing queries parked in delayedRoutingReqs are answered.
// NOTE(review): routingVer is not assigned in this function; presumably it is
// bumped somewhere below updateOneReplicaChain - confirm.
fun updateRoutingInfo() {
    var updatedReplicaChain: tReplicaChain;
    var localTargetStates: map[tTargetId, tLocalTargetState];
    var chainId: tChainId;
    var targetId: tTargetId;
    // Must be a local: the loop below uses it as scratch space, and without this
    // declaration it would overwrite the machine-level nodeId assigned in Init.
    var nodeId: tNodeId;
    var prevRoutingVer: tRoutingVer;

    prevRoutingVer = routingVer;
    foreach (chainId in keys(fullReplicaChains)) {
        // collect the local state of each target of this chain (only used by the log line below)
        localTargetStates = default(map[tTargetId, tLocalTargetState]);
        foreach (targetId in fullReplicaChains[chainId].targets) {
            nodeId = fullReplicaChains[chainId].nodes[targetId];
            localTargetStates += (targetId, nodeTargetStates[nodeId][targetId]);
        }
        print format("start to update chain {0}, public states: {1}, local states: {2}",
            fullReplicaChains[chainId].vChainId,
            PublicTargetStatesToString(fullReplicaChains[chainId].states),
            LocalTargetStatesToString(localTargetStates));
        updatedReplicaChain = updateOneReplicaChain(chainId);
        if (updatedReplicaChain.vChainId != fullReplicaChains[chainId].vChainId) {
            print format("replication chain updated: {0}, updated states: {1}, services: {2}",
                updatedReplicaChain.vChainId,
                PublicTargetStatesToString(updatedReplicaChain.states),
                updatedReplicaChain.services);
        }
        // invariant: a chain always keeps at least one SERVING or LASTSRV target
        assert PublicTargetState_SERVING in values(updatedReplicaChain.states) ||
               PublicTargetState_LASTSRV in values(updatedReplicaChain.states),
            format("no serving target: {0}", ReplicaChainToString(updatedReplicaChain));
        // invariant: an update may reorder or re-label targets but never adds or drops one
        assert sizeof(updatedReplicaChain.targets) == sizeof(fullReplicaChains[chainId].targets),
            format("updated chain {0} has different number of targets {1} than the old chain {2}",
                chainId, updatedReplicaChain.targets, fullReplicaChains[chainId].targets);
        fullReplicaChains[chainId] = updatedReplicaChain;
    }
    if (routingVer != prevRoutingVer) {
        print format("routing info updated to version {0}, process delayed routing queries: {1}", routingVer, delayedRoutingReqs);
        processDelayedRoutingReqs();
    }
}
// Re-evaluates every parked routing query: each one is taken out of
// delayedRoutingReqs and handed to replyWithRoutingInfo, which either answers
// it or parks it again.
fun processDelayedRoutingReqs() {
    var parked: seq[tGetRoutingInfoReq];
    var req: tGetRoutingInfoReq;
    var idx: int;

    // work on a snapshot because the map is modified while we walk it
    parked = values(delayedRoutingReqs);
    idx = 0;
    while (idx < sizeof(parked)) {
        req = parked[idx];
        delayedRoutingReqs -= (req.from, req.routingVer);
        replyWithRoutingInfo(req);
        idx = idx + 1;
    }
}
// Answers a routing query with the current routing table, unless the requester
// already holds the current routingVer; in that case the query is parked in
// delayedRoutingReqs (keyed by sender and version) and nothing is sent.
fun replyWithRoutingInfo(getRoutingInfo: tGetRoutingInfoReq) {
    var currentInfo: tRoutingInfo;

    if (getRoutingInfo.routingVer != routingVer) {
        // requester's version differs from ours: send the full routing table
        currentInfo = (
            routingVer = routingVer,
            replicaChains = fullReplicaChains,
            storageServices = knownStorageServices,
            offlineServices = offlineStorageServices);
        send getRoutingInfo.from, eGetRoutingInfoResp, (tag = getRoutingInfo.tag, status = ErrorCode_SUCCESS, routingInfo = currentInfo);
        return;
    }

    // nothing new to report: remember the query, at most once per (sender, version)
    if (!((getRoutingInfo.from, getRoutingInfo.routingVer) in delayedRoutingReqs)) {
        delayedRoutingReqs += ((getRoutingInfo.from, getRoutingInfo.routingVer), getRoutingInfo);
    }
}
// Returns the known storage services that are missing from aliveStorageServices.
// stopFindNewFailures gates the detection:
//   2 - detection is switched off, the result is always empty;
//   1 - switch to 2 (and return empty) as soon as every known service is alive;
//   any other value - normal detection.
fun computeOfflineStorageServices() : set[tNodeId] {
    var missing: set[tNodeId];
    var knownIds: seq[tNodeId];
    var pos: int;

    // wait until all storage services are alive and then stop finding new failures
    if (stopFindNewFailures == 1 && sizeof(knownStorageServices) == sizeof(aliveStorageServices)) {
        stopFindNewFailures = 2;
    }
    if (stopFindNewFailures == 2) {
        return missing;
    }

    knownIds = keys(knownStorageServices);
    pos = 0;
    while (pos < sizeof(knownIds)) {
        if (!(knownIds[pos] in aliveStorageServices)) {
            missing += (knownIds[pos]);
        }
        pos = pos + 1;
    }
    return missing;
}
// Initial state: copy the constructor arguments into machine fields, seed the
// routing version, create the timer and move on to Bootstrap.
start state Init {
    entry (args: (nodeId: tNodeId, maxAttempts: int, numStorageServices: int, replicaChains: tReplicaChainMap)) {
        // routing versions start from a fixed seed value
        routingVer = 10001;
        fullReplicaChains = args.replicaChains;
        numStorageServices = args.numStorageServices;
        maxAttempts = args.maxAttempts;
        nodeId = args.nodeId;

        timer = CreateTimer(this);
        goto Bootstrap;
    }
}
// Start-up phase: wait until numStorageServices distinct storage services have
// reported their target states, then publish the first routing table and start
// failure detection.
state Bootstrap {
// routing queries and client registrations are kept in the queue until this state is left
defer eGetRoutingInfoReq, eRegisterClientMsg;
on eUpdateTargetStateMsg do (updateTargetStateMsg: tUpdateTargetStateMsg) {
var nodeId: tNodeId;
processUpdateTargetStateMsg(updateTargetStateMsg);
// remember which machine hosts the reporting node
knownStorageServices[updateTargetStateMsg.nodeId] = updateTargetStateMsg.storageService;
// every expected service has reported: mark all of them alive, compute the
// routing table and switch to heartbeat monitoring
if (sizeof(knownStorageServices) == numStorageServices) {
foreach(nodeId in keys(knownStorageServices)) {
aliveStorageServices += (nodeId);
}
updateRoutingInfo();
print format("mgmt service started");
goto WaitForHeartbeats;
}
}
}
// Steady state: serve routing queries, record heartbeats, and every maxAttempts
// timer rounds mark the services that sent no heartbeat as offline.
state WaitForHeartbeats {
entry {
// start wait timer to wait for responses
StartTimer(timer);
}
on eGetRoutingInfoReq do replyWithRoutingInfo;
// on eRegisterClientMsg do registerClient;
// the stored value is read by computeOfflineStorageServices to gate failure detection
on eStopFindNewFailures do (value: int) {
stopFindNewFailures = value;
}
// heartbeat from a storage service: drop it if it was produced against an
// older routing version, otherwise apply it and mark the sender alive
on eUpdateTargetStateMsg do (updateTargetStateMsg: tUpdateTargetStateMsg) {
if (updateTargetStateMsg.routingVer < routingVer) {
print format("#{0}: ignore stale heartbeat (routingVer < {1}): {2} ", numAttempts, routingVer, updateTargetStateMsg);
return;
}
processUpdateTargetStateMsg(updateTargetStateMsg);
aliveStorageServices += (updateTargetStateMsg.nodeId);
print format("#{0}: {1} added to aliveStorageServices {2}", numAttempts, updateTargetStateMsg.nodeId, aliveStorageServices);
}
// one timer round elapsed; a detection window consists of maxAttempts rounds
on eTimeOut do {
var nodeId: tNodeId;
// one more attempt finished
numAttempts = numAttempts + 1;
print format("#{0}: aliveStorageServices: {1}", numAttempts, aliveStorageServices);
// window not finished yet: just re-arm the timer
if (numAttempts < maxAttempts) {
// send this, eStartNextHeartbeatRound;
StartTimer(timer);
return;
}
// set storage targets to offline state
offlineStorageServices = computeOfflineStorageServices();
foreach (nodeId in offlineStorageServices) {
print format("detected node {0} {1} is down, set its targets offline: {2}",
nodeId, knownStorageServices[nodeId], keys(nodeTargetStates[nodeId]));
setLocalTargetState(nodeId, LocalTargetState_OFFLINE);
}
updateRoutingInfo();
// lets reset and restart the failure detection
aliveStorageServices = default(set[tNodeId]);
numAttempts = 0;
StartTimer(timer);
// send this, eStartNextHeartbeatRound;
}
// on eStartNextHeartbeatRound goto WaitForHeartbeats;
// shutdown request: acknowledge to the requester with eStopped, then go Offline
on eShutDown goto Offline with (from: machine) {
print format("{0} is going to shutdown", this);
send from, eStopped, this;
}
}
// Terminal state entered on eShutDown: cancels the timer and drops every
// message that still arrives.
state Offline {
    // failure detection has finished; anything arriving now is a delayed message and must be ignored
    ignore eGetRoutingInfoReq, eUpdateTargetStateMsg, eRegisterClientMsg, eTimeOut, eStartNextHeartbeatRound;
    entry {
        print format("stop failure detection");
        CancelTimer(timer);
    }
}
}

View File

@@ -0,0 +1,315 @@
/* Storage Client */
// Payload of a user write submission; `from` receives the eWriteComplete reply.
// StorageClient turns a submission whose dataBytes equals default(tBytes) into a chunk removal.
type tWriteArgs = (from: machine, chunkId: tChunkId, offset: int, length: int, dataBytes: tBytes);
// Payload of a user read submission; `from` receives the eReadComplete reply.
type tReadArgs = (from: machine, chunkId: tChunkId, offset: int, length: int);
// Result reported to the user for a write.
type tWriteRes = (status: tErrorCode, chunkId: tChunkId, commitVer: tChunkVer);
// Result reported to the user for a read.
type tReadRes = (status: tErrorCode, chunkId: tChunkId, chunkMetadata: tChunkMetadata, dataBytes: tBytes);
// user -> StorageClient: submit a write / read
event eSubmitWrite : tWriteArgs;
event eSubmitRead : tReadArgs;
// StorageClient -> user: final outcome of a write / read
event eWriteComplete : tWriteRes;
event eReadComplete : tReadRes;
// user -> StorageClient: ask to be told (via eClientConnected) once the client has routing info
event eWaitConnected : machine;
event eClientConnected;
// Storage client. Accepts eSubmitWrite / eSubmitRead from users, forwards them
// as eWriteReq / eReadReq to the first target of the owning replica chain, and
// re-sends in-flight requests when new routing info changes the chain version
// or a response reports a chain version mismatch.
machine StorageClient {
    var clientId: tNodeId;
    var mgmtService: MgmtService;
    var mgmtClient: MgmtClient;
    // var timer: Timer;
    var routingVer: tRoutingVer;
    var replicaChains: tReplicaChainMap;
    var nextRequestId: tRequestId;
    // users that asked (eWaitConnected) to be notified once routing info is available
    var clientUsers: set[machine];
    // original user submissions, kept so the completion can be sent back to `from`
    var submittedWrites: map[tMessageTag, tWriteArgs];
    var submittedReads: map[tMessageTag, tReadArgs];
    // requests that have not been answered with a final response yet
    var inflightWriteReqs: map[tMessageTag, tWriteReq];
    var inflightReadReqs: map[tMessageTag, tReadReq];

    // Returns a fresh tag made of this client's id and the next request number.
    fun newMessageTag(): tMessageTag {
        nextRequestId = nextRequestId + 1;
        return (nodeId = clientId, requestId = nextRequestId);
    }

    // Maps a chunk id to the chain that owns it (chunkId modulo the number of
    // known chains) and returns the key with that chain's current version.
    // NOTE(review): the parameter is declared tChainId although callers pass a
    // chunk id; left unchanged to keep the signature as is.
    fun calcGlobalKeyFromChunkId(chunkId: tChainId): tGlobalKey {
        var chainIds: seq[tChainId];
        var targetChain: tChainId;
        var replicaChain: tReplicaChain;
        chainIds = keys(replicaChains);
        targetChain = chainIds[chunkId % sizeof(chainIds)];
        replicaChain = replicaChains[targetChain];
        return (vChainId = replicaChain.vChainId, chunkId = chunkId);
    }

    // Installs a newer routing table. Tables with a version that is not
    // strictly greater than the current one are logged and ignored.
    fun processRoutingInfo(routingInfo: tRoutingInfo) {
        var newRoutingVer: tRoutingVer;
        var newReplicaChains: tReplicaChainMap;
        var chainId: tChainId;

        newRoutingVer = routingInfo.routingVer;
        newReplicaChains = routingInfo.replicaChains;
        if (routingVer > newRoutingVer) {
            print format("{0}: error: routingVer {1} > newRoutingVer {2}", this, routingVer, newRoutingVer);
            return;
        } else if (routingVer == newRoutingVer) {
            print format("{0}: ignore: routingVer {1} == newRoutingVer {2}", this, routingVer, newRoutingVer);
            return;
        }
        print format("{0}: updating replica chains from version {1} to {2}", this, routingVer, newRoutingVer);
        routingVer = newRoutingVer;
        // drop chains that no longer exist in the new table
        foreach (chainId in keys(replicaChains)) {
            if (!(chainId in newReplicaChains)) {
                replicaChains -= (chainId);
            }
        }
        // add or overwrite every chain of the new table
        foreach (chainId in keys(newReplicaChains)) {
            replicaChains[chainId] = newReplicaChains[chainId];
            print format("{0}: new replica chain {1}, targets: {2}, services: {3}",
                this, newReplicaChains[chainId].vChainId, newReplicaChains[chainId].targets, newReplicaChains[chainId].services);
        }
    }

    // fun onSendHeartbeatEvent(heartbeatConns: tHeartbeatConns) {
    //   send heartbeatConns.mgmtService, eRegisterClientMsg, (from = heartbeatConns.mgmtClient, nodeId = clientId, storageClient = this);
    // }

    // Returns the first target of the chain if its public state is SERVING,
    // otherwise 0, which callers treat as "no target to send to".
    fun chooseServingTarget(replicaChain: tReplicaChain): tTargetId {
        var targetId: tTargetId;
        targetId = replicaChain.targets[0];
        if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
            return targetId;
        }
        return 0;
    }

    // Sends the write to the chain's serving target, stamped with the chain's
    // latest version. Nothing is sent when no serving target is available.
    fun sendWriteReq(writeReq: tWriteReq) {
        var replicaChain: tReplicaChain;
        var targetId: tTargetId;
        var targetService: StorageService;
        // get the latest chain and update versioned chain id
        replicaChain = replicaChains[writeReq.key.vChainId.chainId];
        writeReq.key.vChainId = replicaChain.vChainId;
        targetId = chooseServingTarget(replicaChain);
        if (targetId > 0) {
            print format("{0}: send write request #{1}: {2}", this, writeReq.retries, writeReq);
            targetService = replicaChain.services[targetId];
            send targetService, eWriteReq, writeReq;
        }
    }

    // Bumps the retry counter of an in-flight write and sends it again.
    fun reissueWriteReq(reqTag: tMessageTag) {
        inflightWriteReqs[reqTag].retries = inflightWriteReqs[reqTag].retries + 1;
        sendWriteReq(inflightWriteReqs[reqTag]);
    }

    // Sends the read to the chain's serving target, stamped with the chain's
    // latest version. Nothing is sent when no serving target is available.
    fun sendReadReq(readReq: tReadReq) {
        var replicaChain: tReplicaChain;
        var targetId: tTargetId;
        var targetService: StorageService;
        // get the latest chain and update versioned chain id
        replicaChain = replicaChains[readReq.key.vChainId.chainId];
        readReq.key.vChainId = replicaChain.vChainId;
        targetId = chooseServingTarget(replicaChain);
        if (targetId > 0) {
            print format("{0}: send read request #{1}: {2}", this, readReq.retries, readReq);
            targetService = replicaChain.services[targetId];
            send targetService, eReadReq, readReq;
        }
    }

    // Bumps the retry counter of an in-flight read and sends it again.
    // The counter is incremented before sending (same order as reissueWriteReq)
    // so that the request on the wire carries the updated retry number.
    fun reissueReadReq(reqTag: tMessageTag) {
        inflightReadReqs[reqTag].retries = inflightReadReqs[reqTag].retries + 1;
        sendReadReq(inflightReadReqs[reqTag]);
    }

    // Re-sends every in-flight write whose recorded chain version differs from
    // the chain's current version.
    fun processInflightWriteReqs() {
        var oldChainId: tVersionedChainId;
        var newChainId: tVersionedChainId;
        var writeReq: tWriteReq;
        foreach (writeReq in values(inflightWriteReqs)) {
            oldChainId = writeReq.key.vChainId;
            newChainId = replicaChains[oldChainId.chainId].vChainId;
            if (oldChainId != newChainId) {
                print format("{0}: chain version updated: {1} --> {2}, reissuing request {3}", this, oldChainId, newChainId, writeReq);
                reissueWriteReq(writeReq.tag);
            }
        }
    }

    // Re-sends every in-flight read whose recorded chain version differs from
    // the chain's current version.
    fun processInflightReadReqs() {
        var oldChainId: tVersionedChainId;
        var newChainId: tVersionedChainId;
        var readReq: tReadReq;
        foreach (readReq in values(inflightReadReqs)) {
            oldChainId = readReq.key.vChainId;
            newChainId = replicaChains[oldChainId.chainId].vChainId;
            if (oldChainId != newChainId) {
                print format("{0}: chain version updated: {1} --> {2}, reissuing request {3}", this, oldChainId, newChainId, readReq);
                reissueReadReq(readReq.tag);
            }
        }
    }

    // Waits for the first routing table; users asking to connect are recorded
    // and notified on entry to WaitForReqs.
    start state Init {
        ignore eSendHeartbeat;
        entry (args: (clientId: tNodeId, mgmtService: MgmtService)) {
            clientId = args.clientId;
            mgmtService = args.mgmtService;
            mgmtClient = new MgmtClient((nodeId = clientId, clientHost = this, mgmtService = mgmtService, sendHeartbeats = false));
            // timer = new Timer(this);
        }
        on eWaitConnected do (user: machine) {
            clientUsers += (user);
        }
        // on eSendHeartbeat do onSendHeartbeatEvent;
        on eNewRoutingInfo goto WaitForReqs with processRoutingInfo;
    }

    // Normal operation: accept submissions, track them as in-flight and
    // complete or retry them as responses and routing updates arrive.
    state WaitForReqs {
        ignore eSendHeartbeat;
        entry {
            var user: machine;
            // tell everybody who was waiting in Init that the client is usable now
            foreach (user in clientUsers) {
                send user, eClientConnected;
            }
            // StartTimer(timer);
        }
        on eWaitConnected do (user: machine) {
            send user, eClientConnected;
        }
        on eShutDown goto Stopped with (from: machine) {
            print format("{0} is going to shutdown", this);
            send mgmtClient, eShutDown, this;
        }
        // on eSendHeartbeat do onSendHeartbeatEvent;
        on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
            processRoutingInfo(routingInfo);
            processInflightWriteReqs();
            processInflightReadReqs();
        }
        // on eTimeOut do {
        //   processInflightWriteReqs();
        //   processInflightReadReqs();
        //   StartTimer(timer);
        // }
        on eSubmitWrite do (writeArgs: tWriteArgs) {
            var writeReq: tWriteReq;
            writeReq = (from = this,
                retries = 1,
                tag = newMessageTag(),
                key = calcGlobalKeyFromChunkId(writeArgs.chunkId),
                updateVer = 0,
                commitChainVer = 0,
                fullChunkReplace = false,
                // an empty payload encodes "remove this chunk"
                removeChunk = writeArgs.dataBytes == default(tBytes),
                fromClient = true,
                offset = writeArgs.offset, length = writeArgs.length,
                dataBytes = writeArgs.dataBytes);
            sendWriteReq(writeReq);
            submittedWrites += (writeReq.tag, writeArgs);
            inflightWriteReqs += (writeReq.tag, writeReq);
        }
        on eSubmitRead do (readArgs: tReadArgs) {
            var readReq: tReadReq;
            readReq = (from = this,
                retries = 1,
                tag = newMessageTag(),
                key = calcGlobalKeyFromChunkId(readArgs.chunkId),
                offset = readArgs.offset, length = readArgs.length);
            sendReadReq(readReq);
            submittedReads += (readReq.tag, readArgs);
            inflightReadReqs += (readReq.tag, readReq);
        }
        on eWriteResp do (writeResp: tWriteResp) {
            // a response for a request that is no longer tracked is dropped
            if (!(writeResp.tag in inflightWriteReqs)) {
                print format("{0}: got response for completed write request: {1}", this, writeResp.key);
                return;
            }
            if (writeResp.status == ErrorCode_CHAIN_VERION_MISMATCH) {
                print format("{0}: retry write request: {1}", this, writeResp.key);
                reissueWriteReq(writeResp.tag);
                return;
            }
            print format("{0}: write response {1}", this, writeResp);
            send submittedWrites[writeResp.tag].from, eWriteComplete, (status = writeResp.status, chunkId = writeResp.key.chunkId, commitVer = writeResp.commitVer);
            submittedWrites -= (writeResp.tag);
            inflightWriteReqs -= (writeResp.tag);
        }
        on eReadResp do (readResp: tReadResp) {
            // a response for a request that is no longer tracked is dropped
            if (!(readResp.tag in inflightReadReqs)) {
                return;
            }
            if ((readResp.status == ErrorCode_CHAIN_VERION_MISMATCH) || (readResp.status == ErrorCode_CHUNK_NOT_COMMIT)) {
                print format("{0}: retry read request: {1}", this, readResp.key);
                reissueReadReq(readResp.tag);
                return;
            }
            print format("{0}: read response {1}", this, readResp);
            send submittedReads[readResp.tag].from, eReadComplete, (status = readResp.status, chunkId = readResp.key.chunkId,
                chunkMetadata = readResp.chunkMetadata, dataBytes = readResp.dataBytes);
            submittedReads -= (readResp.tag);
            inflightReadReqs -= (readResp.tag);
        }
    }

    // Terminal state after eShutDown: late responses and routing updates are ignored.
    state Stopped {
        ignore eReadResp, eWriteResp, eSendHeartbeat, eNewRoutingInfo, eTimeOut;
        entry {
            // CancelTimer(timer);
            print format("{0} stopped", this);
        }
    }
}

View File

@@ -0,0 +1,9 @@
// the storage system module: the set of machines that together make up the
// modeled storage system
module StorageSystem = {
TestClient,
MgmtClient, MgmtService,
StorageClient, StorageService,
ReadProcess, WriteProcess,
SyncWorker, StorageTarget, ChunkReplica,
SystemMonitor, Timer
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,804 @@
// Parameters of one test scenario, consumed by SetUpStorageSystem.
type tSystemConfig = (
// chunk size handed to every StorageTarget; must exceed numClients * numIters
chunkSize: int,
// number of replica chains to build
numChains: int,
// number of targets in each chain
numReplicas: int,
// number of storage service nodes; must divide numChains * numReplicas
numStorageServices: int,
// passed to every TestClient, which uses it as its budget of service shutdowns
failStorageServices: int,
// passed to MgmtService as maxAttempts
failDetectionMaxAttempts: int,
// number of TestClient machines
numClients: int,
// passed to every TestClient as numIters
numIters: int
);
// Handles to the machines created by SetUpStorageSystem.
type tStorageSystem = (
mgmt: MgmtService,
storages: tStorageServiceMap,
clients: tTestClientMap
);
// Creates numTargetsPerNode StorageTarget machines for each node 1..numNodes
// and returns them grouped by node. Target ids are nodeId * 100 + 1,
// nodeId * 100 + 2, ..., hence the limit of fewer than 100 targets per node.
fun BuildNodeTargetMap(chunkSize: int, numNodes: int, numTargetsPerNode: int)
    : tGlobalTargetMap
{
    var node: tNodeId;
    var slot: int;
    var newTargetId: tTargetId;
    var newTarget: StorageTarget;
    var targetsOfNode: tLocalTargetMap;
    var allTargets: tGlobalTargetMap;

    assert numTargetsPerNode < 100;

    node = 1;
    while (node <= numNodes) {
        targetsOfNode = default(tLocalTargetMap);
        slot = 0;
        while (slot < numTargetsPerNode) {
            newTargetId = node * 100 + 1 + slot;
            newTarget = new StorageTarget((targetId = newTargetId, chunkSize = chunkSize));
            targetsOfNode += (newTargetId, newTarget);
            slot = slot + 1;
        }
        allTargets += (node, targetsOfNode);
        node = node + 1;
    }
    return allTargets;
}
// Builds chains 1..numChains with numReplicas targets each. Nodes are visited
// round-robin; each visit takes the first remaining target of that node out of
// the function's copy of nodeTargets. Every chain starts at chain version 1
// with all targets in the SERVING state.
fun BuildRepliaChainMap(numChains: int, numReplicas: int, nodeTargets: tGlobalTargetMap)
    : tReplicaChainMap
{
    var currentChain: tVersionedChainId;
    var pickedTarget: tTargetId;
    var pickedNode: tNodeId;
    var chain: tReplicaChain;
    var chains: tReplicaChainMap;
    var nodeRing: seq[tNodeId];
    var cursor: int;

    nodeRing = keys(nodeTargets);
    cursor = 0;
    currentChain = (chainId = 1, chainVer = 1);

    while (currentChain.chainId <= numChains) {
        chain = default(tReplicaChain);
        chain.vChainId = currentChain;

        while (sizeof(chain.targets) < numReplicas) {
            // next node in round-robin order
            pickedNode = nodeRing[cursor % sizeof(nodeRing)];
            cursor = cursor + 1;

            // take the node's first unassigned target
            pickedTarget = keys(nodeTargets[pickedNode])[0];
            nodeTargets[pickedNode] -= (pickedTarget);
            print format("chain {0} added target {1} from node {2}", currentChain.chainId, pickedTarget, pickedNode);

            chain.targets += (sizeof(chain.targets), pickedTarget);
            chain.nodes += (pickedTarget, pickedNode);
            chain.states += (pickedTarget, PublicTargetState_SERVING);
        }

        print format("create new replica chain: {0}", chain);
        chains += (currentChain.chainId, chain);
        currentChain.chainId = currentChain.chainId + 1;
    }
    return chains;
}
// Creates one StorageService machine per node of nodeTargets, giving it the
// node's local targets and the management service, and returns the services
// keyed by node id.
fun CreateStorageServices(nodeTargets: tGlobalTargetMap, mgmtService: MgmtService)
    : tStorageServiceMap
{
    var nodeId: tNodeId;
    var service: StorageService;
    var storageServices: tStorageServiceMap;

    foreach (nodeId in keys(nodeTargets)) {
        service = new StorageService((nodeId = nodeId, localTargets = nodeTargets[nodeId], mgmtService = mgmtService));
        storageServices += (nodeId, service);
    }
    return storageServices;
}
// Creates TestClient machines with ids 1..numClients. All clients work on the
// same chunk id range, which starts at 789001 and ends at 789000 + 2 * numChains.
fun CreateTestClients(numClients: int, numChains: int, numIters: int, failStorageServices: int, mgmtService: MgmtService, storageServices: tStorageServiceMap, systemMonitor: SystemMonitor)
    : tTestClientMap
{
    var id: tNodeId;
    var lastChunkId: int;
    var created: TestClient;
    var clients: tTestClientMap;

    lastChunkId = 789000 + numChains * 2;
    id = 1;
    while (id <= numClients) {
        created = new TestClient((
            clientId = id,
            chunkIdBegin = 789001,
            chunkIdEnd = lastChunkId,
            numIters = numIters,
            failStorageServices = failStorageServices,
            mgmtService = mgmtService,
            storageServices = storageServices,
            systemMonitor = systemMonitor));
        clients += (id, created);
        id = id + 1;
    }
    return clients;
}
// Validates the configuration, then creates the whole system in this order:
// storage targets, replica chain map, management service, storage services,
// system monitor and test clients. The configuration and the created machines
// are announced via eSystemConfig and eStorageSystem.
// testDriver is not used by this function.
fun SetUpStorageSystem(testDriver: machine, config: tSystemConfig) {
    var numTargetsPerNode: int;
    var nodeTargets: tGlobalTargetMap;
    var replicaChains: tReplicaChainMap;
    var storageServices: tStorageServiceMap;
    var mgmtService: MgmtService;
    var testClients: tTestClientMap;
    var storageSystem: tStorageSystem;
    var systemMonitor: SystemMonitor;

    print format("system config: {0}", config);
    announce eSystemConfig, (config = config,);

    // numStorageServices is used as a divisor below; reject zero with a clear message
    assert config.numStorageServices > 0, format("numStorageServices must be positive: {0}", config.numStorageServices);
    assert config.failStorageServices <= config.numStorageServices;
    assert config.numStorageServices >= config.numReplicas;
    // targets must spread evenly over the storage services
    assert config.numChains * config.numReplicas % config.numStorageServices == 0;
    assert config.chunkSize > config.numClients * config.numIters;

    numTargetsPerNode = config.numChains * config.numReplicas / config.numStorageServices;
    nodeTargets = BuildNodeTargetMap(config.chunkSize, config.numStorageServices, numTargetsPerNode);
    print format("init nodeTargets {0}", nodeTargets);
    replicaChains = BuildRepliaChainMap(config.numChains, config.numReplicas, nodeTargets);
    print format("init replicaChains {0}", replicaChains);

    mgmtService = new MgmtService((nodeId = 9001, maxAttempts = config.failDetectionMaxAttempts,
        numStorageServices = config.numStorageServices, replicaChains = replicaChains));
    storageServices = CreateStorageServices(nodeTargets, mgmtService);
    systemMonitor = new SystemMonitor((nodeId = 9002, numClients = config.numClients, mgmtService = mgmtService, storageServices = storageServices));
    testClients = CreateTestClients(config.numClients, config.numChains, config.numIters, config.failStorageServices, mgmtService, storageServices, systemMonitor);

    storageSystem = (mgmt = mgmtService, storages = storageServices, clients = testClients);
    announce eStorageSystem, (system = storageSystem,);
}
// Returns a byte sequence of `size` elements, each equal to `value`
// (empty when size is zero or negative).
fun InitBytes(size: int, value: int): tBytes {
    var filled: tBytes;
    // append at the end until the requested length is reached
    while (sizeof(filled) < size) {
        filled += (sizeof(filled), value);
    }
    return filled;
}
/* Service Monitor */
// Lifecycle events; each one carries a machine reference as payload.
// eRestart: sent by SystemMonitor to a storage service, payload is the monitor.
event eRestart: machine;
event eStarted: machine;
event eStartUp: machine;
// eShutDown: asks the receiver to stop; the payload is named `from` by its handlers.
event eShutDown: machine;
// eStopped: acknowledgement of eShutDown, payload is the machine that stopped.
event eStopped: machine;
// Test-side supervisor: follows routing updates to learn which storage services
// have offline targets, periodically sends eRestart to those services, and shuts
// the system down once all test clients are done and no service is offline.
machine SystemMonitor {
var nodeId: tNodeId;
var numClients: int;
var mgmtService: MgmtService;
var storageServices: tStorageServiceMap;
// never assigned: its initialisation in Init is commented out
var failStorageServices: int;
var mgmtClient: MgmtClient;
var timer: Timer;
// ids of the test clients that reported eTestClientDone
var stoppedClients: set[tNodeId];
// targets last seen in OFFLINE or LASTSRV state, and the nodes hosting them
var offlineTargets: set[tTargetId];
var offlineServices: set[tNodeId];
// services that were sent eRestart since the last routing update
var restartedServices: set[tNodeId];
// Updates offlineTargets / offlineServices from a new routing table and clears
// restartedServices.
// NOTE(review): a node is removed from offlineServices as soon as one of its
// targets is seen online, even if other targets of that node are still in
// offlineTargets - confirm this is intended when a node hosts several targets.
fun processRoutingInfo(routingInfo: tRoutingInfo) {
var replicaChain: tReplicaChain;
var targetId: tTargetId;
restartedServices = default(set[tNodeId]);
foreach (replicaChain in values(routingInfo.replicaChains)) {
print format("{0}: replication chain: {1}", this, ReplicaChainToString(replicaChain));
foreach (targetId in replicaChain.targets) {
// OFFLINE and LASTSRV both count as "offline" here
if (replicaChain.states[targetId] == PublicTargetState_OFFLINE ||
replicaChain.states[targetId] == PublicTargetState_LASTSRV)
{
if (!(targetId in offlineTargets)) {
offlineTargets += (targetId);
offlineServices += (replicaChain.nodes[targetId]);
}
}
// SERVING, SYNCING and WAITING all count as "back online"
else if (replicaChain.states[targetId] == PublicTargetState_SERVING ||
replicaChain.states[targetId] == PublicTargetState_SYNCING ||
replicaChain.states[targetId] == PublicTargetState_WAITING)
{
if (targetId in offlineTargets) {
offlineTargets -= (targetId);
offlineServices -= (replicaChain.nodes[targetId]);
}
}
}
}
}
// Sends eRestart to every service currently listed in offlineServices.
fun restartOfflineServices() {
var nodeId: tNodeId;
foreach (nodeId in offlineServices) {
send storageServices[nodeId], eRestart, this;
restartedServices += (nodeId);
}
}
// Stores the constructor arguments, creates the management client and the timer.
start state Init {
entry (args: (nodeId: tNodeId, numClients: int, mgmtService: MgmtService, storageServices: tStorageServiceMap)) {
nodeId = args.nodeId;
numClients = args.numClients;
mgmtService = args.mgmtService;
storageServices = args.storageServices;
// failStorageServices = args.failStorageServices;
mgmtClient = new MgmtClient((nodeId = nodeId, clientHost = this, mgmtService = mgmtService, sendHeartbeats = false));
timer = CreateTimer(this);
goto WaitUntilTestDone;
}
}
// Phase 1: keep restarting offline services until every test client is done.
state WaitUntilTestDone {
ignore eSendHeartbeat;
entry {
print format("wait until test done: offlineTargets {0}, offlineServices {1}, restartedServices {2}, stoppedClients {3}",
offlineTargets, offlineServices, restartedServices, stoppedClients);
StartTimer(timer);
}
// on eSendHeartbeat do (heartbeatConns: tHeartbeatConns) {
// send heartbeatConns.mgmtService, eRegisterClientMsg, (from = heartbeatConns.mgmtClient, nodeId = nodeId, storageClient = this);
// }
on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
processRoutingInfo(routingInfo);
if (sizeof(offlineServices) > 0) {
StartTimer(timer);
}
}
on eTimeOut do {
restartOfflineServices();
if (sizeof(offlineServices) > 0) {
StartTimer(timer);
} else {
CancelTimer(timer);
}
}
on eTestClientDone do (clientId: tNodeId) {
stoppedClients += (clientId);
if (sizeof(stoppedClients) == numClients) {
print format("all test clients stopped");
// value 1 is stored by MgmtService as its stopFindNewFailures setting
send mgmtService, eStopFindNewFailures, 1;
goto WaitUntilSyncDone;
}
}
}
// Phase 2: all clients are done; keep restarting offline services and shut
// down as soon as no service is listed as offline.
state WaitUntilSyncDone {
ignore eSendHeartbeat;
entry {
print format("wait until sync done: offlineTargets {0}, offlineServices {1}, restartedServices {2}, stoppedClients {3}",
offlineTargets, offlineServices, restartedServices, stoppedClients);
StartTimer(timer);
}
on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
processRoutingInfo(routingInfo);
if (sizeof(offlineServices) > 0) {
StartTimer(timer);
} else {
goto ShutdownSystem;
}
}
on eTimeOut do {
restartOfflineServices();
if (sizeof(offlineServices) > 0) {
StartTimer(timer);
} else {
goto ShutdownSystem;
}
}
}
// Phase 3: stop the management client and service, wait for the management
// service to confirm with eStopped, then stop every storage service.
state ShutdownSystem {
ignore eSendHeartbeat, eNewRoutingInfo, eTimeOut;
entry {
var storageService: StorageService;
print format("{0}: all done, restartedServices: {1}", this, restartedServices);
announce eStopMonitorTargetStates;
CancelTimer(timer);
send mgmtClient, eShutDown, this;
send mgmtService, eShutDown, this;
// block until the management service acknowledges the shutdown
receive {
case eStopped: (mgmt: machine) {
assert mgmt == mgmtService;
}
}
foreach (storageService in values(storageServices)) {
send storageService, eShutDown, this;
}
}
}
}
/* Test Client */
// DONE: write different part of the chunk for each write to detect any error
// DONE: stop a storage service more than once during test (stop it when it's syncing)
// TODO: [new test] stop mgmt client of an alive storage service to simulate network partition
// TODO: [new test] shut down storage service and then restart
// TODO: [new test] make storage service crash during syncing
// Test clients keyed by the client id they were created with.
type tTestClientMap = map[tNodeId, TestClient];
// type tTestStatus = (nodeId: tNodeId, done: bool);
// Sent by a TestClient to the SystemMonitor when it has finished; payload is the client's id.
event eTestClientDone : tNodeId;
// event eTestStatusReq : tTestStatus;
// event eTestStatusResp : tTestStatus;
machine TestClient {
var clientId: tNodeId;
var chunkIdBegin: tChainId;
var chunkIdEnd: tChainId;
var numIters: int;
var failStorageServices: int;
var storageClient: StorageClient;
var storageServices: tStorageServiceMap;
var systemMonitor: SystemMonitor;
var nextWritePos: int;
var currIter: int;
var currChunkId: tChunkId;
var lastChunkVer: map[tChunkId, tChunkVer];
fun CreateNewWrite(chunkId: tChunkId, offset: int, length: int, value: int): tWriteArgs {
var dataBytes: tBytes;
var writeArgs: tWriteArgs;
dataBytes = InitBytes(length, value);
print format("data bytes size {0}", sizeof(dataBytes));
writeArgs = (from = this, chunkId = chunkId, offset = offset, length = sizeof(dataBytes), dataBytes = dataBytes);
print format("{0}: created a new write: {1}", this, writeArgs);
return writeArgs;
}
fun CreateNewRemove(chunkId: tChunkId): tWriteArgs {
var writeArgs: tWriteArgs;
writeArgs = (from = this, chunkId = chunkId, offset = 0, length = 0, dataBytes = default(tBytes));
print format("{0}: created a new remove: {1}", this, writeArgs);
return writeArgs;
}
fun CreateNewRead(chunkId: tChunkId, offset: int, length: int): tReadArgs {
var readArgs: tReadArgs;
readArgs = (from = this, chunkId = chunkId, offset = offset, length = length);
print format("{0}: created a new read: {1}", this, readArgs);
return readArgs;
}
start state Init {
// defer eTestStatusReq;
entry (args: (clientId: tNodeId, chunkIdBegin: int, chunkIdEnd: int, numIters: int, failStorageServices: int, mgmtService: MgmtService, storageServices: tStorageServiceMap, systemMonitor: SystemMonitor)) {
assert args.chunkIdBegin < args.chunkIdEnd;
clientId = args.clientId + 8000;
chunkIdBegin = args.chunkIdBegin;
chunkIdEnd = args.chunkIdEnd;
numIters = args.numIters;
failStorageServices = args.failStorageServices;
storageServices = args.storageServices;
systemMonitor = args.systemMonitor;
nextWritePos = 0;
storageClient = new StorageClient((clientId = clientId, mgmtService = args.mgmtService));
send storageClient, eWaitConnected, this;
}
on eClientConnected goto SendingWriteReq;
}
state SendingWriteReq {
entry {
var offset: int;
var length: int;
var machineToFail: machine;
/* ---------------------------------------------------------------------
currChunkId
---------------------------------------------------------------------
client 8001 | client 8002 | client 8003 | ......
---------------------------------------------------------------------
<currIter> bytes | <currIter> bytes | <currIter> bytes | ......
---------------------------------------------------------------------
^ ^
| |
offset----|<---length-->|
*/
currChunkId = chunkIdBegin + nextWritePos / numIters;
currIter = nextWritePos % numIters + 1;
offset = (clientId - 8001) * numIters + currIter - 1;
length = numIters - currIter + 1;
nextWritePos = nextWritePos + 1;
if (!(currChunkId in lastChunkVer))
lastChunkVer += (currChunkId, 0);
send storageClient, eSubmitWrite, CreateNewWrite(currChunkId, offset, length, currIter);
if (failStorageServices > 0 && choose()) {
machineToFail = choose(values(storageServices));
send machineToFail, eShutDown, machineToFail;
failStorageServices = failStorageServices - 1;
}
}
on eWriteComplete do (writeRes: tWriteRes) {
assert writeRes.status == ErrorCode_SUCCESS, format("error: {0}", writeRes);
// assert lastChunkVer[writeRes.chunkId] < writeRes.commitVer,
// format("error: last chunk version {0} >= commit version {1}", lastChunkVer[writeRes.chunkId], writeRes.commitVer);
// lastChunkVer[writeRes.chunkId] = writeRes.commitVer;
if (nextWritePos >= numIters * (chunkIdEnd - chunkIdBegin)) {
goto Done;
} else if (nextWritePos % numIters == numIters / 2) {
goto SendingRemoveReq;
} else {
goto SendingWriteReq;
}
}
// on eTestStatusReq do (from: machine) {
// send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
// }
}
state SendingReadReq {
entry {
var offset: int;
var length: int;
offset = (clientId - 101) * numIters;
length = numIters;
send storageClient, eSubmitRead, CreateNewRead(currChunkId, offset, length);
}
on eReadComplete do (readRes: tReadRes) {
var i: int;
if (readRes.status == ErrorCode_CHUNK_NOT_FOUND) {
print format("{0} chunk {1} removed by other client, re-create the chunk", this, currChunkId);
goto SendingWriteReq;
return;
}
if (readRes.status == ErrorCode_TARGET_OFFLINE) {
goto SendingReadReq;
return;
}
assert readRes.status == ErrorCode_SUCCESS, format("readRes.status {0}", readRes.status);
assert readRes.chunkId == currChunkId, format("readRes.chunkId {0} != currChunkId {1}", readRes.chunkId, currChunkId);
// assert lastChunkVer[currChunkId] <= readRes.chunkMetadata.commitVer,
// format("lastChunkVer[currChunkId:{0}] {1} > readRes.chunkMetadata.commitVer {2}",
// currChunkId, lastChunkVer[currChunkId], readRes.chunkMetadata.commitVer);
// if (lastChunkVer[currChunkId] == readRes.chunkMetadata.commitVer) {
while (i < sizeof(readRes.dataBytes)) {
assert readRes.dataBytes[i] <= Min(currIter, i + 1),
format("readRes.dataBytes[i:{0}] {1} != {2}, nextWritePos {3}, currIter {4}",
i, readRes.dataBytes[i], Min(currIter, i + 1), nextWritePos, currIter);
i = i + 1;
}
// }
if (nextWritePos % numIters == 0) {
goto SendingRemoveReq;
} else {
goto SendingWriteReq;
}
}
// on eTestStatusReq do (from: machine) {
// send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
// }
}
// Submits a remove for the current chunk, then reads the chunk back to log whether
// it is gone or has already been re-created, and finally picks the next state.
state SendingRemoveReq {
    entry {
        send storageClient, eSubmitWrite, CreateNewRemove(currChunkId);
    }
    on eWriteComplete do (writeRes: tWriteRes) {
        // The remove itself must succeed.
        assert writeRes.status == ErrorCode_SUCCESS, format("error: {0}", writeRes);
        // Disabled version bookkeeping (kept for reference):
        // assert lastChunkVer[writeRes.chunkId] < writeRes.commitVer,
        //   format("error: last chunk version {0} >= commit version {1}", lastChunkVer[writeRes.chunkId], writeRes.commitVer);
        // lastChunkVer -= (writeRes.chunkId);

        // Probe the chunk and block until the read completes; the outcome is only logged.
        send storageClient, eSubmitRead, CreateNewRead(currChunkId, 0, numIters);
        receive {
            case eReadComplete: (probeRes: tReadRes) {
                if (probeRes.status != ErrorCode_CHUNK_NOT_FOUND) {
                    print format("Chunk {0} re-created, result: {1}", currChunkId, probeRes);
                } else {
                    print format("Chunk {0} removed, result: {1}", currChunkId, probeRes);
                }
            }
        }

        // Finish once numIters writes have been issued for every chunk in the id range.
        if (nextWritePos >= numIters * (chunkIdEnd - chunkIdBegin)) {
            goto Done;
        } else {
            goto SendingWriteReq;
        }
    }
    // Disabled status query handler:
    // on eTestStatusReq do (from: machine) {
    //   send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
    // }
}
// Final state of the test client: no event handlers are declared here.
state Done {
entry {
// Log completion, then report this client's id to the system monitor.
print format("{0}: all done", this);
send systemMonitor, eTestClientDone, clientId;
// Ask the storage client to shut down, passing this machine as the payload.
send storageClient, eShutDown, this;
}
// Disabled status query handler:
// on eTestStatusReq do (from: machine) {
// send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = true);
// }
}
}
// no failure
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 0.
machine OneClientWriteNoFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 0, failDetectionMaxAttempts = 11,
                   numClients = 1, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 0.
machine TwoClientsWriteNoFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 0, failDetectionMaxAttempts = 11,
                   numClients = 2, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 3 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 0.
machine ThreeClientsWriteNoFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 0, failDetectionMaxAttempts = 11,
                   numClients = 3, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// unreliable failure detector
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1 and a lower failDetectionMaxAttempts (7) than the
// other 3-replica drivers in this file (11).
machine OneClientWriteUnreliableDetector {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 1, failDetectionMaxAttempts = 7,
                   numClients = 1, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1 and a lower failDetectionMaxAttempts (7) than the
// other 3-replica drivers in this file (11).
machine TwoClientsWriteUnreliableDetector {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 1, failDetectionMaxAttempts = 7,
                   numClients = 2, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// with failures
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1.
machine OneClientWriteWithFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 1, failDetectionMaxAttempts = 11,
                   numClients = 1, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1.
machine TwoClientsWriteWithFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 1, failDetectionMaxAttempts = 11,
                   numClients = 2, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 3 (equal to numStorageServices).
machine OneClientWriteWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 3, failDetectionMaxAttempts = 11,
                   numClients = 1, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 3 (equal to numStorageServices).
machine TwoClientsWriteWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 3, numStorageServices = 3,
                   failStorageServices = 3, failDetectionMaxAttempts = 11,
                   numClients = 2, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// short chain: two replicas
// Test driver: 1 client, one chain of 2 replicas on 2 storage services,
// failStorageServices = 1.
machine OneClientWriteShortChainWithFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 2, numStorageServices = 2,
                   failStorageServices = 1, failDetectionMaxAttempts = 11,
                   numClients = 1, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 2 replicas on 2 storage services,
// failStorageServices = 2 (equal to numStorageServices).
machine TwoClientsWriteShortChainWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 2, numStorageServices = 2,
                   failStorageServices = 2, failDetectionMaxAttempts = 11,
                   numClients = 2, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}
// long chain: four replicas
// Test driver: 2 clients, one chain of 4 replicas on 4 storage services,
// failStorageServices = 2 and failDetectionMaxAttempts raised to 23.
machine TwoClientsWriteLongChainWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            // Field order is kept exactly as in the other test drivers of this file.
            cfg = (chunkSize = 16, numChains = 1,
                   numReplicas = 4, numStorageServices = 4,
                   failStorageServices = 2, failDetectionMaxAttempts = 23,
                   numClients = 2, numIters = 2);
            SetUpStorageSystem(this, cfg);
        }
    }
}

View File

@@ -0,0 +1,57 @@
// no failure
// Test cases for one, two and three clients. Each names its driver machine as `main`,
// runs it in the union of the StorageSystem module with that driver, and asserts the
// four specs WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated
// and AllReplicasInServingState.
test tcOneClientWriteNoFailure [main = OneClientWriteNoFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteNoFailure };
test tcTwoClientsWriteNoFailure [main = TwoClientsWriteNoFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteNoFailure };
test tcThreeClientsWriteNoFailure [main = ThreeClientsWriteNoFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { ThreeClientsWriteNoFailure };
// unreliable failure detector
// Test cases for the *UnreliableDetector drivers. Unlike the other test cases in this
// file, these assert only WriteComplete, MonotoneIncreasingVersionNumber and
// AllReplicasOnChainUpdated; AllReplicasInServingState is not checked here.
// NOTE(review): presumably intentional for the unreliable-detector scenario -- confirm.
test tcOneClientWriteUnreliableDetector [main = OneClientWriteUnreliableDetector]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated in
union StorageSystem, { OneClientWriteUnreliableDetector };
test tcTwoClientsWriteUnreliableDetector [main = TwoClientsWriteUnreliableDetector]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated in
union StorageSystem, { TwoClientsWriteUnreliableDetector };
// with failures
// Test cases for the *WithFailure and *WithFailures drivers, with one and two clients.
// Each asserts all four specs in the union of the StorageSystem module with its driver.
test tcOneClientWriteWithFailure [main = OneClientWriteWithFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteWithFailure };
test tcTwoClientsWriteWithFailure [main = TwoClientsWriteWithFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteWithFailure };
test tcOneClientWriteWithFailures [main = OneClientWriteWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteWithFailures };
test tcTwoClientsWriteWithFailures [main = TwoClientsWriteWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteWithFailures };
// short chain
// Test cases for the *ShortChain* drivers. Each asserts all four specs in the union
// of the StorageSystem module with its driver.
test tcOneClientWriteShortChainWithFailure [main = OneClientWriteShortChainWithFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteShortChainWithFailure };
test tcTwoClientsWriteShortChainWithFailures [main = TwoClientsWriteShortChainWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteShortChainWithFailures };
// long chain
// Test case for the TwoClientsWriteLongChainWithFailures driver. It asserts all four
// specs in the union of the StorageSystem module with that driver.
test tcTwoClientsWriteLongChainWithFailures [main = TwoClientsWriteLongChainWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteLongChainWithFailures };