Initial commit

This commit is contained in:
dev
2025-02-27 21:53:53 +08:00
commit 815e55e4c0
1291 changed files with 185445 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
{
"version": 1,
"isRoot": true,
"tools": {
"p": {
"version": "2.3.2",
"commands": [
"p"
],
"rollForward": false
}
}
}

7
specs/.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
PCheckerOutput/
PGenerated/
POutput/
obj/
*.toolbox/
Test.cs
*.log

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<ApplicationIcon />
<OutputType>Exe</OutputType>
<StartupObject />
<LangVersion>latest</LangVersion>
<OutputPath>POutput/</OutputPath>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Coyote" Version="1.0.5"/>
<PackageReference Include="PCSharpRuntime" Version="1.1.15"/>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,12 @@
<!-- P project file for data storage -->
<Project>
<ProjectName>DataStorage</ProjectName>
<InputFiles>
<PFile>./PSrc/</PFile>
<PFile>./PSpec/</PFile>
<PFile>./PTst/</PFile>
</InputFiles>
<OutputDir>./PGenerated/</OutputDir>
<!-- Add external dependencies -->
<IncludeProject>../Timer/Timer.pproj</IncludeProject>
</Project>

View File

@@ -0,0 +1,492 @@
// Liveness monitor over client writes: the monitor is hot (PendingWrites) while at
// least one tracked client write request has not yet seen a successful response,
// and cold (NoPendingWrites) once the pending counter is back to zero.
spec WriteComplete observes eWriteReq, eWriteResp {
  // request tag -> true once a successful response for that tag has been observed
  var completedWriteReqTags: map[tMessageTag, bool];
  // number of tracked tags whose entry above is still false
  var numPendingWriteReqs: int;

  // Starts tracking a write request the first time its tag is seen.
  // Requests that do not come from a client, and repeated tags, are skipped.
  fun OnWriteReq(writeReq: tWriteReq) {
    if (!writeReq.fromClient) {
      return;
    }
    if (writeReq.tag in completedWriteReqTags) {
      return;
    }
    // the tag is known to be absent here, so this inserts a fresh entry
    completedWriteReqTags[writeReq.tag] = false;
    numPendingWriteReqs = numPendingWriteReqs + 1;
    if (numPendingWriteReqs > 0) {
      goto PendingWrites;
    }
  }

  // Marks a tracked request as completed on its first successful response.
  // Every response must belong to a request that was observed before.
  fun OnWriteResp(writeResp: tWriteResp) {
    assert writeResp.tag in completedWriteReqTags;
    if (writeResp.status == ErrorCode_SUCCESS && !completedWriteReqTags[writeResp.tag]) {
      completedWriteReqTags[writeResp.tag] = true;
      numPendingWriteReqs = numPendingWriteReqs - 1;
      if (numPendingWriteReqs == 0) {
        goto NoPendingWrites;
      }
    }
  }

  start cold state NoPendingWrites {
    entry {
      print format("numPendingWriteReqs: {0}, completedWriteReqTags: {1}", numPendingWriteReqs, completedWriteReqTags);
      assert numPendingWriteReqs == 0, format("{0} pending writes not equal to zero", numPendingWriteReqs);
    }
    on eWriteReq do OnWriteReq;
    on eWriteResp do OnWriteResp;
  }

  hot state PendingWrites {
    entry {
      print format("numPendingWriteReqs: {0}, completedWriteReqTags: {1}", numPendingWriteReqs, completedWriteReqTags);
    }
    on eWriteReq do OnWriteReq;
    on eWriteResp do OnWriteResp;
  }
}
// Carries the system configuration (tSystemConfig); observed by the AllWriteItersProcessed spec.
event eSystemConfig: (config: tSystemConfig);
// Carries the storage system description (tStorageSystem); observed by the AllWriteItersProcessed spec.
event eStorageSystem: (system: tStorageSystem);
// Liveness monitor: after the system configuration has been announced the monitor
// stays in the hot state SendingWriteReqs until config.numClients distinct nodes
// have each collected config.numIters successful write responses.
spec AllWriteItersProcessed observes eWriteWork, eWriteReq, eWriteResp, eSystemConfig, eStorageSystem {
  // configuration received with eSystemConfig (numIters and numClients are read from it)
  var config: tSystemConfig;
  // handles copied from the latest eStorageSystem event
  var mgmtService: MgmtService;
  var storageServices: tStorageServiceMap;
  // node id -> tags of the write requests observed for that node
  var seenWriteRequestTags: map[tNodeId, set[tMessageTag]];
  // node id -> tags of the successful write responses observed for that node
  var seenWriteResponseTags: map[tNodeId, set[tMessageTag]];
  // request tag -> target -> update version -> machines that issued write work for it
  var seenWriteProcs: map[tMessageTag, map[tTargetId, map[tChunkVer, set[machine]]]];
  // nodes whose number of successful responses reached config.numIters
  var clientDone: set[tNodeId];

  // Remembers the management service and storage services of the announced system.
  // Shared by the Init and SendingWriteReqs states.
  fun OnStorageSystem(args: (system: tStorageSystem)) {
    mgmtService = args.system.mgmt;
    storageServices = args.system.storages;
  }

  start state Init {
    on eSystemConfig goto SendingWriteReqs with (args: (config: tSystemConfig)) {
      config = args.config;
    }
    on eStorageSystem do OnStorageSystem;
  }

  hot state SendingWriteReqs {
    entry {
      var tag: tMessageTag;
      foreach (tag in keys(seenWriteProcs)) {
        print format("write request tag: {0}, seenWriteProcs: {1}", tag, seenWriteProcs[tag]);
      }
      print format("seenWriteRequestTags: {0}", seenWriteRequestTags);
      print format("seenWriteResponseTags: {0}", seenWriteResponseTags);
    }

    // Records who issued the write work; the self-transition re-runs the entry
    // block, so the bookkeeping is printed after every write work.
    on eWriteWork goto SendingWriteReqs with (writeWork: tWriteWork) {
      if (!(writeWork.tag in seenWriteProcs)) {
        seenWriteProcs += (writeWork.tag, default(map[tTargetId, map[tChunkVer, set[machine]]]));
      }
      if (!(writeWork.targetId in seenWriteProcs[writeWork.tag])) {
        seenWriteProcs[writeWork.tag] += (writeWork.targetId, default(map[tChunkVer, set[machine]]));
      }
      if (!(writeWork.updateVer in seenWriteProcs[writeWork.tag][writeWork.targetId])) {
        seenWriteProcs[writeWork.tag][writeWork.targetId] += (writeWork.updateVer, default(set[machine]));
      }
      seenWriteProcs[writeWork.tag][writeWork.targetId][writeWork.updateVer] += (writeWork.from);
    }

    on eStorageSystem do OnStorageSystem;

    on eWriteReq do (writeReq: tWriteReq) {
      if (!(writeReq.tag.nodeId in seenWriteRequestTags)) {
        seenWriteRequestTags += (writeReq.tag.nodeId, default(set[tMessageTag]));
      }
      seenWriteRequestTags[writeReq.tag.nodeId] += (writeReq.tag);
    }

    on eWriteResp do (writeResp: tWriteResp) {
      if (writeResp.status != ErrorCode_SUCCESS) {
        return;
      }
      // Check the node first: indexing the map with a node that never sent a
      // request would otherwise fail inside the map lookup instead of producing
      // a readable assertion message.
      assert writeResp.tag.nodeId in seenWriteRequestTags,
        format("write response {0} received but no write request was observed for node {1}",
          writeResp.tag, writeResp.tag.nodeId);
      assert writeResp.tag in seenWriteRequestTags[writeResp.tag.nodeId],
        format("write response {0} does not match any observed write request of node {1}",
          writeResp.tag, writeResp.tag.nodeId);
      if (!(writeResp.tag.nodeId in seenWriteResponseTags)) {
        seenWriteResponseTags += (writeResp.tag.nodeId, default(set[tMessageTag]));
      }
      seenWriteResponseTags[writeResp.tag.nodeId] += (writeResp.tag);
      // a node is done once it has numIters distinct successful responses
      if (sizeof(seenWriteResponseTags[writeResp.tag.nodeId]) == config.numIters) {
        clientDone += (writeResp.tag.nodeId);
        if (sizeof(clientDone) == config.numClients) {
          goto Done;
        }
      }
    }
  }

  cold state Done {
    ignore eWriteWork, eWriteReq, eWriteResp;
    entry {
      print format("all iterations processed {0}", clientDone);
    }
  }
}
// Safety monitor: on every (chunk, target) replica each newly committed version
// must be greater than all versions committed there before. The history of a
// replica is dropped when a full-chunk-replace write finishes or a remove commits.
spec MonotoneIncreasingVersionNumber observes eWriteOpFinishResult, eCommitOpResult {
  // (chunk id, target id) -> committed version -> commit work that produced it
  var chunkReplicaCommits: map[(tChunkId, tTargetId), map[tChunkVer, tCommitWork]];

  start state WaitForResponses {
    // A successful full-chunk replace forgets the commit history of the replica.
    on eWriteOpFinishResult do (writeFinishRes: tWriteOpFinishResult) {
      var replicaKey: (tChunkId, tTargetId);
      if (writeFinishRes.status == ErrorCode_SUCCESS && writeFinishRes.writeWork.fullChunkReplace) {
        replicaKey = (writeFinishRes.writeWork.key.chunkId, writeFinishRes.writeWork.targetId);
        chunkReplicaCommits -= (replicaKey);
      }
    }

    on eCommitOpResult do (commitOpResult: tCommitOpResult) {
      var replicaKey: (tChunkId, tTargetId);
      var newVer: tChunkVer;
      var oldVer: tChunkVer;

      if (commitOpResult.status != ErrorCode_SUCCESS) {
        return;
      }

      newVer = commitOpResult.commitVer;
      replicaKey = (commitOpResult.commitWork.key.chunkId, commitOpResult.commitWork.targetId);

      // make sure the replica has an entry before it is printed or extended
      if (!(replicaKey in chunkReplicaCommits)) {
        chunkReplicaCommits[replicaKey] = default(map[tChunkVer, tCommitWork]);
      }

      // a committed remove clears the history of this replica
      if (commitOpResult.removeChunk) {
        print format("remove request {0} committed, clear chunkReplicaCommits[{1}]: {2}",
          commitOpResult.commitWork.commitMsg.tag, replicaKey, chunkReplicaCommits[replicaKey]);
        chunkReplicaCommits -= (replicaKey);
        return;
      }

      // a version that was already recorded is not checked again
      if (newVer in chunkReplicaCommits[replicaKey]) {
        return;
      }

      // all existing commits should have smaller version
      foreach (oldVer in keys(chunkReplicaCommits[replicaKey])) {
        assert newVer > oldVer,
          format("current commit version {0} <= previous version {1} found in chunkReplicaCommits[chunkId:{2}]: {3}, commit result: {4}",
            newVer, oldVer, replicaKey, chunkReplicaCommits[replicaKey], commitOpResult);
      }
      chunkReplicaCommits[replicaKey][newVer] = commitOpResult.commitWork;
    }
  }
}
// DONE: check chunk content after each update
// Monitor that records, per chunk, the update versions, chunk contents and write
// completions seen on each target, and prints that bookkeeping whenever a commit
// completes. The assertions that compared replicas against each other are
// commented out at the end of the eCommitWorkDone handler, so the visible code
// only tracks and prints.
spec AllReplicasOnChainUpdated observes eReadWorkDone, eWriteWorkDone, eCommitWorkDone, eNewRoutingInfo {
// routing versions whose chains were already merged into replicaChains
var seenRoutingVers: set[tRoutingVer];
// versioned chain id -> replica chain, filled by the eNewRoutingInfo handler
var replicaChains: map[tVersionedChainId, tReplicaChain];
// chunk -> target -> highest update version recorded for a successful write
var chunkVersionOnTarget: map[tChunkId, map[tTargetId, tChunkVer]];
// chunk -> update version -> target -> content reported by the latest successful write
var chunkContentOnTarget: map[tChunkId, map[tChunkVer, map[tTargetId, tBytes]]];
// chunk -> update version -> target -> successful write completions in observation order
var updatesOfChunkReplica: map[tChunkId, map[tChunkVer, map[tTargetId, seq[tWriteWorkDone]]]];
// Raises the recorded version of (chunkId, targetId) to updateVer; the recorded
// version starts at 0 and is never lowered by this function.
fun updateChunkVersionOnTarget(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer) {
if (!(chunkId in chunkVersionOnTarget)) {
chunkVersionOnTarget += (chunkId, default(map[tTargetId, tChunkVer]));
}
if (!(targetId in chunkVersionOnTarget[chunkId])) {
chunkVersionOnTarget[chunkId] += (targetId, 0);
}
if (chunkVersionOnTarget[chunkId][targetId] < updateVer) {
chunkVersionOnTarget[chunkId][targetId] = updateVer;
}
}
// Stores chunkContent for (chunkId, updateVer, targetId). A differing content
// that was stored before is only reported with a print and then overwritten.
fun updateChunkContentOnTarget(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer, chunkContent: tBytes) {
if (!(chunkId in chunkContentOnTarget)) {
chunkContentOnTarget += (chunkId, default(map[tChunkVer, map[tTargetId, tBytes]]));
}
if (!(updateVer in chunkContentOnTarget[chunkId])) {
chunkContentOnTarget[chunkId] += (updateVer, default(map[tTargetId, tBytes]));
}
if (targetId in chunkContentOnTarget[chunkId][updateVer]) {
if (chunkContentOnTarget[chunkId][updateVer][targetId] != chunkContent) {
print format("find different chunk content {0} than chunkContentOnTarget[chunkId:{1}][updateVer:{2}] {3}",
chunkContent, chunkId, updateVer, chunkContentOnTarget[chunkId][updateVer]);
}
}
chunkContentOnTarget[chunkId][updateVer][targetId] = chunkContent;
}
// Appends writeWorkDone to the list kept for (chunkId, updateVer, targetId),
// creating the intermediate maps and the list on first use.
fun addUpdateOfChunkReplica(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer, writeWorkDone: tWriteWorkDone) {
if (!(chunkId in updatesOfChunkReplica)) {
updatesOfChunkReplica += (chunkId, default(map[tChunkVer, map[tTargetId, seq[tWriteWorkDone]]]));
}
if (!(updateVer in updatesOfChunkReplica[chunkId])) {
updatesOfChunkReplica[chunkId] += (updateVer, default(map[tTargetId, seq[tWriteWorkDone]]));
}
if (!(targetId in updatesOfChunkReplica[chunkId][updateVer])) {
updatesOfChunkReplica[chunkId][updateVer] += (targetId, default(seq[tWriteWorkDone]));
}
// insert at index sizeof(...), i.e. at the end of the sequence
updatesOfChunkReplica[chunkId][updateVer][targetId] += (sizeof(updatesOfChunkReplica[chunkId][updateVer][targetId]), writeWorkDone);
}
start state WaitForUpdates {
// NOTE(review): the body of the if below is commented out, so this handler
// currently records nothing; it only reads fields of the event.
on eReadWorkDone do (readWorkDone: tReadWorkDone) {
var chunkId: tChunkId;
var targetId: tTargetId;
var updateVer: tChunkVer;
chunkId = readWorkDone.chunkMetadata.chunkId;
targetId = readWorkDone.targetId;
updateVer = readWorkDone.chunkMetadata.updateVer;
if (readWorkDone.status == ErrorCode_SUCCESS && sizeof(readWorkDone.dataBytes) == readWorkDone.chunkMetadata.chunkSize) {
// updateChunkVersionOnTarget(chunkId, targetId, updateVer);
// updateChunkContentOnTarget(chunkId, targetId, updateVer, readWorkDone.dataBytes);
}
}
// Records version, content and history for successful writes. Committed-update
// and stale-update results pass the first filter but are not recorded, because
// the bookkeeping below runs for ErrorCode_SUCCESS only.
on eWriteWorkDone do (writeWorkDone: tWriteWorkDone) {
var chunkId: tChunkId;
var targetId: tTargetId;
var updateVer: tChunkVer;
if (writeWorkDone.status != ErrorCode_SUCCESS &&
writeWorkDone.status != ErrorCode_CHUNK_COMMITTED_UPDATE &&
writeWorkDone.status != ErrorCode_CHUNK_STALE_UPDATE)
return;
chunkId = writeWorkDone.key.chunkId;
targetId = writeWorkDone.targetId;
updateVer = writeWorkDone.updateVer;
if (writeWorkDone.status == ErrorCode_SUCCESS) {
updateChunkVersionOnTarget(chunkId, targetId, updateVer);
updateChunkContentOnTarget(chunkId, targetId, updateVer, writeWorkDone.currentChunkContent);
addUpdateOfChunkReplica(chunkId, targetId, updateVer, writeWorkDone);
}
}
// Prints the bookkeeping of the committed chunk and, for remove commits,
// forgets the version and contents recorded for the committing target.
on eCommitWorkDone do (commitWorkDone: tCommitWorkDone) {
var chunkId: tChunkId;
var targetId: tTargetId;
var targetIdx: tTargetId;
var commitVer: tChunkVer;
var updateVer: tChunkVer;
var chunkVer: tChunkVer;
var chunkContent: tBytes;
var replicaChain: tReplicaChain;
var commitMsg: tCommitMsg;
var writeWorkDone: tWriteWorkDone;
var writeWorkIdx: int;
// only successful and stale commits are processed
if (commitWorkDone.status != ErrorCode_SUCCESS &&
commitWorkDone.status != ErrorCode_CHUNK_STALE_COMMIT)
return;
targetId = commitWorkDone.targetId;
chunkId = commitWorkDone.key.chunkId;
commitVer = commitWorkDone.commitVer;
// NOTE(review): replicaChain and commitMsg are only used by the commented-out
// assertions at the end of this handler; the lookup presumably fails when the
// chain version has not been announced through eNewRoutingInfo yet - confirm
// that this implicit check is intended.
replicaChain = replicaChains[commitWorkDone.key.vChainId];
commitMsg = commitWorkDone.commitMsg;
// this is a special commit to remove an old chunk from a returning target
if (commitWorkDone.removeChunk && commitWorkDone.commitVer == 0)
return;
if (chunkId in updatesOfChunkReplica) {
// print all write works on the chunk
foreach (chunkVer in keys(updatesOfChunkReplica[chunkId])) {
foreach (targetIdx in keys(updatesOfChunkReplica[chunkId][chunkVer])) {
writeWorkIdx = 0;
while (writeWorkIdx < sizeof(updatesOfChunkReplica[chunkId][chunkVer][targetIdx])) {
writeWorkDone = updatesOfChunkReplica[chunkId][chunkVer][targetIdx][writeWorkIdx];
// [remove:...] is true when the recorded content equals default(tBytes);
// [commit:...] is true when the update version is <= the committed version
print format("updatesOfChunkReplica[chunkId:{0}][updateVer:{1}][targetIdx:{2}][#{3}][chainVer:{4}][remove:{5}][commit:{6}]: {7}",
chunkId, chunkVer, targetIdx, writeWorkIdx, writeWorkDone.chainVer, writeWorkDone.currentChunkContent == default(tBytes), chunkVer <= commitVer, writeWorkDone);
writeWorkIdx = writeWorkIdx + 1;
}
}
}
}
if (chunkId in chunkVersionOnTarget) {
// print all versions of the chunk
print format("chunkVersionOnTarget[chunkId:{0}]: {1}", chunkId, chunkVersionOnTarget[chunkId]);
}
if (chunkId in chunkContentOnTarget) {
// print all contents of the chunk
print format("chunkContentOnTarget[chunkId:{0}]: {1}", chunkId, chunkContentOnTarget[chunkId]);
}
// a remove commit drops what was recorded for this target on the chunk
if (commitWorkDone.removeChunk) {
if (chunkId in chunkVersionOnTarget && targetId in chunkVersionOnTarget[chunkId]) {
print format("remove versions of chunk {0}: {1}", chunkId, chunkVersionOnTarget[chunkId]);
chunkVersionOnTarget[chunkId] -= (targetId);
}
if (chunkId in chunkContentOnTarget) {
print format("remove contents of chunk {0}: {1}", chunkId, chunkContentOnTarget[chunkId]);
foreach (chunkVer in keys(chunkContentOnTarget[chunkId])) {
chunkContentOnTarget[chunkId][chunkVer] -= (targetId);
}
}
return;
}
// Disabled checks: every serving target has a version >= commitVer and all
// serving targets hold the same content at commitVer.
// foreach (targetId in replicaChain.targets) {
// if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
// assert chunkId in chunkVersionOnTarget && targetId in chunkVersionOnTarget[chunkId] && chunkVersionOnTarget[chunkId][targetId] >= commitVer,
// format("missing update, tag:{0}, chunkId:{1}, targetId:{2}, commitVer:{3}, chunkVersionOnTarget: {4}, replica chain: {5}",
// commitMsg.tag, chunkId, targetId, commitVer, chunkVersionOnTarget[chunkId], replicaChain);
// }
// }
// foreach (targetId in replicaChain.targets) {
// if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
// if (sizeof(chunkContent) == 0) {
// assert chunkId in chunkContentOnTarget && commitVer in chunkContentOnTarget[chunkId] && targetId in chunkContentOnTarget[chunkId][commitVer],
// format("missing chunk content, chunkId:{0}, commitVer:{1}, targetId:{2}, chunkContentOnTarget: {3}",
// chunkId, commitVer, targetId, chunkContentOnTarget);
// chunkContent = chunkContentOnTarget[chunkId][commitVer][targetId];
// } else {
// assert chunkContentOnTarget[chunkId][commitVer][targetId] == chunkContent,
// format("inconsistent replica, chunkContentOnTarget[chunkId:{0}][commitVer:{1}] {2}",
// chunkId, commitVer, chunkContentOnTarget[chunkId][commitVer]);
// }
// }
// }
}
// Merges the chains of a routing info into replicaChains, keyed by versioned
// chain id; a routing version that was merged before is skipped.
on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
var replicaChain: tReplicaChain;
if (routingInfo.routingVer in seenRoutingVers) {
return;
} else {
seenRoutingVers += (routingInfo.routingVer);
}
foreach (replicaChain in values(routingInfo.replicaChains)) {
replicaChains[replicaChain.vChainId] = replicaChain;
}
}
}
}
// Payload-free event; the AllReplicasInServingState spec moves to its Done state when it observes it.
event eStopMonitorTargetStates;
// Liveness monitor: the monitor is hot (SomeTargetsUnavailable) while any target
// of the latest known version of any replica chain is in a public state other
// than SERVING or LASTSRV, and cold (AllTargetsAvailable) otherwise. Observing
// eStopMonitorTargetStates ends the monitoring.
spec AllReplicasInServingState observes eNewRoutingInfo, eSyncStartReq, eSyncDoneResp, eStopMonitorTargetStates {
  // chain id -> latest version of the chain seen so far
  var knownReplicaChains: tReplicaChainMap;
  // targets of the known chains that are neither SERVING nor LASTSRV, with their state
  var unavailableTargets: map[tTargetId, tPublicTargetState];
  // target id -> machines that sent a sync start request for it
  var syncWorkers: map[tTargetId, set[machine]];

  // Rebuilds unavailableTargets from scratch out of all known chains.
  fun refreshUnavailableTargets() {
    var targetId: tTargetId;
    var replicaChain: tReplicaChain;
    unavailableTargets = default(map[tTargetId, tPublicTargetState]);
    foreach (replicaChain in values(knownReplicaChains)) {
      foreach (targetId in replicaChain.targets) {
        if (replicaChain.states[targetId] != PublicTargetState_SERVING &&
            replicaChain.states[targetId] != PublicTargetState_LASTSRV)
        {
          unavailableTargets[targetId] = replicaChain.states[targetId];
        }
      }
    }
  }

  // Merges the chains of routingInfo that are new or have a higher chain version
  // into knownReplicaChains and moves to the hot or cold state accordingly.
  //
  // The set of unavailable targets is derived from ALL known chains. It used to
  // be cleared on every routing info and refilled only from chains whose version
  // had increased, so a repeated or unchanged routing info emptied it and sent
  // the monitor to the cold state although targets were still unavailable.
  fun checkForUnavailableTargets(routingInfo: tRoutingInfo) {
    var replicaChain: tReplicaChain;
    var chainId: tChainId;
    foreach (replicaChain in values(routingInfo.replicaChains)) {
      chainId = replicaChain.vChainId.chainId;
      if (!(chainId in knownReplicaChains) ||
          replicaChain.vChainId.chainVer > knownReplicaChains[chainId].vChainId.chainVer)
      {
        knownReplicaChains[chainId] = replicaChain;
        // recompute right away so the print below shows the current set
        refreshUnavailableTargets();
        print format("added a new chain: {0}, unavailableTargets: {1}", replicaChain, unavailableTargets);
      }
    }
    if (sizeof(unavailableTargets) > 0) {
      goto SomeTargetsUnavailable;
    } else {
      goto AllTargetsAvailable;
    }
  }

  // Currently a no-op: the membership assertion is kept disabled.
  fun onSyncDone(syncDoneResp: tSyncDoneResp) {
    // assert syncDoneResp.targetId in unavailableTargets,
    // format("sync target {0} not found in unavailableTargets: {1}", syncDoneResp, unavailableTargets);
  }

  // Remembers which machine started a sync for the target.
  fun onSyncStart(syncStartReq: tSyncStartReq) {
    if (!(syncStartReq.targetId in syncWorkers)) {
      syncWorkers += (syncStartReq.targetId, default(set[machine]));
    }
    syncWorkers[syncStartReq.targetId] += (syncStartReq.from);
  }

  start cold state AllTargetsAvailable {
    on eNewRoutingInfo do checkForUnavailableTargets;
    on eSyncDoneResp do onSyncDone;
    on eSyncStartReq do onSyncStart;
    on eStopMonitorTargetStates goto Done;
  }

  hot state SomeTargetsUnavailable {
    entry {
      var replicaChain: tReplicaChain;
      print format("unavailable targets: {0}, sync workers: {1}", unavailableTargets, syncWorkers);
      // foreach (replicaChain in values(knownReplicaChains)) {
      // print format("known replica chain: {0}", replicaChain);
      // }
    }
    on eNewRoutingInfo do checkForUnavailableTargets;
    on eSyncDoneResp do onSyncDone;
    on eSyncStartReq do onSyncStart;
    on eStopMonitorTargetStates goto Done;
  }

  cold state Done {
    ignore eNewRoutingInfo, eSyncStartReq, eSyncDoneResp;
  }
}

View File

@@ -0,0 +1,30 @@
// Returns the smaller of x and y.
fun Min(x: int, y: int): int {
  if (y <= x) {
    return y;
  }
  return x;
}
// Returns the larger of x and y.
fun Max(x: int, y: int): int {
  if (y >= x) {
    return y;
  }
  return x;
}
// Returns the bitwise AND of x and y, computed bit by bit with division and
// remainder. The loop only runs while both operands are positive, so the
// result is 0 as soon as either argument is zero or negative.
fun BitwiseAnd(x: int, y: int): int {
  var lhs: int;
  var rhs: int;
  var bitValue: int;
  var result: int;
  lhs = x;
  rhs = y;
  bitValue = 1;
  result = 0;
  while (lhs > 0 && rhs > 0) {
    // both lowest bits set -> the current bit belongs to the result
    if (lhs % 2 > 0 && rhs % 2 > 0) {
      result = result + bitValue;
    }
    lhs = lhs / 2;
    rhs = rhs / 2;
    bitValue = bitValue * 2;
  }
  return result;
}

View File

@@ -0,0 +1,75 @@
// Payload of eSendHeartbeat: the sending management client and the management service it talks to.
type tHeartbeatConns = (mgmtClient: MgmtClient, mgmtService: MgmtService);
// Sent by a MgmtClient to its client host on each heartbeat round when heartbeats are enabled.
event eSendHeartbeat: tHeartbeatConns;
// Sent by a MgmtClient to its client host when it receives routing info with a higher version.
event eNewRoutingInfo: tRoutingInfo;
// Management client of one node. On every timer round it optionally asks its
// host to send a heartbeat and requests routing info from the management
// service; routing info with a higher version is forwarded to the host.
machine MgmtClient {
  var nodeId: tNodeId;
  // machine that receives eSendHeartbeat and eNewRoutingInfo
  var clientHost: machine;
  var mgmtService: MgmtService;
  // whether eSendHeartbeat is sent to the host on every round
  var sendHeartbeats: bool;
  var timer: Timer;
  // last request id handed out by newMessageTag
  var nextRequestId: tRequestId;
  // latest routing info accepted from the management service
  var routingInfo: tRoutingInfo;

  // Returns a tag made of this node's id and the next request id.
  fun newMessageTag(): tMessageTag {
    nextRequestId = nextRequestId + 1;
    return (nodeId = nodeId, requestId = nextRequestId);
  }

  start state Init {
    entry (args: (nodeId: tNodeId, clientHost: machine, mgmtService: MgmtService, sendHeartbeats: bool)) {
      print format("{0} init: {1}", this, args);
      sendHeartbeats = args.sendHeartbeats;
      mgmtService = args.mgmtService;
      clientHost = args.clientHost;
      nodeId = args.nodeId;
      timer = CreateTimer(this);
      goto SendHeartbeats;
    }
  }

  state SendHeartbeats {
    entry {
      var reqTag: tMessageTag;
      if (sendHeartbeats) {
        print format("{0} of {1} sends heartbeat to {2}", this, clientHost, mgmtService);
        send clientHost, eSendHeartbeat, (mgmtClient = this, mgmtService = mgmtService);
      }
      // ask for routing info, reporting the version this client already has
      reqTag = newMessageTag();
      send mgmtService, eGetRoutingInfoReq, (from = this, tag = reqTag, routingVer = routingInfo.routingVer);
      StartTimer(timer);
    }

    // every timeout starts the next round
    on eTimeOut goto SendHeartbeats;

    on eShutDown goto Offline with (from: machine) {
      print format("{0} of node {1} is going to shutdown", this, nodeId);
      CancelTimer(timer);
    }

    // Accept and forward routing info only when the request succeeded and the
    // received version is higher than the one held locally.
    on eGetRoutingInfoResp do (getRoutingInfoResp: tGetRoutingInfoResp) {
      var latestRoutingInfo: tRoutingInfo;
      if (getRoutingInfoResp.status != ErrorCode_SUCCESS) {
        return;
      }
      latestRoutingInfo = getRoutingInfoResp.routingInfo;
      if (latestRoutingInfo.routingVer <= routingInfo.routingVer) {
        return;
      }
      print format("{0}: routing info version {1} is greater than: {2}", this, latestRoutingInfo.routingVer, routingInfo.routingVer);
      routingInfo = latestRoutingInfo;
      send clientHost, eNewRoutingInfo, routingInfo;
    }
  }

  state Offline {
    ignore eTimeOut, eShutDown, eGetRoutingInfoResp;

    entry {
      print format("{0} #{1} is offline, client host: {2}", this, nodeId, clientHost);
      // forget the routing info while offline
      routingInfo = default(tRoutingInfo);
    }

    on eRestart goto SendHeartbeats with (from: machine) {
      print format("{0} #{1} is restarted by {2}", this, nodeId, from);
    }
  }
}

View File

@@ -0,0 +1,623 @@
// Public state of a storage target. The non-zero values are distinct powers of
// two, so several states can be added into one int mask and tested with
// BitwiseAnd (as the callers in this file do).
enum tPublicTargetState {
PublicTargetState_INVALID = 0, // invalid state
PublicTargetState_SERVING = 1, // online and serving client requests
PublicTargetState_LASTSRV = 2, // offline but it was the last serving target
PublicTargetState_SYNCING = 4, // online and syncing updates
PublicTargetState_WAITING = 8, // online and waiting to join the chain
PublicTargetState_OFFLINE = 16 // crashed or stopped
}
// Returns true for the SERVING and SYNCING public states, false for all others.
fun IsActiveTargetState(targetState: tPublicTargetState): bool {
  if (targetState == PublicTargetState_SERVING) {
    return true;
  }
  return targetState == PublicTargetState_SYNCING;
}
// Returns the display name of every public target state except INVALID.
fun AllPublicTargetStates(): map[tPublicTargetState, string] {
  var names: map[tPublicTargetState, string];
  // the map starts empty, so each assignment below inserts a new key
  names[PublicTargetState_SERVING] = "SERVING";
  names[PublicTargetState_LASTSRV] = "LASTSRV";
  names[PublicTargetState_SYNCING] = "SYNCING";
  names[PublicTargetState_WAITING] = "WAITING";
  names[PublicTargetState_OFFLINE] = "OFFLINE";
  return names;
}
// Renders an int mask of public target states as the names of the states it
// contains joined by "+", followed by the numeric value in parentheses,
// e.g. "SERVING+SYNCING(5)". A value of zero or below yields just "(x)".
//
// Bits that do not belong to any known state are rendered once as "UNKNOWN"
// and end the decoding; without that exit the loop would never terminate for
// such a value, because no state could be subtracted from the remainder.
fun PublicTargetStateToString(x: int): string {
  var states: map[tPublicTargetState, string];
  var s: tPublicTargetState;
  var y: int;
  var str: string;
  var matched: bool;
  states = AllPublicTargetStates();
  y = x;
  while (y > 0) {
    if (str != "") {
      str = str + "+";
    }
    // peel one known state off the remaining mask per iteration
    matched = false;
    foreach (s in keys(states)) {
      if (BitwiseAnd(y, (s to int)) == (s to int)) {
        str = str + states[s];
        y = y - (s to int);
        matched = true;
        break;
      }
    }
    if (!matched) {
      // the remaining bits match no known state: report them and stop
      str = str + "UNKNOWN";
      break;
    }
  }
  return str + format("({0})", x);
}
// Renders a target -> public state map as "<target->STATE(n)>" items joined by ", ".
fun PublicTargetStatesToString(targetStates: map[tTargetId, tPublicTargetState]): string {
  var targetId: tTargetId;
  var str: string;
  var item: string;
  foreach (targetId in keys(targetStates)) {
    item = format("<{0}->{1}>", targetId, PublicTargetStateToString(targetStates[targetId] to int));
    if (str == "") {
      str = item;
    } else {
      str = str + ", " + item;
    }
  }
  return str;
}
// target id -> storage target machine, for the targets of one node
type tLocalTargetMap = map[tTargetId, StorageTarget];
// node id -> the targets of that node
type tGlobalTargetMap = map[tNodeId, tLocalTargetMap];
// chain id -> replica chain
type tReplicaChainMap = map[tChainId, tReplicaChain];
// node id -> storage client machine
type tStorageClientMap = map[tNodeId, StorageClient];
// node id -> storage service machine
type tStorageServiceMap = map[tNodeId, StorageService];
// version number of the routing info
type tRoutingVer = int;
// routing info: its version, the replica chains, the storage services and the set of offline nodes
type tRoutingInfo = (routingVer: tRoutingVer, replicaChains: tReplicaChainMap, storageServices: tStorageServiceMap, offlineServices: set[tNodeId]);
// request for routing info; routingVer is the version the requester reports (MgmtClient sends the version it currently holds)
type tGetRoutingInfoReq = (from: machine, tag: tMessageTag, routingVer: tRoutingVer);
// response to a routing info request
type tGetRoutingInfoResp = (tag: tMessageTag, status: tErrorCode, routingInfo: tRoutingInfo);
event eGetRoutingInfoReq : tGetRoutingInfoReq;
event eGetRoutingInfoResp : tGetRoutingInfoResp;
// report of a node's local target states together with its target machines and storage service
type tUpdateTargetStateMsg = (from: machine, tag: tMessageTag, routingVer: tRoutingVer, nodeId: tNodeId, targetStates: tLocalTargetStateMap, localTargets: tLocalTargetMap, storageService: StorageService);
event eUpdateTargetStateMsg : tUpdateTargetStateMsg;
// registration of a storage client under its node id
type tRegisterClientMsg = (from: machine, nodeId: tNodeId, storageClient: StorageClient);
event eRegisterClientMsg : tRegisterClientMsg;
// NOTE(review): the meaning of the int payload is not visible in this part of the file
event eStopFindNewFailures : int;
event eStartNextHeartbeatRound;
// DONE: remove failed storage targets from replication chains
// DONE: re-send pending write requests to successor
// DONE: let failed targets resync and return
// TODO: allow targets moved from one node to another
// TODO: leader election among multiple mgmt services
// TODO: create C++ interfaces from the spec
machine MgmtService {
var nodeId: tNodeId;
var nextRequestId: tRequestId;
var routingVer: tRoutingVer;
var numStorageServices: int;
// var mgmtClients: set[machine];
var fullReplicaChains: tReplicaChainMap;
// var knownStorageClients: tStorageClientMap;
var knownStorageServices: tStorageServiceMap;
var nodeTargetStates: map[tNodeId, tLocalTargetStateMap];
var storageTargets: map[tTargetId, StorageTarget]; // for debug only
var delayedRoutingReqs: map[(machine, tRoutingVer), tGetRoutingInfoReq];
// num of ping attempts made
var numAttempts: int;
var maxAttempts: int;
var stopFindNewFailures: int;
// set of offline storage services
var offlineStorageServices: set[tNodeId];
// nodes that have responded in the current round
var aliveStorageServices: set[tNodeId];
// timer to wait for responses from nodes
var timer: Timer;
// Advances the request counter and returns a tag made of this machine's node id
// and the new request id.
fun newMessageTag(): tMessageTag {
  var tag: tMessageTag;
  nextRequestId = nextRequestId + 1;
  tag = (nodeId = nodeId, requestId = nextRequestId);
  return tag;
}
// fun registerClient(registerClientMsg: tRegisterClientMsg) {
// var nodeId: tNodeId;
// var storageClient: StorageClient;
// nodeId = registerClientMsg.nodeId;
// storageClient = registerClientMsg.storageClient;
// assert !(nodeId in knownStorageClients && knownStorageClients[nodeId] != storageClient);
// knownStorageClients[nodeId] = storageClient;
// mgmtClients += (registerClientMsg.from);
// print format("added client {0}", nodeId);
// }
// Replaces the recorded local target states of a node with the reported ones:
// targets missing from the report are dropped, reported targets are added or
// overwritten, and the target machine of every reported target is remembered.
fun updateLocalTargetState(nodeId: tNodeId, localTargetStates: tLocalTargetStateMap, localTargets: tLocalTargetMap) {
  var targetId: tTargetId;
  if (nodeId in nodeTargetStates) {
    // forget targets the node no longer reports
    foreach (targetId in keys(nodeTargetStates[nodeId])) {
      if (!(targetId in localTargetStates)) {
        nodeTargetStates[nodeId] -= (targetId);
      }
    }
  } else {
    // first report of this node: start from an empty state map
    nodeTargetStates[nodeId] = default(tLocalTargetStateMap);
  }
  foreach (targetId in keys(localTargetStates)) {
    nodeTargetStates[nodeId][targetId] = localTargetStates[targetId];
    storageTargets[targetId] = localTargets[targetId];
  }
}
// Sets the local state of every recorded target of a node to targetState.
// A node without a record gets an empty one, which leaves nothing to update.
fun setLocalTargetState(nodeId: tNodeId, targetState: tLocalTargetState) {
  var targetId: tTargetId;
  if (!(nodeId in nodeTargetStates)) {
    nodeTargetStates[nodeId] = default(tLocalTargetStateMap);
    return;
  }
  foreach (targetId in keys(nodeTargetStates[nodeId])) {
    nodeTargetStates[nodeId][targetId] = targetState;
  }
}
// Applies a target state report to the recorded local target states of the
// reporting node.
fun processUpdateTargetStateMsg(updateTargetStateMsg: tUpdateTargetStateMsg) {
  var reportingNode: tNodeId;
  reportingNode = updateTargetStateMsg.nodeId;
  updateLocalTargetState(reportingNode, updateTargetStateMsg.targetStates, updateTargetStateMsg.localTargets);
}
// Returns the chain with the target appended at the end, together with its
// public state, node id and the storage service known for that node. A target
// that is already part of the chain leaves the chain unchanged.
fun appendTargetToChain(replicaChain: tReplicaChain, targetId: tTargetId, nodeId: tNodeId, targetState: tPublicTargetState): tReplicaChain {
  if (!(targetId in replicaChain.targets)) {
    // insert at index sizeof(targets), i.e. after the current last target
    replicaChain.targets += (sizeof(replicaChain.targets), targetId);
    replicaChain.states += (targetId, targetState);
    replicaChain.nodes += (targetId, nodeId);
    replicaChain.services += (targetId, knownStorageServices[nodeId]);
  }
  return replicaChain;
}
// Returns chain extended by the targets of other that it does not contain yet,
// in the order of other.targets and with the node and state recorded in other.
fun extendChain(chain: tReplicaChain, other: tReplicaChain): tReplicaChain {
  var targetId: tTargetId;
  foreach (targetId in other.targets) {
    if (!(targetId in chain.targets)) {
      chain = appendTargetToChain(chain, targetId, other.nodes[targetId], other.states[targetId]);
    }
  }
  return chain;
}
// Applies one transition rule to one target of chain chainId.
//
// The rule fires when the target's current local state equals
// expectedLocalState and its current public state (taken from
// fullReplicaChains) shares a bit with the fromPublicState mask. The target is
// then appended to replicaChain with state toPublicState. When that differs
// from the current public state, the chain version of replicaChain and the
// routing version of this machine are each incremented.
// Returns replicaChain, unchanged when the rule does not fire.
fun updatePublicTargetState(
  replicaChain: tReplicaChain,
  chainId: tChainId,
  targetId: tTargetId,
  expectedLocalState: tLocalTargetState,
  fromPublicState: int,
  toPublicState: tPublicTargetState): tReplicaChain
{
  var nodeId: tNodeId;
  var currentLocalState: tLocalTargetState;
  var currentPublicState: tPublicTargetState;

  nodeId = fullReplicaChains[chainId].nodes[targetId];
  currentLocalState = nodeTargetStates[nodeId][targetId];
  currentPublicState = fullReplicaChains[chainId].states[targetId];

  if (currentLocalState != expectedLocalState) {
    return replicaChain;
  }
  if (!(BitwiseAnd(currentPublicState to int, fromPublicState) > 0)) {
    return replicaChain;
  }

  replicaChain = appendTargetToChain(replicaChain, targetId, nodeId, toPublicState);

  if (currentPublicState == toPublicState) {
    print format("chain {0}, {1} #{2}: public state untouched, from state {3}, to state {4}, local state: {5}",
      chainId, storageTargets[targetId], targetId,
      PublicTargetStateToString(fromPublicState),
      PublicTargetStateToString(toPublicState to int),
      LocalTargetStateToString(expectedLocalState));
    return replicaChain;
  }

  // the public state really changes: bump chain and routing versions
  replicaChain.vChainId.chainVer = replicaChain.vChainId.chainVer + 1;
  routingVer = routingVer + 1;
  print format("chain {0}, {1} #{2}: public state updated {3} ==> {4}, local state: {5}, routing version: {6}",
    replicaChain.vChainId, storageTargets[targetId], targetId,
    PublicTargetStateToString(fromPublicState),
    PublicTargetStateToString(toPublicState to int),
    LocalTargetStateToString(currentLocalState),
    routingVer);
  return replicaChain;
}
/* transitions of public target states
<up-to-date>
serving syncing waiting lastsrv offline
serving y
syncing y
waiting y
lastsrv y
offline y
<online>
serving syncing waiting lastsrv offline
serving y
syncing c c
waiting c c
lastsrv y
offline y
<offline>
serving syncing waiting lastsrv offline
serving c c
syncing y
waiting y
lastsrv y
offline y
*/
// Computes the next version of chain chainId from the current public states in
// fullReplicaChains and the local states in nodeTargetStates.
//
// Targets are sorted into one group chain per public state by repeatedly calling
// updatePublicTargetState(group, chainId, targetId, expected local state,
// mask of accepted current public states, new public state). Each group starts
// as default(tReplicaChain), and updatePublicTargetState increments the group's
// chainVer for every state change. The rules are order dependent: later rules
// test the sizes and members of the groups built by earlier ones.
//
// Returns the chain obtained by concatenating the groups in the iteration order
// of keys(states); its chain version is the current version plus the chainVer
// values accumulated in the groups.
fun updateOneReplicaChain(chainId: tChainId): tReplicaChain {
var states: map[tPublicTargetState, string];
var targetsGroupbyState: map[tPublicTargetState, tReplicaChain];
var updatedReplicaChain: tReplicaChain;
var targetId: tTargetId;
var targetState: tPublicTargetState;
states = AllPublicTargetStates();
// one empty group per public state, tagged with the chain id
foreach (targetState in keys(states)) {
targetsGroupbyState[targetState] = default(tReplicaChain);
targetsGroupbyState[targetState].vChainId.chainId = chainId;
}
// state transitions to serving
// up-to-date targets that are serving, syncing or lastsrv become (or stay) serving
foreach (targetId in fullReplicaChains[chainId].targets) {
targetsGroupbyState[PublicTargetState_SERVING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SERVING],
chainId, targetId,
LocalTargetState_UPTODATE,
(PublicTargetState_SERVING to int) + (PublicTargetState_SYNCING to int) + (PublicTargetState_LASTSRV to int),
PublicTargetState_SERVING);
}
// online targets that are serving or lastsrv become (or stay) serving
foreach (targetId in fullReplicaChains[chainId].targets) {
targetsGroupbyState[PublicTargetState_SERVING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SERVING],
chainId, targetId,
LocalTargetState_ONLINE,
(PublicTargetState_SERVING to int) + (PublicTargetState_LASTSRV to int),
PublicTargetState_SERVING);
}
// state transitions to lastsrv
foreach (targetId in fullReplicaChains[chainId].targets) {
// an offline serving target becomes lastsrv only while both the serving and
// the lastsrv group are still empty
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) == 0 &&
sizeof(targetsGroupbyState[PublicTargetState_LASTSRV].targets) == 0) {
targetsGroupbyState[PublicTargetState_LASTSRV] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_LASTSRV],
chainId, targetId,
LocalTargetState_OFFLINE,
PublicTargetState_SERVING to int,
PublicTargetState_LASTSRV);
}
// an offline lastsrv target stays lastsrv
targetsGroupbyState[PublicTargetState_LASTSRV] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_LASTSRV],
chainId, targetId,
LocalTargetState_OFFLINE,
PublicTargetState_LASTSRV to int,
PublicTargetState_LASTSRV);
}
// state transitions to syncing
// online syncing targets stay syncing while the serving group is non-empty
foreach (targetId in fullReplicaChains[chainId].targets) {
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) > 0) {
targetsGroupbyState[PublicTargetState_SYNCING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SYNCING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_SYNCING to int,
PublicTargetState_SYNCING);
}
}
// an online waiting target starts syncing only while the syncing group is
// empty; the condition is re-checked per target, so at most one is promoted
foreach (targetId in fullReplicaChains[chainId].targets) {
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) > 0 &&
sizeof(targetsGroupbyState[PublicTargetState_SYNCING].targets) == 0) {
targetsGroupbyState[PublicTargetState_SYNCING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_SYNCING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_WAITING to int,
PublicTargetState_SYNCING);
}
}
// state transitions to waiting
foreach (targetId in fullReplicaChains[chainId].targets) {
// without any serving target an online syncing target falls back to waiting
if (sizeof(targetsGroupbyState[PublicTargetState_SERVING].targets) == 0) {
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_SYNCING to int,
PublicTargetState_WAITING);
}
// an online waiting target that was not put into the syncing group stays waiting
if (!(targetId in targetsGroupbyState[PublicTargetState_SYNCING].targets)) {
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_WAITING to int,
PublicTargetState_WAITING);
}
// up-to-date targets that are offline or waiting become (or stay) waiting
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_UPTODATE,
(PublicTargetState_OFFLINE to int) + (PublicTargetState_WAITING to int),
PublicTargetState_WAITING);
// online targets that are publicly offline become waiting
targetsGroupbyState[PublicTargetState_WAITING] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_WAITING],
chainId, targetId,
LocalTargetState_ONLINE,
PublicTargetState_OFFLINE to int,
PublicTargetState_WAITING);
}
// state transitions to offline
foreach (targetId in fullReplicaChains[chainId].targets) {
// an offline serving target goes offline unless it was put into the lastsrv group
if (!(targetId in targetsGroupbyState[PublicTargetState_LASTSRV].targets)) {
targetsGroupbyState[PublicTargetState_OFFLINE] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_OFFLINE],
chainId, targetId,
LocalTargetState_OFFLINE,
PublicTargetState_SERVING to int,
PublicTargetState_OFFLINE);
}
// offline targets that are syncing, waiting or offline become (or stay) offline
targetsGroupbyState[PublicTargetState_OFFLINE] = updatePublicTargetState(
targetsGroupbyState[PublicTargetState_OFFLINE],
chainId, targetId,
LocalTargetState_OFFLINE,
(PublicTargetState_SYNCING to int) + (PublicTargetState_WAITING to int) + (PublicTargetState_OFFLINE to int),
PublicTargetState_OFFLINE);
}
// print format("chain {0}, targets group by state: {1}", chainId, targetsGroupbyState);
// start from the current versioned chain id, then append every group and add
// the number of state changes counted in that group to the chain version
updatedReplicaChain.vChainId = fullReplicaChains[chainId].vChainId;
foreach (targetState in keys(states)) {
updatedReplicaChain = extendChain(updatedReplicaChain, targetsGroupbyState[targetState]);
updatedReplicaChain.vChainId.chainVer = updatedReplicaChain.vChainId.chainVer + targetsGroupbyState[targetState].vChainId.chainVer;
}
return updatedReplicaChain;
}
// Recomputes every replication chain from the current local target states and
// installs the results into fullReplicaChains. When routingVer differs from its
// value on entry after all chains are processed, delayed routing queries are answered.
fun updateRoutingInfo() {
  var updatedReplicaChain: tReplicaChain;
  var localTargetStates: map[tTargetId, tLocalTargetState];
  var chainId: tChainId;
  var targetId: tTargetId;
  // Declared locally so the lookup in the loop below does not overwrite the
  // machine-level nodeId that the Init state assigns from its arguments.
  var nodeId: tNodeId;
  var prevRoutingVer: tRoutingVer;
  prevRoutingVer = routingVer;
  foreach (chainId in keys(fullReplicaChains)) {
    // Gather the local state of each target of this chain; only used for the log line below.
    localTargetStates = default(map[tTargetId, tLocalTargetState]);
    foreach (targetId in fullReplicaChains[chainId].targets) {
      nodeId = fullReplicaChains[chainId].nodes[targetId];
      localTargetStates += (targetId, nodeTargetStates[nodeId][targetId]);
    }
    print format("start to update chain {0}, public states: {1}, local states: {2}",
      fullReplicaChains[chainId].vChainId,
      PublicTargetStatesToString(fullReplicaChains[chainId].states),
      LocalTargetStatesToString(localTargetStates));
    updatedReplicaChain = updateOneReplicaChain(chainId);
    if (updatedReplicaChain.vChainId != fullReplicaChains[chainId].vChainId) {
      print format("replication chain updated: {0}, updated states: {1}, services: {2}",
        updatedReplicaChain.vChainId,
        PublicTargetStatesToString(updatedReplicaChain.states),
        updatedReplicaChain.services);
    }
    // Invariant: a chain must always keep a target that is serving or last-serving.
    assert PublicTargetState_SERVING in values(updatedReplicaChain.states) ||
           PublicTargetState_LASTSRV in values(updatedReplicaChain.states),
      format("no serving target: {0}", ReplicaChainToString(updatedReplicaChain));
    // Invariant: an update may reorder or relabel targets but never add or drop one.
    assert sizeof(updatedReplicaChain.targets) == sizeof(fullReplicaChains[chainId].targets),
      format("updated chain {0} has different number of targets {1} than the old chain {2}",
        chainId, updatedReplicaChain.targets, fullReplicaChains[chainId].targets);
    fullReplicaChains[chainId] = updatedReplicaChain;
  }
  // NOTE(review): nothing in this function assigns routingVer, so this branch only
  // fires if a callee (presumably updateOneReplicaChain) bumps it — confirm.
  if (routingVer != prevRoutingVer) {
    print format("routing info updated to version {0}, process delayed routing queries: {1}", routingVer, delayedRoutingReqs);
    processDelayedRoutingReqs();
  }
}
// Drains delayedRoutingReqs: each parked query is removed from the map and then
// handed back to replyWithRoutingInfo, which answers it or parks it again.
fun processDelayedRoutingReqs() {
  var parkedReq: tGetRoutingInfoReq;
  foreach (parkedReq in values(delayedRoutingReqs)) {
    // remove first: replyWithRoutingInfo only re-parks a query that is not already in the map
    delayedRoutingReqs -= (parkedReq.from, parkedReq.routingVer);
    replyWithRoutingInfo(parkedReq);
  }
}
// Answers a routing query. A requester whose routingVer differs from the current
// one gets the full routing info right away; a requester that already holds the
// current version is parked in delayedRoutingReqs, keyed by (from, routingVer).
fun replyWithRoutingInfo(getRoutingInfo: tGetRoutingInfoReq) {
  var routingInfo: tRoutingInfo;
  if (getRoutingInfo.routingVer != routingVer) {
    routingInfo = (
      routingVer = routingVer,
      replicaChains = fullReplicaChains,
      storageServices = knownStorageServices,
      offlineServices = offlineStorageServices);
    send getRoutingInfo.from, eGetRoutingInfoResp, (tag = getRoutingInfo.tag, status = ErrorCode_SUCCESS, routingInfo = routingInfo);
    return;
  }
  // requester is up to date: park the query once, nothing to send for now
  if (!((getRoutingInfo.from, getRoutingInfo.routingVer) in delayedRoutingReqs)) {
    delayedRoutingReqs += ((getRoutingInfo.from, getRoutingInfo.routingVer), getRoutingInfo);
  }
}
// Returns the known storage services that are missing from aliveStorageServices.
// stopFindNewFailures gates detection: at 2 the result is always empty; at 1 it
// switches to 2 (and returns empty) as soon as the known and alive counts match.
fun computeOfflineStorageServices() : set[tNodeId] {
  var knownNodeId: tNodeId;
  var missing: set[tNodeId];
  if (stopFindNewFailures == 1 && sizeof(knownStorageServices) == sizeof(aliveStorageServices)) {
    // every known service is alive again: stop looking for new failures from now on
    stopFindNewFailures = 2;
  }
  if (stopFindNewFailures == 2) {
    return missing;
  }
  foreach (knownNodeId in keys(knownStorageServices)) {
    if (!(knownNodeId in aliveStorageServices)) {
      missing += (knownNodeId);
    }
  }
  return missing;
}
// Start state: copies the constructor arguments into machine state, sets the
// initial routing version, creates the timer and moves on to Bootstrap.
start state Init {
  entry (args: (nodeId: tNodeId, maxAttempts: int, numStorageServices: int, replicaChains: tReplicaChainMap)) {
    // identity and failure-detection settings
    nodeId = args.nodeId;
    maxAttempts = args.maxAttempts;
    numStorageServices = args.numStorageServices;
    // initial routing table and its version
    fullReplicaChains = args.replicaChains;
    routingVer = 10001;
    timer = CreateTimer(this);
    goto Bootstrap;
  }
}
// Collects the first target-state report of every storage service. Routing
// queries and client registrations are deferred until bootstrap completes.
state Bootstrap {
  defer eGetRoutingInfoReq, eRegisterClientMsg;
  on eUpdateTargetStateMsg do (heartbeat: tUpdateTargetStateMsg) {
    var knownNodeId: tNodeId;
    processUpdateTargetStateMsg(heartbeat);
    knownStorageServices[heartbeat.nodeId] = heartbeat.storageService;
    if (sizeof(knownStorageServices) != numStorageServices) {
      // still waiting for some storage services to report in
      return;
    }
    // all services reported: treat every one of them as alive for the first round
    foreach(knownNodeId in keys(knownStorageServices)) {
      aliveStorageServices += (knownNodeId);
    }
    updateRoutingInfo();
    print format("mgmt service started");
    goto WaitForHeartbeats;
  }
}
// Steady state: serves routing queries, records heartbeats and, every
// maxAttempts timer expirations, marks the targets of silent nodes offline.
state WaitForHeartbeats {
  entry {
    // arm the timer that bounds one heartbeat-collection attempt
    StartTimer(timer);
  }
  on eGetRoutingInfoReq do replyWithRoutingInfo;
  // on eRegisterClientMsg do registerClient;
  on eStopFindNewFailures do (mode: int) {
    stopFindNewFailures = mode;
  }
  on eUpdateTargetStateMsg do (heartbeat: tUpdateTargetStateMsg) {
    if (heartbeat.routingVer >= routingVer) {
      processUpdateTargetStateMsg(heartbeat);
      aliveStorageServices += (heartbeat.nodeId);
      print format("#{0}: {1} added to aliveStorageServices {2}", numAttempts, heartbeat.nodeId, aliveStorageServices);
    } else {
      // the sender has not seen the current routing info yet; its report is ignored
      print format("#{0}: ignore stale heartbeat (routingVer < {1}): {2} ", numAttempts, routingVer, heartbeat);
    }
  }
  on eTimeOut do {
    var downNodeId: tNodeId;
    // one more attempt finished
    numAttempts = numAttempts + 1;
    print format("#{0}: aliveStorageServices: {1}", numAttempts, aliveStorageServices);
    if (numAttempts >= maxAttempts) {
      // the round is over: every node that stayed silent gets its targets set offline
      offlineStorageServices = computeOfflineStorageServices();
      foreach (downNodeId in offlineStorageServices) {
        print format("detected node {0} {1} is down, set its targets offline: {2}",
          downNodeId, knownStorageServices[downNodeId], keys(nodeTargetStates[downNodeId]));
        setLocalTargetState(downNodeId, LocalTargetState_OFFLINE);
      }
      updateRoutingInfo();
      // reset the bookkeeping so the next detection round starts from scratch
      aliveStorageServices = default(set[tNodeId]);
      numAttempts = 0;
    }
    // in both cases the timer is re-armed for the next attempt
    // send this, eStartNextHeartbeatRound;
    StartTimer(timer);
  }
  // on eStartNextHeartbeatRound goto WaitForHeartbeats;
  on eShutDown goto Offline with (requester: machine) {
    print format("{0} is going to shutdown", this);
    send requester, eStopped, this;
  }
}
// Terminal state entered on eShutDown: cancels the timer and drops every
// message that may still arrive.
state Offline {
  // detection has finished; these are all delayed responses and must be ignored
  ignore eGetRoutingInfoReq, eUpdateTargetStateMsg, eRegisterClientMsg, eTimeOut, eStartNextHeartbeatRound;
  entry {
    print format("stop failure detection");
    CancelTimer(timer);
  }
}
}

View File

@@ -0,0 +1,315 @@
/* Storage Client */
// Payload of eSubmitWrite. `from` receives the eWriteComplete answer; a request
// whose dataBytes equals default(tBytes) is turned into a chunk removal.
type tWriteArgs = (from: machine, chunkId: tChunkId, offset: int, length: int, dataBytes: tBytes);
// Payload of eSubmitRead. `from` receives the eReadComplete answer.
type tReadArgs = (from: machine, chunkId: tChunkId, offset: int, length: int);
// Payload of eWriteComplete, copied from the storage service's write response.
type tWriteRes = (status: tErrorCode, chunkId: tChunkId, commitVer: tChunkVer);
// Payload of eReadComplete, copied from the storage service's read response.
type tReadRes = (status: tErrorCode, chunkId: tChunkId, chunkMetadata: tChunkMetadata, dataBytes: tBytes);
// user -> StorageClient: submit a write / read
event eSubmitWrite : tWriteArgs;
event eSubmitRead : tReadArgs;
// StorageClient -> user: final result of a submitted write / read
event eWriteComplete : tWriteRes;
event eReadComplete : tReadRes;
// user -> StorageClient: ask to be notified (payload is the user) once the client is connected
event eWaitConnected : machine;
// StorageClient -> user: the client has received routing info and accepts requests
event eClientConnected;
// Client-side machine that turns eSubmitWrite / eSubmitRead from its users into
// eWriteReq / eReadReq messages for the storage service at the head of the
// target chain, reissues them when the chain version changes, and reports the
// final result back to the submitting user.
machine StorageClient {
  var clientId: tNodeId;
  var mgmtService: MgmtService;
  var mgmtClient: MgmtClient;
  // var timer: Timer;
  // latest routing version and chains received through eNewRoutingInfo
  var routingVer: tRoutingVer;
  var replicaChains: tReplicaChainMap;
  var nextRequestId: tRequestId;
  // users that asked for eClientConnected before routing info was available
  var clientUsers: set[machine];
  // original user arguments, kept to know whom to answer
  var submittedWrites: map[tMessageTag, tWriteArgs];
  var submittedReads: map[tMessageTag, tReadArgs];
  // requests without a final response yet, kept for reissuing
  var inflightWriteReqs: map[tMessageTag, tWriteReq];
  var inflightReadReqs: map[tMessageTag, tReadReq];

  // Returns a fresh tag (this client's id plus the next request id).
  fun newMessageTag(): tMessageTag {
    nextRequestId = nextRequestId + 1;
    return (nodeId = clientId, requestId = nextRequestId);
  }

  // Maps a chunk id onto a chain (chunkId modulo the number of known chains)
  // and returns the key made of that chain's versioned id and the chunk id.
  // NOTE(review): the parameter is typed tChainId although callers pass a chunk id.
  fun calcGlobalKeyFromChunkId(chunkId: tChainId): tGlobalKey {
    var chainIds: seq[tChainId];
    var targetChain: tChainId;
    var replicaChain: tReplicaChain;
    chainIds = keys(replicaChains);
    targetChain = chainIds[chunkId % sizeof(chainIds)];
    replicaChain = replicaChains[targetChain];
    return (vChainId = replicaChain.vChainId, chunkId = chunkId);
  }

  // Installs newer routing info: chains missing from the update are dropped and
  // all others are replaced. Info that is not newer than routingVer is ignored.
  fun processRoutingInfo(routingInfo: tRoutingInfo) {
    var newRoutingVer: tRoutingVer;
    var newReplicaChains: tReplicaChainMap;
    var chainId: tChainId;
    newRoutingVer = routingInfo.routingVer;
    newReplicaChains = routingInfo.replicaChains;
    if (routingVer > newRoutingVer) {
      print format("{0}: error: routingVer {1} > newRoutingVer {2}", this, routingVer, newRoutingVer);
      return;
    } else if (routingVer == newRoutingVer) {
      print format("{0}: ignore: routingVer {1} == newRoutingVer {2}", this, routingVer, newRoutingVer);
      return;
    }
    print format("{0}: updating replica chains from version {1} to {2}", this, routingVer, newRoutingVer);
    routingVer = newRoutingVer;
    foreach (chainId in keys(replicaChains)) {
      if (!(chainId in newReplicaChains))
        replicaChains -= (chainId);
    }
    foreach (chainId in keys(newReplicaChains)) {
      replicaChains[chainId] = newReplicaChains[chainId];
      print format("{0}: new replica chain {1}, targets: {2}, services: {3}",
        this, newReplicaChains[chainId].vChainId, newReplicaChains[chainId].targets, newReplicaChains[chainId].services);
    }
  }

  // fun onSendHeartbeatEvent(heartbeatConns: tHeartbeatConns) {
  //   send heartbeatConns.mgmtService, eRegisterClientMsg, (from = heartbeatConns.mgmtClient, nodeId = clientId, storageClient = this);
  // }

  // Returns the first target of the chain when its public state is SERVING,
  // otherwise 0. Callers treat only ids greater than 0 as usable.
  fun chooseServingTarget(replicaChain: tReplicaChain): tTargetId {
    var targetId: tTargetId;
    targetId = replicaChain.targets[0];
    if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
      return targetId;
    }
    return 0;
  }

  // Stamps the request with the latest version of its chain and sends it to the
  // serving head target. Nothing is sent when the head is not serving.
  fun sendWriteReq(writeReq: tWriteReq) {
    var replicaChain: tReplicaChain;
    var targetId: tTargetId;
    var targetService: StorageService;
    // get the latest chain and update versioned chain id
    replicaChain = replicaChains[writeReq.key.vChainId.chainId];
    writeReq.key.vChainId = replicaChain.vChainId;
    targetId = chooseServingTarget(replicaChain);
    if (targetId > 0) {
      print format("{0}: send write request #{1}: {2}", this, writeReq.retries, writeReq);
      targetService = replicaChain.services[targetId];
      send targetService, eWriteReq, writeReq;
    }
  }

  // Bumps the retry counter of an in-flight write and sends it again.
  fun reissueWriteReq(reqTag: tMessageTag) {
    inflightWriteReqs[reqTag].retries = inflightWriteReqs[reqTag].retries + 1;
    sendWriteReq(inflightWriteReqs[reqTag]);
  }

  // Read counterpart of sendWriteReq.
  fun sendReadReq(readReq: tReadReq) {
    var replicaChain: tReplicaChain;
    var targetId: tTargetId;
    var targetService: StorageService;
    // get the latest chain and update versioned chain id
    replicaChain = replicaChains[readReq.key.vChainId.chainId];
    readReq.key.vChainId = replicaChain.vChainId;
    targetId = chooseServingTarget(replicaChain);
    if (targetId > 0) {
      print format("{0}: send read request #{1}: {2}", this, readReq.retries, readReq);
      targetService = replicaChain.services[targetId];
      send targetService, eReadReq, readReq;
    }
  }

  // Bumps the retry counter of an in-flight read and sends it again. The counter
  // is incremented before sending, as in reissueWriteReq, so the reissued message
  // carries the new retry number instead of repeating the previous one.
  fun reissueReadReq(reqTag: tMessageTag) {
    inflightReadReqs[reqTag].retries = inflightReadReqs[reqTag].retries + 1;
    sendReadReq(inflightReadReqs[reqTag]);
  }

  // Reissues every in-flight write whose chain now has a different versioned id.
  fun processInflightWriteReqs() {
    var oldChainId: tVersionedChainId;
    var newChainId: tVersionedChainId;
    var writeReq: tWriteReq;
    foreach (writeReq in values(inflightWriteReqs)) {
      oldChainId = writeReq.key.vChainId;
      newChainId = replicaChains[oldChainId.chainId].vChainId;
      if (oldChainId != newChainId) {
        print format("{0}: chain version updated: {1} --> {2}, reissuing request {3}", this, oldChainId, newChainId, writeReq);
        reissueWriteReq(writeReq.tag);
      }
    }
  }

  // Reissues every in-flight read whose chain now has a different versioned id.
  fun processInflightReadReqs() {
    var oldChainId: tVersionedChainId;
    var newChainId: tVersionedChainId;
    var readReq: tReadReq;
    foreach (readReq in values(inflightReadReqs)) {
      oldChainId = readReq.key.vChainId;
      newChainId = replicaChains[oldChainId.chainId].vChainId;
      if (oldChainId != newChainId) {
        print format("{0}: chain version updated: {1} --> {2}, reissuing request {3}", this, oldChainId, newChainId, readReq);
        reissueReadReq(readReq.tag);
      }
    }
  }

  // Waits for the first routing info; users asking for connection are remembered.
  start state Init {
    ignore eSendHeartbeat;
    entry (args: (clientId: tNodeId, mgmtService: MgmtService)) {
      clientId = args.clientId;
      mgmtService = args.mgmtService;
      mgmtClient = new MgmtClient((nodeId = clientId, clientHost = this, mgmtService = mgmtService, sendHeartbeats = false));
      // timer = new Timer(this);
    }
    on eWaitConnected do (user: machine) {
      clientUsers += (user);
    }
    // on eSendHeartbeat do onSendHeartbeatEvent;
    on eNewRoutingInfo goto WaitForReqs with processRoutingInfo;
  }

  // Main state: accepts user requests and storage responses.
  state WaitForReqs {
    ignore eSendHeartbeat;
    entry {
      var user: machine;
      // notify every user that asked for the connection while in Init
      foreach (user in clientUsers) {
        send user, eClientConnected;
      }
      // StartTimer(timer);
    }
    on eWaitConnected do (user: machine) {
      send user, eClientConnected;
    }
    on eShutDown goto Stopped with (from: machine) {
      print format("{0} is going to shutdown", this);
      send mgmtClient, eShutDown, this;
    }
    // on eSendHeartbeat do onSendHeartbeatEvent;
    on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
      processRoutingInfo(routingInfo);
      processInflightWriteReqs();
      processInflightReadReqs();
    }
    // on eTimeOut do {
    //   processInflightWriteReqs();
    //   processInflightReadReqs();
    //   StartTimer(timer);
    // }
    on eSubmitWrite do (writeArgs: tWriteArgs) {
      var writeReq: tWriteReq;
      writeReq = (from = this,
        retries = 1,
        tag = newMessageTag(),
        key = calcGlobalKeyFromChunkId(writeArgs.chunkId),
        updateVer = 0,
        commitChainVer = 0,
        fullChunkReplace = false,
        // an empty payload means "remove the chunk"
        removeChunk = writeArgs.dataBytes == default(tBytes),
        fromClient = true,
        offset = writeArgs.offset, length = writeArgs.length,
        dataBytes = writeArgs.dataBytes);
      sendWriteReq(writeReq);
      submittedWrites += (writeReq.tag, writeArgs);
      inflightWriteReqs += (writeReq.tag, writeReq);
    }
    on eSubmitRead do (readArgs: tReadArgs) {
      var readReq: tReadReq;
      readReq = (from = this,
        retries = 1,
        tag = newMessageTag(),
        key = calcGlobalKeyFromChunkId(readArgs.chunkId),
        offset = readArgs.offset, length = readArgs.length);
      sendReadReq(readReq);
      submittedReads += (readReq.tag, readArgs);
      inflightReadReqs += (readReq.tag, readReq);
    }
    on eWriteResp do (writeResp: tWriteResp) {
      if (!(writeResp.tag in inflightWriteReqs)) {
        // a reissued request can be answered more than once; later answers are dropped
        print format("{0}: got response for completed write request: {1}", this, writeResp.key);
        return;
      }
      if (writeResp.status == ErrorCode_CHAIN_VERION_MISMATCH) {
        print format("{0}: retry write request: {1}", this, writeResp.key);
        reissueWriteReq(writeResp.tag);
        return;
      }
      print format("{0}: write response {1}", this, writeResp);
      send submittedWrites[writeResp.tag].from, eWriteComplete, (status = writeResp.status, chunkId = writeResp.key.chunkId, commitVer = writeResp.commitVer);
      submittedWrites -= (writeResp.tag);
      inflightWriteReqs -= (writeResp.tag);
    }
    on eReadResp do (readResp: tReadResp) {
      if (!(readResp.tag in inflightReadReqs)) {
        return;
      }
      if ((readResp.status == ErrorCode_CHAIN_VERION_MISMATCH) || (readResp.status == ErrorCode_CHUNK_NOT_COMMIT)) {
        print format("{0}: retry read request: {1}", this, readResp.key);
        reissueReadReq(readResp.tag);
        return;
      }
      print format("{0}: read response {1}", this, readResp);
      send submittedReads[readResp.tag].from, eReadComplete, (status = readResp.status, chunkId = readResp.key.chunkId,
        chunkMetadata = readResp.chunkMetadata, dataBytes = readResp.dataBytes);
      submittedReads -= (readResp.tag);
      inflightReadReqs -= (readResp.tag);
    }
  }

  // Terminal state: late responses and notifications are ignored.
  state Stopped {
    ignore eReadResp, eWriteResp, eSendHeartbeat, eNewRoutingInfo, eTimeOut;
    entry {
      // CancelTimer(timer);
      print format("{0} stopped", this);
    }
  }
}

View File

@@ -0,0 +1,9 @@
// the storage system module
// Lists every machine that makes up the modelled storage system: the test
// clients, the management and storage machines, and their helper machines.
module StorageSystem = {
TestClient,
MgmtClient, MgmtService,
StorageClient, StorageService,
ReadProcess, WriteProcess,
SyncWorker, StorageTarget, ChunkReplica,
SystemMonitor, Timer
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,804 @@
// Parameters of one test scenario, consumed by SetUpStorageSystem.
type tSystemConfig = (
// chunk size handed to every StorageTarget; must exceed numClients * numIters
chunkSize: int,
// number of replication chains to build
numChains: int,
// number of targets in each chain
numReplicas: int,
// number of storage service nodes
numStorageServices: int,
// how many storage-service shutdowns each test client may inject
failStorageServices: int,
// passed to MgmtService as maxAttempts
failDetectionMaxAttempts: int,
// number of TestClient machines to create
numClients: int,
// passed to every TestClient as numIters
numIters: int
);
// Handles to the machines of a running system, announced with eStorageSystem.
type tStorageSystem = (
mgmt: MgmtService,
storages: tStorageServiceMap,
clients: tTestClientMap
);
// Creates numTargetsPerNode StorageTarget machines for each of the nodes
// 1..numNodes and returns them grouped by node id. Target ids are
// nodeId * 100 + 1, nodeId * 100 + 2, ..., hence the limit of fewer than 100 per node.
fun BuildNodeTargetMap(chunkSize: int, numNodes: int, numTargetsPerNode: int)
  : tGlobalTargetMap
{
  var node: tNodeId;
  var target: tTargetId;
  var created: int;
  var targetMachine: StorageTarget;
  var targetsOfNode: tLocalTargetMap;
  var allTargets: tGlobalTargetMap;
  assert numTargetsPerNode < 100;
  node = 1;
  while (node <= numNodes) {
    targetsOfNode = default(tLocalTargetMap);
    created = 0;
    while (created < numTargetsPerNode) {
      target = node * 100 + 1 + created;
      targetMachine = new StorageTarget((targetId = target, chunkSize = chunkSize));
      targetsOfNode += (target, targetMachine);
      created = created + 1;
    }
    allTargets += (node, targetsOfNode);
    node = node + 1;
  }
  return allTargets;
}
// Builds chains 1..numChains, each at version 1 with numReplicas targets, all in
// SERVING state. Targets are taken round-robin across the nodes; a target is
// removed from the local copy of nodeTargets once it has been assigned.
fun BuildRepliaChainMap(numChains: int, numReplicas: int, nodeTargets: tGlobalTargetMap)
  : tReplicaChainMap
{
  var chainNo: tChainId;
  var pickedTarget: tTargetId;
  var pickedNode: tNodeId;
  var chain: tReplicaChain;
  var chains: tReplicaChainMap;
  var nodeIds: seq[tNodeId];
  var rr: int;
  nodeIds = keys(nodeTargets);
  // round-robin cursor over nodeIds; it keeps advancing across chains
  rr = 0;
  chainNo = 1;
  while (chainNo <= numChains) {
    chain = default(tReplicaChain);
    chain.vChainId = (chainId = chainNo, chainVer = 1);
    while (sizeof(chain.targets) < numReplicas) {
      pickedNode = nodeIds[rr % sizeof(nodeIds)];
      // take the first target still unassigned on that node
      pickedTarget = keys(nodeTargets[pickedNode])[0];
      nodeTargets[pickedNode] -= (pickedTarget);
      print format("chain {0} added target {1} from node {2}", chainNo, pickedTarget, pickedNode);
      // append at the tail of the chain
      chain.targets += (sizeof(chain.targets), pickedTarget);
      chain.nodes += (pickedTarget, pickedNode);
      chain.states += (pickedTarget, PublicTargetState_SERVING);
      rr = rr + 1;
    }
    print format("create new replica chain: {0}", chain);
    chains += (chainNo, chain);
    chainNo = chainNo + 1;
  }
  return chains;
}
// Creates one StorageService machine per node in nodeTargets, giving it that
// node's targets and the management service, and returns them keyed by node id.
fun CreateStorageServices(nodeTargets: tGlobalTargetMap, mgmtService: MgmtService)
  : tStorageServiceMap
{
  var nodeId: tNodeId;
  var localTargets: tLocalTargetMap;
  var service: StorageService;
  var storageServices: tStorageServiceMap;
  foreach (nodeId in keys(nodeTargets)) {
    localTargets = nodeTargets[nodeId];
    service = new StorageService((nodeId = nodeId, localTargets = localTargets, mgmtService = mgmtService));
    storageServices += (nodeId, service);
  }
  return storageServices;
}
// Creates TestClient machines numbered 1..numClients and returns them keyed by
// that number. All clients share the same chunk id range, which starts at
// 789001 and ends at 789000 + 2 * numChains.
fun CreateTestClients(numClients: int, numChains: int, numIters: int, failStorageServices: int, mgmtService: MgmtService, storageServices: tStorageServiceMap, systemMonitor: SystemMonitor)
  : tTestClientMap
{
  var id: tNodeId;
  var firstChunkId: int;
  var lastChunkId: int;
  var created: TestClient;
  var clients: tTestClientMap;
  firstChunkId = 789001;
  lastChunkId = 789000 + numChains * 2;
  id = 1;
  while (id <= numClients) {
    created = new TestClient((
      clientId = id,
      chunkIdBegin = firstChunkId,
      chunkIdEnd = lastChunkId,
      numIters = numIters,
      failStorageServices = failStorageServices,
      mgmtService = mgmtService,
      storageServices = storageServices,
      systemMonitor = systemMonitor));
    clients += (id, created);
    id = id + 1;
  }
  return clients;
}
// Validates the configuration and creates the whole system under test: storage
// targets, replica chains, management service, storage services, system monitor
// and test clients. The configuration and the resulting machine handles are
// announced through eSystemConfig and eStorageSystem.
// testDriver is not used in the body.
fun SetUpStorageSystem(testDriver: machine, config: tSystemConfig) {
var numTargetsPerNode: int;
var nodeTargets: tGlobalTargetMap;
var replicaChains: tReplicaChainMap;
var storageServices: tStorageServiceMap;
var storageService: StorageService;
var mgmtService: MgmtService;
var testClients: tTestClientMap;
var storageSystem: tStorageSystem;
var systemMonitor: SystemMonitor;
print format("system config: {0}", config);
announce eSystemConfig, (config = config,);
// sanity checks on the configuration
assert config.failStorageServices <= config.numStorageServices;
assert config.numStorageServices >= config.numReplicas;
// the chain targets must divide evenly among the storage services
assert config.numChains * config.numReplicas % config.numStorageServices == 0;
// each client writes inside its own numIters-sized region of a chunk
assert config.chunkSize > config.numClients * config.numIters;
numTargetsPerNode = config.numChains * config.numReplicas / config.numStorageServices;
nodeTargets = BuildNodeTargetMap(config.chunkSize, config.numStorageServices, numTargetsPerNode);
print format("init nodeTargets {0}", nodeTargets);
replicaChains = BuildRepliaChainMap(config.numChains, config.numReplicas, nodeTargets);
print format("init replicaChains {0}", replicaChains);
// the management service is created first because the storage services,
// the monitor and the clients all take it as an argument
mgmtService = new MgmtService((nodeId = 9001, maxAttempts = config.failDetectionMaxAttempts,
numStorageServices = config.numStorageServices, replicaChains = replicaChains));
storageServices = CreateStorageServices(nodeTargets, mgmtService);
systemMonitor = new SystemMonitor((nodeId = 9002, numClients = config.numClients, mgmtService = mgmtService, storageServices = storageServices));
testClients = CreateTestClients(config.numClients, config.numChains, config.numIters, config.failStorageServices, mgmtService, storageServices, systemMonitor);
storageSystem = (mgmt = mgmtService, storages = storageServices, clients = testClients);
announce eStorageSystem, (system = storageSystem,);
}
// Returns a byte sequence of `size` elements, each equal to `value`
// (empty when size is not positive).
fun InitBytes(size: int, value: int): tBytes {
  var filled: tBytes;
  // keep appending at the tail until the requested length is reached
  while (sizeof(filled) < size) {
    filled += (sizeof(filled), value);
  }
  return filled;
}
/* Service Monitor */
// Sent by SystemMonitor to a storage service it found offline; payload is the monitor.
event eRestart: machine;
// NOTE(review): eStarted and eStartUp are not used in this part of the file;
// presumably they belong to the restart handshake — confirm against StorageService.
event eStarted: machine;
event eStartUp: machine;
// Asks the receiver to shut down; the machine in the payload is where eStopped is sent back to.
event eShutDown: machine;
// Shutdown acknowledgement; payload is the machine that stopped.
event eStopped: machine;
// Watches routing updates during a test, periodically sends eRestart to storage
// services that have offline targets and, once every test client is done and no
// service is offline, shuts down the management and storage services.
machine SystemMonitor {
  var nodeId: tNodeId;
  var numClients: int;
  var mgmtService: MgmtService;
  var storageServices: tStorageServiceMap;
  var failStorageServices: int;
  var mgmtClient: MgmtClient;
  var timer: Timer;
  var stoppedClients: set[tNodeId];
  var offlineTargets: set[tTargetId];
  var offlineServices: set[tNodeId];
  var restartedServices: set[tNodeId];

  // Refreshes offlineTargets / offlineServices from the public target states in
  // routingInfo and clears restartedServices.
  fun processRoutingInfo(routingInfo: tRoutingInfo) {
    var chain: tReplicaChain;
    var tid: tTargetId;
    var isDown: bool;
    var isUp: bool;
    restartedServices = default(set[tNodeId]);
    foreach (chain in values(routingInfo.replicaChains)) {
      print format("{0}: replication chain: {1}", this, ReplicaChainToString(chain));
      foreach (tid in chain.targets) {
        // OFFLINE and LASTSRV count as down; SERVING, SYNCING and WAITING as up
        isDown = chain.states[tid] == PublicTargetState_OFFLINE ||
                 chain.states[tid] == PublicTargetState_LASTSRV;
        isUp = chain.states[tid] == PublicTargetState_SERVING ||
               chain.states[tid] == PublicTargetState_SYNCING ||
               chain.states[tid] == PublicTargetState_WAITING;
        if (isDown && !(tid in offlineTargets)) {
          // newly down target: remember it and its node
          offlineTargets += (tid);
          offlineServices += (chain.nodes[tid]);
        } else if (!isDown && isUp && (tid in offlineTargets)) {
          // target came back: forget it and its node
          offlineTargets -= (tid);
          offlineServices -= (chain.nodes[tid]);
        }
      }
    }
  }

  // Sends eRestart to every service currently considered offline and records it.
  fun restartOfflineServices() {
    var downNode: tNodeId;
    foreach (downNode in offlineServices) {
      send storageServices[downNode], eRestart, this;
      restartedServices += (downNode);
    }
  }

  start state Init {
    entry (args: (nodeId: tNodeId, numClients: int, mgmtService: MgmtService, storageServices: tStorageServiceMap)) {
      nodeId = args.nodeId;
      numClients = args.numClients;
      storageServices = args.storageServices;
      mgmtService = args.mgmtService;
      // failStorageServices = args.failStorageServices;
      mgmtClient = new MgmtClient((nodeId = nodeId, clientHost = this, mgmtService = mgmtService, sendHeartbeats = false));
      timer = CreateTimer(this);
      goto WaitUntilTestDone;
    }
  }

  // Phase 1: restart failed services until every test client reports completion.
  state WaitUntilTestDone {
    ignore eSendHeartbeat;
    entry {
      print format("wait until test done: offlineTargets {0}, offlineServices {1}, restartedServices {2}, stoppedClients {3}",
        offlineTargets, offlineServices, restartedServices, stoppedClients);
      StartTimer(timer);
    }
    // on eSendHeartbeat do (heartbeatConns: tHeartbeatConns) {
    //   send heartbeatConns.mgmtService, eRegisterClientMsg, (from = heartbeatConns.mgmtClient, nodeId = nodeId, storageClient = this);
    // }
    on eNewRoutingInfo do (latest: tRoutingInfo) {
      processRoutingInfo(latest);
      if (sizeof(offlineServices) != 0) {
        StartTimer(timer);
      }
    }
    on eTimeOut do {
      restartOfflineServices();
      if (sizeof(offlineServices) == 0) {
        CancelTimer(timer);
      } else {
        StartTimer(timer);
      }
    }
    on eTestClientDone do (doneClientId: tNodeId) {
      stoppedClients += (doneClientId);
      if (sizeof(stoppedClients) != numClients) {
        // some clients are still running
        return;
      }
      print format("all test clients stopped");
      send mgmtService, eStopFindNewFailures, 1;
      goto WaitUntilSyncDone;
    }
  }

  // Phase 2: keep restarting services until none is offline, then shut down.
  state WaitUntilSyncDone {
    ignore eSendHeartbeat;
    entry {
      print format("wait until sync done: offlineTargets {0}, offlineServices {1}, restartedServices {2}, stoppedClients {3}",
        offlineTargets, offlineServices, restartedServices, stoppedClients);
      StartTimer(timer);
    }
    on eNewRoutingInfo do (latest: tRoutingInfo) {
      processRoutingInfo(latest);
      if (sizeof(offlineServices) == 0) {
        goto ShutdownSystem;
      } else {
        StartTimer(timer);
      }
    }
    on eTimeOut do {
      restartOfflineServices();
      if (sizeof(offlineServices) == 0) {
        goto ShutdownSystem;
      } else {
        StartTimer(timer);
      }
    }
  }

  // Phase 3: stop the management client and service, wait for the service's
  // eStopped acknowledgement, then stop every storage service.
  state ShutdownSystem {
    ignore eSendHeartbeat, eNewRoutingInfo, eTimeOut;
    entry {
      var svc: StorageService;
      print format("{0}: all done, restartedServices: {1}", this, restartedServices);
      announce eStopMonitorTargetStates;
      CancelTimer(timer);
      send mgmtClient, eShutDown, this;
      send mgmtService, eShutDown, this;
      receive {
        case eStopped: (stoppedMachine: machine) {
          assert stoppedMachine == mgmtService;
        }
      }
      foreach (svc in values(storageServices)) {
        send svc, eShutDown, this;
      }
    }
  }
}
/* Test Client */
// DONE: write different part of the chunk for each write to detect any error
// DONE: stop a storage service more than once during test (stop it when it's syncing)
// TODO: [new test] stop mgmt client of an alive storage service to simulate network partition
// TODO: [new test] shut down storage service and then restart
// TODO: [new test] make storage service crash during syncing
// Test clients keyed by the id they were created with in CreateTestClients.
type tTestClientMap = map[tNodeId, TestClient];
// type tTestStatus = (nodeId: tNodeId, done: bool);
// Sent by a TestClient to the SystemMonitor when it reaches its Done state;
// payload is the client's id.
event eTestClientDone : tNodeId;
// event eTestStatusReq : tTestStatus;
// event eTestStatusResp : tTestStatus;
machine TestClient {
var clientId: tNodeId;
var chunkIdBegin: tChainId;
var chunkIdEnd: tChainId;
var numIters: int;
var failStorageServices: int;
var storageClient: StorageClient;
var storageServices: tStorageServiceMap;
var systemMonitor: SystemMonitor;
var nextWritePos: int;
var currIter: int;
var currChunkId: tChunkId;
var lastChunkVer: map[tChunkId, tChunkVer];
fun CreateNewWrite(chunkId: tChunkId, offset: int, length: int, value: int): tWriteArgs {
var dataBytes: tBytes;
var writeArgs: tWriteArgs;
dataBytes = InitBytes(length, value);
print format("data bytes size {0}", sizeof(dataBytes));
writeArgs = (from = this, chunkId = chunkId, offset = offset, length = sizeof(dataBytes), dataBytes = dataBytes);
print format("{0}: created a new write: {1}", this, writeArgs);
return writeArgs;
}
fun CreateNewRemove(chunkId: tChunkId): tWriteArgs {
var writeArgs: tWriteArgs;
writeArgs = (from = this, chunkId = chunkId, offset = 0, length = 0, dataBytes = default(tBytes));
print format("{0}: created a new remove: {1}", this, writeArgs);
return writeArgs;
}
fun CreateNewRead(chunkId: tChunkId, offset: int, length: int): tReadArgs {
var readArgs: tReadArgs;
readArgs = (from = this, chunkId = chunkId, offset = offset, length = length);
print format("{0}: created a new read: {1}", this, readArgs);
return readArgs;
}
start state Init {
// defer eTestStatusReq;
entry (args: (clientId: tNodeId, chunkIdBegin: int, chunkIdEnd: int, numIters: int, failStorageServices: int, mgmtService: MgmtService, storageServices: tStorageServiceMap, systemMonitor: SystemMonitor)) {
assert args.chunkIdBegin < args.chunkIdEnd;
clientId = args.clientId + 8000;
chunkIdBegin = args.chunkIdBegin;
chunkIdEnd = args.chunkIdEnd;
numIters = args.numIters;
failStorageServices = args.failStorageServices;
storageServices = args.storageServices;
systemMonitor = args.systemMonitor;
nextWritePos = 0;
storageClient = new StorageClient((clientId = clientId, mgmtService = args.mgmtService));
send storageClient, eWaitConnected, this;
}
on eClientConnected goto SendingWriteReq;
}
state SendingWriteReq {
entry {
var offset: int;
var length: int;
var machineToFail: machine;
/* ---------------------------------------------------------------------
currChunkId
---------------------------------------------------------------------
client 8001 | client 8002 | client 8003 | ......
---------------------------------------------------------------------
<currIter> bytes | <currIter> bytes | <currIter> bytes | ......
---------------------------------------------------------------------
^ ^
| |
offset----|<---length-->|
*/
currChunkId = chunkIdBegin + nextWritePos / numIters;
currIter = nextWritePos % numIters + 1;
offset = (clientId - 8001) * numIters + currIter - 1;
length = numIters - currIter + 1;
nextWritePos = nextWritePos + 1;
if (!(currChunkId in lastChunkVer))
lastChunkVer += (currChunkId, 0);
send storageClient, eSubmitWrite, CreateNewWrite(currChunkId, offset, length, currIter);
if (failStorageServices > 0 && choose()) {
machineToFail = choose(values(storageServices));
send machineToFail, eShutDown, machineToFail;
failStorageServices = failStorageServices - 1;
}
}
on eWriteComplete do (writeRes: tWriteRes) {
assert writeRes.status == ErrorCode_SUCCESS, format("error: {0}", writeRes);
// assert lastChunkVer[writeRes.chunkId] < writeRes.commitVer,
// format("error: last chunk version {0} >= commit version {1}", lastChunkVer[writeRes.chunkId], writeRes.commitVer);
// lastChunkVer[writeRes.chunkId] = writeRes.commitVer;
if (nextWritePos >= numIters * (chunkIdEnd - chunkIdBegin)) {
goto Done;
} else if (nextWritePos % numIters == numIters / 2) {
goto SendingRemoveReq;
} else {
goto SendingWriteReq;
}
}
// on eTestStatusReq do (from: machine) {
// send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
// }
}
state SendingReadReq {
entry {
var offset: int;
var length: int;
offset = (clientId - 101) * numIters;
length = numIters;
send storageClient, eSubmitRead, CreateNewRead(currChunkId, offset, length);
}
on eReadComplete do (readRes: tReadRes) {
var i: int;
if (readRes.status == ErrorCode_CHUNK_NOT_FOUND) {
print format("{0} chunk {1} removed by other client, re-create the chunk", this, currChunkId);
goto SendingWriteReq;
return;
}
if (readRes.status == ErrorCode_TARGET_OFFLINE) {
goto SendingReadReq;
return;
}
assert readRes.status == ErrorCode_SUCCESS, format("readRes.status {0}", readRes.status);
assert readRes.chunkId == currChunkId, format("readRes.chunkId {0} != currChunkId {1}", readRes.chunkId, currChunkId);
// assert lastChunkVer[currChunkId] <= readRes.chunkMetadata.commitVer,
// format("lastChunkVer[currChunkId:{0}] {1} > readRes.chunkMetadata.commitVer {2}",
// currChunkId, lastChunkVer[currChunkId], readRes.chunkMetadata.commitVer);
// if (lastChunkVer[currChunkId] == readRes.chunkMetadata.commitVer) {
while (i < sizeof(readRes.dataBytes)) {
assert readRes.dataBytes[i] <= Min(currIter, i + 1),
format("readRes.dataBytes[i:{0}] {1} != {2}, nextWritePos {3}, currIter {4}",
i, readRes.dataBytes[i], Min(currIter, i + 1), nextWritePos, currIter);
i = i + 1;
}
// }
if (nextWritePos % numIters == 0) {
goto SendingRemoveReq;
} else {
goto SendingWriteReq;
}
}
// on eTestStatusReq do (from: machine) {
// send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
// }
}
state SendingRemoveReq {
    // Removes the current chunk, then probes it with a read to log whether it is
    // still gone or has been re-created in the meantime.
    entry {
        send storageClient, eSubmitWrite, CreateNewRemove(currChunkId);
    }
    on eWriteComplete do (writeRes: tWriteRes) {
        assert writeRes.status == ErrorCode_SUCCESS, format("error: {0}", writeRes);
        // Disabled version bookkeeping, kept for reference:
        // assert lastChunkVer[writeRes.chunkId] < writeRes.commitVer,
        // format("error: last chunk version {0} >= commit version {1}", lastChunkVer[writeRes.chunkId], writeRes.commitVer);
        // lastChunkVer -= (writeRes.chunkId);

        // Probe read: blocks this handler until the read result arrives.
        send storageClient, eSubmitRead, CreateNewRead(currChunkId, 0, numIters);
        receive {
            case eReadComplete: (probe: tReadRes) {
                if (probe.status != ErrorCode_CHUNK_NOT_FOUND) {
                    print format("Chunk {0} re-created, result: {1}", currChunkId, probe);
                } else {
                    print format("Chunk {0} removed, result: {1}", currChunkId, probe);
                }
            }
        }
        // Finish once every write position of the chunk id range has been used.
        if (nextWritePos >= numIters * (chunkIdEnd - chunkIdBegin)) {
            goto Done;
        } else {
            goto SendingWriteReq;
        }
    }
    // on eTestStatusReq do (from: machine) {
    // send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
    // }
}
state Done {
// Final state of the test client: it installs no event handlers, so nothing further is processed here.
entry {
print format("{0}: all done", this);
// Report completion (identified by clientId) to the system monitor first,
// then ask the storage client to shut down.
send systemMonitor, eTestClientDone, clientId;
send storageClient, eShutDown, this;
}
// on eTestStatusReq do (from: machine) {
// send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = true);
// }
}
}
// no failure
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 0.
machine OneClientWriteNoFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 0,
                failDetectionMaxAttempts = 11,
                numClients = 1,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 0.
machine TwoClientsWriteNoFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 0,
                failDetectionMaxAttempts = 11,
                numClients = 2,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 3 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 0.
machine ThreeClientsWriteNoFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 0,
                failDetectionMaxAttempts = 11,
                numClients = 3,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// unreliable failure detector
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1 and failDetectionMaxAttempts = 7.
// NOTE(review): presumably the low attempt count is what makes the failure detector
// "unreliable" in this scenario; confirm against the detector implementation.
machine OneClientWriteUnreliableDetector {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 1,
                failDetectionMaxAttempts = 7,
                numClients = 1,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1 and failDetectionMaxAttempts = 7.
// NOTE(review): presumably the low attempt count is what makes the failure detector
// "unreliable" in this scenario; confirm against the detector implementation.
machine TwoClientsWriteUnreliableDetector {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 1,
                failDetectionMaxAttempts = 7,
                numClients = 2,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// with failures
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1.
machine OneClientWriteWithFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 1,
                failDetectionMaxAttempts = 11,
                numClients = 1,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 1.
machine TwoClientsWriteWithFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 1,
                failDetectionMaxAttempts = 11,
                numClients = 2,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 1 client, one chain of 3 replicas on 3 storage services,
// failStorageServices = 3 (equal to numStorageServices).
machine OneClientWriteWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 3,
                failDetectionMaxAttempts = 11,
                numClients = 1,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 3 replicas on 3 storage services,
// failStorageServices = 3 (equal to numStorageServices).
machine TwoClientsWriteWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 3,
                numStorageServices = 3,
                failStorageServices = 3,
                failDetectionMaxAttempts = 11,
                numClients = 2,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// short chain: two replicas
// Test driver: 1 client, one chain of 2 replicas on 2 storage services,
// failStorageServices = 1.
machine OneClientWriteShortChainWithFailure {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 2,
                numStorageServices = 2,
                failStorageServices = 1,
                failDetectionMaxAttempts = 11,
                numClients = 1,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// Test driver: 2 clients, one chain of 2 replicas on 2 storage services,
// failStorageServices = 2 (equal to numStorageServices).
machine TwoClientsWriteShortChainWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 2,
                numStorageServices = 2,
                failStorageServices = 2,
                failDetectionMaxAttempts = 11,
                numClients = 2,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}
// long chain: four replicas
// Test driver: 2 clients, one chain of 4 replicas on 4 storage services,
// failStorageServices = 2 and failDetectionMaxAttempts = 23.
machine TwoClientsWriteLongChainWithFailures {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            cfg = (
                chunkSize = 16,
                numChains = 1,
                numReplicas = 4,
                numStorageServices = 4,
                failStorageServices = 2,
                failDetectionMaxAttempts = 23,
                numClients = 2,
                numIters = 2
            );
            SetUpStorageSystem(this, cfg);
        }
    }
}

View File

@@ -0,0 +1,57 @@
// Test cases for the storage system. Each test names its driver machine as `main`, lists the
// spec monitors to assert, and composes the StorageSystem module with that driver machine.
// no failure
test tcOneClientWriteNoFailure [main = OneClientWriteNoFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteNoFailure };
test tcTwoClientsWriteNoFailure [main = TwoClientsWriteNoFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteNoFailure };
test tcThreeClientsWriteNoFailure [main = ThreeClientsWriteNoFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { ThreeClientsWriteNoFailure };
// unreliable failure detector
// Unlike every other test in this list, these two do not assert AllReplicasInServingState.
test tcOneClientWriteUnreliableDetector [main = OneClientWriteUnreliableDetector]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated in
union StorageSystem, { OneClientWriteUnreliableDetector };
test tcTwoClientsWriteUnreliableDetector [main = TwoClientsWriteUnreliableDetector]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated in
union StorageSystem, { TwoClientsWriteUnreliableDetector };
// with failures
test tcOneClientWriteWithFailure [main = OneClientWriteWithFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteWithFailure };
test tcTwoClientsWriteWithFailure [main = TwoClientsWriteWithFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteWithFailure };
test tcOneClientWriteWithFailures [main = OneClientWriteWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteWithFailures };
test tcTwoClientsWriteWithFailures [main = TwoClientsWriteWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteWithFailures };
// short chain
test tcOneClientWriteShortChainWithFailure [main = OneClientWriteShortChainWithFailure]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { OneClientWriteShortChainWithFailure };
test tcTwoClientsWriteShortChainWithFailures [main = TwoClientsWriteShortChainWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteShortChainWithFailures };
// long chain
test tcTwoClientsWriteLongChainWithFailures [main = TwoClientsWriteLongChainWithFailures]:
assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
union StorageSystem, { TwoClientsWriteLongChainWithFailures };

View File

@@ -0,0 +1,163 @@
// Liveness/safety monitor for socket receives: tracks how many bytes were sent and received
// and how many receive requests are still outstanding. The two hot states must eventually be
// left; AllDataRecved is the cold state reached when received == sent.
spec RecvComplete observes eSendBytes, eRecvBytes, eRecvBytesResp {
    var pendingRecv: int;
    var pendingRecvBytes: int;
    var sentBytes: int;
    var recvBytes: int;

    // Accounts for a newly submitted send.
    fun AddSendBytes(bytes: tBytes) {
        sentBytes = sizeof(bytes) + sentBytes;
    }

    // Accounts for a newly submitted receive request.
    fun AddRecvBytes(args: (from: machine, length: int)) {
        pendingRecvBytes = pendingRecvBytes + args.length;
        pendingRecv = pendingRecv + 1;
    }

    start hot state NoPendingRecv {
        entry {
            assert pendingRecvBytes == 0, format("{0} pending recv bytes not equal to zero", pendingRecvBytes);
            assert recvBytes <= sentBytes, format("error: {0} recv bytes > {1} sent bytes", recvBytes, sentBytes);
            // Everything sent so far has been received: move to the cold state.
            if (sentBytes == recvBytes) {
                goto AllDataRecved;
            }
        }
        on eSendBytes do AddSendBytes;
        on eRecvBytes goto PendingRecv with AddRecvBytes;
    }

    hot state PendingRecv {
        on eSendBytes do AddSendBytes;
        on eRecvBytes do AddRecvBytes;
        on eRecvBytesResp do (bytes: tBytes) {
            var delivered: int;
            delivered = sizeof(bytes);
            recvBytes = recvBytes + delivered;
            pendingRecvBytes = pendingRecvBytes - delivered;
            pendingRecv = pendingRecv - 1;
            if (pendingRecv == 0) {
                goto NoPendingRecv;
            }
        }
    }

    cold state AllDataRecved {
        entry {
            print format("all data received");
        }
        on eSendBytes do AddSendBytes;
        on eRecvBytes goto PendingRecv with AddRecvBytes;
    }
}
// Safety monitor: a buffer index may be posted at most once until its completion is polled,
// and every polled completion must refer to a buffer that is currently posted.
// Negative indices are only allowed on the send side, for immediate-only sends.
spec NoDuplicatePostedBuffers observes ePostSend, ePostRecv, ePollSendCQReturn, ePollRecvCQReturn {
    var postedRecvBufs: set[int];
    var postedSendBufs: set[int];

    start state Init {
        on ePostRecv do (req: tWorkRequest) {
            assert req.wrIdx >= 0, format("buffer index {0} < 0", req.wrIdx);
            assert !(req.wrIdx in postedRecvBufs), format("buffer with index {0} already posted", req.wrIdx);
            postedRecvBufs += (req.wrIdx);
        }

        on ePostSend do (req: tWorkRequest) {
            if (req.wrIdx < 0) {
                // No buffer attached: must be a send-with-immediate carrying a positive value.
                assert req.opcode == WROpCode_SEND_WITH_IMM && req.imm > 0;
            } else {
                assert !(req.wrIdx in postedSendBufs), format("buffer with index {0} already posted", req.wrIdx);
                postedSendBufs += (req.wrIdx);
            }
        }

        on ePollRecvCQReturn do (done: tWorkComplete) {
            assert done.wrIdx >= 0, format("buffer index {0} < 0", done.wrIdx);
            assert done.wrIdx in postedRecvBufs, format("unexpected buffer index {0} returned", done.wrIdx);
            postedRecvBufs -= (done.wrIdx);
        }

        on ePollSendCQReturn do (done: tWorkComplete) {
            if (done.wrIdx < 0) {
                // Completion of a buffer-less send-with-immediate.
                assert done.opcode == WCOpCode_SEND && done.imm > 0;
            } else {
                assert done.wrIdx in postedSendBufs, format("unexpected buffer index {0} returned", done.wrIdx);
                postedSendBufs -= (done.wrIdx);
            }
        }
    }
}
// Carries the test configuration; the test driver machines announce it and the
// AllIterationsProcessed spec observes it to learn numSenders and numIters.
event eSystemConfig: (config: tSystemConfig);
// Liveness monitor: every sender must eventually send, and every receiver must eventually
// receive, a message for each of the config.numIters iterations. The iteration number is read
// from the last byte of each observed byte sequence.
spec AllIterationsProcessed observes eSendBytes, eRecvBytesResp, eSystemConfig {
    var config: tSystemConfig;
    // One counter per sender: the highest iteration seen so far on the send / receive side.
    var sendIters: tBytes;
    var recvIters: tBytes;

    // Returns true when every counter in `iters` has reached config.numIters.
    fun CheckStopCondition(iters: tBytes): bool {
        var i: int;
        i = 0;
        while (i < sizeof(iters)) {
            if (iters[i] != config.numIters) {
                return false;
            }
            i = i + 1;
        }
        return true;
    }

    // Advances the first counter whose next value is `expected` and returns the updated
    // sequence. If no counter matches, logs and returns `iters` unchanged.
    fun UpdateIters(iters: tBytes, expected: int): tBytes {
        var i: int;
        i = 0;
        while (i < sizeof(iters)) {
            if (expected == iters[i] + 1) {
                iters[i] = iters[i] + 1;
                return iters;
            }
            i = i + 1;
        }
        print format("failed to update iters to {0}", expected);
        return iters;
    }

    // Applies UpdateIters using the last byte of `bytes` as the iteration tag.
    // An empty sequence (the response to a zero-length receive) carries no tag and is
    // skipped, instead of indexing bytes[-1].
    fun RecordProgress(iters: tBytes, bytes: tBytes): tBytes {
        if (sizeof(bytes) == 0) {
            return iters;
        }
        return UpdateIters(iters, bytes[sizeof(bytes) - 1]);
    }

    start state Init {
        on eSystemConfig goto Communicating with (args: (config: tSystemConfig)) {
            var i: int;
            i = 0;
            config = args.config;
            // Start one zeroed counter per sender on each side.
            while (i < config.numSenders) {
                print format("i {0}/{1}", i, config.numSenders);
                sendIters += (i, 0);
                recvIters += (i, 0);
                i = i + 1;
            }
        }
    }

    hot state Communicating {
        on eSendBytes do (bytes: tBytes) {
            sendIters = RecordProgress(sendIters, bytes);
            if (CheckStopCondition(sendIters) && CheckStopCondition(recvIters)) {
                goto Done;
            }
        }
        on eRecvBytesResp do (bytes: tBytes) {
            recvIters = RecordProgress(recvIters, bytes);
            if (CheckStopCondition(sendIters) && CheckStopCondition(recvIters)) {
                goto Done;
            }
        }
    }

    cold state Done {
        ignore eSendBytes, eRecvBytesResp;
        entry {
            print format("all iterations processed");
        }
    }
}

View File

@@ -0,0 +1,2 @@
// The RDMA network module: groups the Network, QueuePair and RDMASocket machines under one name.
module RDMANetwork = { Network, QueuePair, RDMASocket };

View File

@@ -0,0 +1,556 @@
// A byte buffer modelled as a sequence of ints. RDMASocket reduces values to 0..255
// (`% 256`) when queueing data for sending.
type tBytes = seq[int];
// Returns a buffer of `size` elements, each set to `value`.
// A non-positive `size` yields an empty buffer.
fun InitBytes(size: int, value: int): tBytes {
    var filled: tBytes;
    // Keep appending until the buffer reaches the requested length.
    while (sizeof(filled) < size) {
        filled += (sizeof(filled), value);
    }
    return filled;
}
// Returns the concatenation of `a` followed by `b`.
fun Append(a: tBytes, b: tBytes): tBytes {
    var base: int;
    var k: int;
    // Elements of `b` are inserted right after the original end of `a`, in order.
    base = sizeof(a);
    k = 0;
    while (k < sizeof(b)) {
        a += (base + k, b[k]);
        k = k + 1;
    }
    return a;
}
// Opcode of a posted work request. RDMASocket posts receive buffers with WROpCode_INVALID,
// data with WROpCode_SEND, and flow-control acks with WROpCode_SEND_WITH_IMM.
enum tWROpCode {
WROpCode_INVALID = 100,
WROpCode_SEND = 101,
WROpCode_SEND_WITH_IMM = 102
}
// Opcode reported in a work completion.
enum tWCOpCode {
WCOpCode_INVALID = 200,
WCOpCode_SEND = 201,
WCOpCode_RECV = 202,
WCOpCode_RECV_WITH_IMM = 203 // no such opcode in ibv APIs, this is added to indicate a recv completion with wc_flags = IBV_WC_WITH_IMM
}
// Maps the opcode of a sent work request to the completion opcode reported on the receiving
// side. Anything other than SEND / SEND_WITH_IMM maps to WCOpCode_INVALID.
fun ConvertWRToWCOpCode(opcode: tWROpCode): tWCOpCode {
    var completion: tWCOpCode;
    completion = WCOpCode_INVALID;
    if (opcode == WROpCode_SEND) {
        completion = WCOpCode_RECV;
    } else if (opcode == WROpCode_SEND_WITH_IMM) {
        completion = WCOpCode_RECV_WITH_IMM;
    }
    return completion;
}
// Status attached to packet responses and work completions.
// NOTE(review): only Status_OK is produced by the machines visible in this file;
// Status_AGAIN appears only in checks and in a commented-out response.
enum tStatus {
Status_OK,
Status_ERR,
Status_AGAIN
}
// A packet in flight between two queue pairs.
type tXmitPacket = (opcode: tWROpCode, payload: tBytes, length: int, imm: int);
// Reply of a queue pair to eGetPacket; `from` identifies the sending queue pair.
type tGetPacketResp = (from: QueuePair, status: tStatus, packet: tXmitPacket);
// Network -> QueuePair: deliver a packet to the receiving side.
event ePutPacket: tXmitPacket;
// Network -> QueuePair: request the next outgoing packet; the payload is the requesting network.
event eGetPacket: Network;
// QueuePair -> Network: the requested outgoing packet.
event eGetPacketResp: tGetPacketResp;
// User -> Network: ask to be notified once both sockets are connected; the payload is the requester.
event eWaitConnected: machine;
// Network -> user: both sockets are connected.
event eWaitConnectedResp;
// Network -> itself: re-enter ExchangePackets for another round.
event eNextExchangeIter;
// Models the wire between two RDMA sockets: connects to both sockets to obtain their queue
// pairs, then repeatedly pulls outgoing packets from each queue pair and delivers them to
// the other one.
machine Network {
    var qps: seq[QueuePair];
    var user: machine;

    start state Init {
        entry (args: (sock: RDMASocket, peer: RDMASocket)) {
            // Connect to the two sockets one after the other; each replies with its queue pair.
            send args.sock, eConnect, this;
            receive {
                case eConnectResp: (qp: QueuePair) { qps += (0, qp); }
            }
            send args.peer, eConnect, this;
            receive {
                case eConnectResp: (qp: QueuePair) { qps += (1, qp); }
            }
            print format("network connected {0}", qps);
            if (user != null)
                send user, eWaitConnectedResp;
            goto ExchangePackets;
        }
        on eWaitConnected do (from: machine) {
            user = from;
        }
    }

    state ExchangePackets {
        entry {
            var i: int;
            var n: int;
            i = 0;
            while (i < sizeof(qps)) {
                // Request a nondeterministic number of packets from each queue pair:
                // choose(3) yields 0..2, so n is in 1..3.
                n = choose(3) + 1;
                while (n > 0) {
                    send qps[i], eGetPacket, this;
                    n = n - 1;
                }
                i = i + 1;
            }
        }
        on eWaitConnected do (from: machine) {
            // Already connected by the time this state is reached: answer immediately.
            send from, eWaitConnectedResp;
        }
        on eGetPacketResp do (resp: tGetPacketResp) {
            var i: int;
            if (resp.status == Status_OK) {
                // Deliver to the first queue pair that is not the sender.
                i = 0;
                while (i < sizeof(qps)) {
                    if (qps[i] != resp.from)
                        break;
                    i = i + 1;
                }
                assert i < sizeof(qps), format("no peer queue pair found for {0}", resp.from);
                send qps[i], ePutPacket, resp.packet;
            }
            // Every response triggers another round of packet requests.
            send this, eNextExchangeIter;
        }
        on eNextExchangeIter goto ExchangePackets;
    }
}
// A completion entry; wrIdx echoes the index of the work request it completes.
type tWorkComplete = (wrIdx: int, opcode: tWCOpCode, payload: tBytes, length: int, imm: int, status: tStatus);
// A posted send or receive request; wrIdx identifies the buffer (negative for buffer-less sends).
type tWorkRequest = (wrIdx: int, opcode: tWROpCode, payload: tBytes, length: int, imm: int);
// Socket -> QueuePair: post a receive buffer.
event ePostRecv: tWorkRequest;
// Socket -> QueuePair: post a send.
event ePostSend: tWorkRequest;
// Socket -> QueuePair: poll one receive completion; the payload is the polling socket.
event ePollRecvCQ: RDMASocket;
// Socket -> QueuePair: poll one send completion; the payload is the polling socket.
event ePollSendCQ: RDMASocket;
// Models one end of an RDMA connection: holds posted send/receive work requests and the two
// completion queues, hands outgoing packets to the Network, and accepts incoming ones from it.
// Requests that cannot be served immediately (no packet to send, empty completion queue) are
// counted as pending and served as soon as the matching item becomes available.
machine QueuePair {
var maxNumSendWRs: int;
var maxNumRecvWRs: int;
var postedRecvWRs: seq[tWorkRequest];
var postedSendWRs: seq[tWorkRequest];
var sendCompQueue: seq[tWorkComplete];
var recvCompQueue: seq[tWorkComplete];
// NOTE(review): outboundQueue and inboundQueue are declared but never used in this machine.
var outboundQueue: seq[tXmitPacket];
var inboundQueue: seq[tXmitPacket];
// users waiting on events
var network: Network;
var sockPollSendCQ: RDMASocket;
var sockPollRecvCQ: RDMASocket;
// number of unanswered eGetPacket / ePollSendCQ / ePollRecvCQ requests
var pendingGetPkt: int;
var pendingPollSend: int;
var pendingPollRecv: int;
// Takes the oldest posted send request, records its send completion (delivering it right
// away if a send-CQ poll is pending), and hands the resulting packet to `net`.
// Callers must ensure postedSendWRs is non-empty.
fun PushPacketToNetwork(net: Network) {
var wr: tWorkRequest;
var wc: tWorkComplete;
var packet: tXmitPacket;
wr = postedSendWRs[0];
postedSendWRs -= (0);
print format("{0} -sizeof postedSendWRs {1}", this, sizeof(postedSendWRs));
wc = (wrIdx = wr.wrIdx,
opcode = WCOpCode_SEND,
payload = wr.payload,
length = wr.length,
imm = wr.imm,
status = Status_OK);
sendCompQueue += (sizeof(sendCompQueue), wc);
print format("{0} +sizeof sendCompQueue {1}", this, sizeof(sendCompQueue));
if (pendingPollSend > 0) {
NotifySendCQ(sockPollSendCQ);
pendingPollSend = pendingPollSend - 1;
// forget the waiting socket once no poll is outstanding
if (pendingPollSend == 0)
sockPollSendCQ = default(RDMASocket);
}
packet = (opcode = wr.opcode,
payload = wr.payload,
length = wr.length,
imm = wr.imm);
send net, eGetPacketResp, (from = this, status = Status_OK, packet = packet);
}
// Pops the oldest receive completion and sends it to `sock`.
// Callers must ensure recvCompQueue is non-empty.
fun NotifyRecvCQ(sock: RDMASocket) {
var wc: tWorkComplete;
wc = recvCompQueue[0];
recvCompQueue -= (0);
print format("{0} -sizeof recvCompQueue {1}", this, sizeof(recvCompQueue));
send sock, ePollRecvCQReturn, wc;
}
// Pops the oldest send completion and sends it to `sock`.
// Callers must ensure sendCompQueue is non-empty.
fun NotifySendCQ(sock: RDMASocket) {
var wc: tWorkComplete;
wc = sendCompQueue[0];
sendCompQueue -= (0);
print format("{0} -sizeof sendCompQueue {1}", this, sizeof(sendCompQueue));
send sock, ePollSendCQReturn, wc;
}
start state Init {
entry (args: (maxNumSendWRs: int, maxNumRecvWRs: int)) {
print format("qp init start {0}", this);
maxNumSendWRs = args.maxNumSendWRs;
maxNumRecvWRs = args.maxNumRecvWRs;
print format("qp init done {0}", this);
goto WaitForEvents;
}
}
state WaitForEvents {
on ePostRecv do (wr: tWorkRequest) {
// posting beyond the configured receive capacity is an error
assert sizeof(postedRecvWRs) < maxNumRecvWRs;
postedRecvWRs += (sizeof(postedRecvWRs), wr);
print format("{0} +sizeof postedRecvWRs {1}", this, sizeof(postedRecvWRs));
}
on ePostSend do (wr: tWorkRequest) {
// posting beyond the configured send capacity is an error
assert sizeof(postedSendWRs) < maxNumSendWRs;
postedSendWRs += (sizeof(postedSendWRs), wr);
print format("{0} +sizeof postedSendWRs {1}", this, sizeof(postedSendWRs));
// the network is already waiting for a packet: serve one pending request now
if (pendingGetPkt > 0) {
PushPacketToNetwork(network);
pendingGetPkt = pendingGetPkt - 1;
if (pendingGetPkt == 0)
network = default(Network);
}
}
on ePutPacket do (packet: tXmitPacket) {
var wr: tWorkRequest;
var wc: tWorkComplete;
var i: int;
// an incoming packet consumes the oldest posted receive buffer; having none is an error
assert sizeof(postedRecvWRs) > 0, "error: receive not ready";
wr = postedRecvWRs[0];
postedRecvWRs -= (0);
print format("{0} -sizeof postedRecvWRs {1}", this, sizeof(postedRecvWRs));
assert packet.length <= wr.length;
wc = (wrIdx = wr.wrIdx,
opcode = ConvertWRToWCOpCode(packet.opcode),
payload = wr.payload,
length = packet.length,
imm = packet.imm,
status = Status_OK);
// copy the packet bytes into the receive buffer; `i` starts at its default value 0
while (i < packet.length) {
wc.payload[i] = packet.payload[i];
i = i + 1;
}
recvCompQueue += (sizeof(recvCompQueue), wc);
print format("{0} +sizeof recvCompQueue {1}", this, sizeof(recvCompQueue));
// a receive-CQ poll is already waiting: deliver one completion now
if (pendingPollRecv > 0) {
NotifyRecvCQ(sockPollRecvCQ);
pendingPollRecv = pendingPollRecv - 1;
if (pendingPollRecv == 0)
sockPollRecvCQ = default(RDMASocket);
}
}
on eGetPacket do (net: Network) {
if (sizeof(postedSendWRs) == 0) {
// nothing to send yet: remember the request; all pending requests must come from the same network
// send net, eGetPacketResp, (from = this, status = Status_AGAIN, packet = default(tXmitPacket));
pendingGetPkt = pendingGetPkt + 1;
if (pendingGetPkt > 1)
assert network == net;
else
network = net;
} else {
PushPacketToNetwork(net);
}
}
on ePollRecvCQ do (sock: RDMASocket) {
if (sizeof(recvCompQueue) == 0) {
// no completion yet: remember the poll; all pending polls must come from the same socket
pendingPollRecv = pendingPollRecv + 1;
if (pendingPollRecv > 1)
assert sockPollRecvCQ == sock;
else
sockPollRecvCQ = sock;
} else {
NotifyRecvCQ(sock);
}
}
on ePollSendCQ do (sock: RDMASocket) {
if (sizeof(sendCompQueue) == 0) {
// no completion yet: remember the poll; all pending polls must come from the same socket
pendingPollSend = pendingPollSend + 1;
if (pendingPollSend > 1)
assert sockPollSendCQ == sock;
else
sockPollSendCQ = sock;
} else {
NotifySendCQ(sock);
}
}
}
}
// A send buffer together with the index used as its work request id.
type tTaggedBuffer = (bufIdx: int, payload: tBytes, length: int);
// QueuePair -> socket: one receive completion.
event ePollRecvCQReturn: tWorkComplete;
// QueuePair -> socket: one send completion.
event ePollSendCQReturn: tWorkComplete;
// User -> socket: request `length` bytes; the response goes to `from`.
event eRecvBytes: (from: machine, length: int);
// User -> socket: queue bytes for sending.
event eSendBytes: tBytes;
// Socket -> user: the bytes requested with eRecvBytes.
event eRecvBytesResp: tBytes;
// Network -> socket: connect request; the payload is the connecting network.
event eConnect: Network;
// Socket -> network: the socket's queue pair.
event eConnectResp: QueuePair;
// Socket -> itself: re-enter PollCQEvents.
event eNextPollCQIter;
// Byte-stream socket on top of a QueuePair. It splits queued bytes into send buffers, keeps
// receive buffers posted, reassembles received bytes for a waiting user, and uses
// send-with-immediate messages to tell the remote side how many receive buffers were re-posted.
machine RDMASocket {
var qp: QueuePair;
var sockId: int;
var bufSize: int;
var bufNum: int; // assume the numbers of local and remote send/recv buffers are the same
// extra work-request slots reserved for flow-control messages
var flowCtrlBufNum: int;
var unusedSendBufs: seq[tTaggedBuffer];
// number of receive buffers the remote side is believed to have posted
var remotePostedBufNum: int;
var effectiveSendBufNum: int;
// a flow-control ack is sent after this many data receives
var numRecvBeforeAck: int;
var numRecvSinceLastAck: int;
// bytes queued by the user but not yet posted / bytes received but not yet handed to a user
var bytesToSend: tBytes;
var bytesRecved: tBytes;
// pending recv
var userWaited: machine;
var recvedData: tBytes;
var recvLength: int;
// number of CQ polls issued to the queue pair that have not returned yet
var pendingPollSendCQ: int;
var pendingPollRecvCQ: int;
start state Init {
entry (args: (sockId: int, bufSize: int, bufNum: int, numRecvBeforeAck: int)) {
print format("socket init start {0}", this);
// bufNum / numRecvBeforeAck rounded up (for positive arguments)
flowCtrlBufNum = (args.bufNum + args.numRecvBeforeAck - 1) / args.numRecvBeforeAck;
qp = new QueuePair((
maxNumSendWRs = args.bufNum + flowCtrlBufNum,
maxNumRecvWRs = args.bufNum + flowCtrlBufNum));
sockId = args.sockId;
bufSize = args.bufSize;
bufNum = args.bufNum;
remotePostedBufNum = args.bufNum;
effectiveSendBufNum = args.bufNum;
numRecvBeforeAck = args.numRecvBeforeAck;
numRecvSinceLastAck = 0;
print format("socket init done {0} with qp {1}", this, qp);
goto BeforeConnect;
}
}
state BeforeConnect {
entry {
var i: int;
// NOTE(review): the message prints bufNum, but bufNum + flowCtrlBufNum buffers are posted below.
print format("post {0} recv buffers in {1}", bufNum, this);
i = 0;
// buffer indices are offset by sockId * bufNum * 2
while (i < bufNum + flowCtrlBufNum) {
send qp, ePostRecv, (wrIdx = sockId * bufNum * 2 + i, opcode = WROpCode_INVALID, payload = InitBytes(bufSize, 0), length = bufSize, imm = 0);
i = i + 1;
}
print format("create {0} send buffers", effectiveSendBufNum);
i = 0;
while (i < effectiveSendBufNum) {
unusedSendBufs += (i, (bufIdx = sockId * bufNum * 2 + i, payload = InitBytes(bufSize, 0), length = bufSize));
i = i + 1;
}
goto WaitConnect;
}
}
state WaitConnect {
on eConnect do (net: Network) {
print format("{0} connected to {1}", this, net);
send net, eConnectResp, qp;
goto PollCQEvents;
}
}
state PollCQEvents {
entry {
var i: int;
var sendBuf: tTaggedBuffer;
print format("{0} sizeof(bytesToSend) {1} && sizeof(unusedSendBufs) {2} && remotePostedBufNum {3}",
this, sizeof(bytesToSend), sizeof(unusedSendBufs), remotePostedBufNum);
if (sizeof(bytesToSend) > 0 && remotePostedBufNum == 0) {
print format("{0}: remote side not posted any recv buffer", this);
}
if (sizeof(bytesToSend) > 0 && sizeof(unusedSendBufs) == 0) {
print format("{0}: local side does not have send buffer", this);
}
// post as many sends as data, free send buffers and remote receive credits allow
while (sizeof(bytesToSend) > 0 && sizeof(unusedSendBufs) > 0 && remotePostedBufNum > 0) {
remotePostedBufNum = remotePostedBufNum - 1;
sendBuf = unusedSendBufs[0];
unusedSendBufs -= (0);
i = 0;
// fill the buffer with up to bufSize queued bytes; `i` ends up as the number of bytes used
while (i < bufSize && sizeof(bytesToSend) > 0) {
sendBuf.payload[i] = bytesToSend[0];
bytesToSend -= (0);
i = i + 1;
}
send qp, ePostSend, (wrIdx = sendBuf.bufIdx, opcode = WROpCode_SEND, payload = sendBuf.payload, length = i, imm = 0);
}
// top up outstanding receive-CQ polls to bufNum + flowCtrlBufNum
if (pendingPollRecvCQ < bufNum + flowCtrlBufNum) {
i = 0;
// NOTE(review): `i` is never incremented; the loop ends because pendingPollRecvCQ grows
// until it equals bufNum + flowCtrlBufNum.
while (i < bufNum + flowCtrlBufNum - pendingPollRecvCQ) {
send qp, ePollRecvCQ, this;
pendingPollRecvCQ = pendingPollRecvCQ + 1;
}
}
// top up outstanding send-CQ polls to bufNum
if (pendingPollSendCQ < bufNum) {
i = 0;
// NOTE(review): `i` is never incremented here either; the loop ends when pendingPollSendCQ reaches bufNum.
while (i < bufNum - pendingPollSendCQ) {
send qp, ePollSendCQ, this;
pendingPollSendCQ = pendingPollSendCQ + 1;
}
}
}
on ePollRecvCQReturn do (wc: tWorkComplete) {
var i: int;
if (wc.status == Status_OK) {
assert wc.opcode == WCOpCode_RECV_WITH_IMM || wc.opcode == WCOpCode_RECV;
if (wc.opcode == WCOpCode_RECV_WITH_IMM) {
// flow-control message: the immediate value is the number of buffers the remote side re-posted
remotePostedBufNum = remotePostedBufNum + wc.imm;
print format("{0} received flow control packet with imm {1}, remotePostedBufNum {2}", this, wc.imm, remotePostedBufNum);
} else if (wc.opcode == WCOpCode_RECV) {
// data message: append the received bytes to the local receive queue
i = 0;
while (i < wc.length) {
bytesRecved += (sizeof(bytesRecved), wc.payload[i]);
i = i + 1;
}
print format("recv cq returned, user {0} waited", userWaited);
if (userWaited != null) {
print format("recvLength {0}, sizeof(recvedData) {1}, sizeof(bytesRecved) {2}",
recvLength, sizeof(recvedData), sizeof(bytesRecved));
i = 0;
// continue filling the waiting user's request; `i` only counts copied bytes for the log line
while (sizeof(recvedData) < recvLength && sizeof(bytesRecved) > 0) {
recvedData += (sizeof(recvedData), bytesRecved[0]);
bytesRecved -= (0);
i = i + 1;
}
print format("copy recv data, copy length {0}, recvLength {1}, recvedData {2}", i, recvLength, recvedData);
// request satisfied: reply and clear the pending-receive state
if (sizeof(recvedData) == recvLength) {
send userWaited, eRecvBytesResp, recvedData;
userWaited = default(machine);
recvedData = default(tBytes);
recvLength = 0;
}
}
}
// re-post the same buffer for receiving
send qp, ePostRecv, (wrIdx = wc.wrIdx, opcode = WROpCode_INVALID, payload = wc.payload, length = bufSize, imm = 0);
if (wc.opcode == WCOpCode_RECV) {
numRecvSinceLastAck = numRecvSinceLastAck + 1;
// after numRecvBeforeAck data receives, tell the remote side how many buffers were re-posted
if (numRecvSinceLastAck == numRecvBeforeAck) {
send qp, ePostSend, (wrIdx = -1, opcode = WROpCode_SEND_WITH_IMM, payload = default(tBytes), length = 0, imm = numRecvSinceLastAck);
numRecvSinceLastAck = 0;
}
}
assert pendingPollRecvCQ > 0;
pendingPollRecvCQ = pendingPollRecvCQ - 1;
send this, eNextPollCQIter;
} else if (wc.status != Status_AGAIN) {
assert false, "Unexpected wc status";
}
}
on ePollSendCQReturn do (wc: tWorkComplete) {
var sendBuf: tTaggedBuffer;
if (wc.status == Status_OK) {
if (wc.opcode == WCOpCode_SEND) {
// a completed data send returns its buffer to the free list; flow-control sends
// (negative wrIdx) have no buffer to return
if (wc.wrIdx >= 0) {
sendBuf = (bufIdx = wc.wrIdx, payload = wc.payload, length = bufSize);
unusedSendBufs += (sizeof(unusedSendBufs), sendBuf);
}
} else {
assert false, "Unexpected wc opcode";
}
assert pendingPollSendCQ > 0;
pendingPollSendCQ = pendingPollSendCQ - 1;
send this, eNextPollCQIter;
} else if (wc.status != Status_AGAIN) {
assert false, "Unexpected wc status";
}
}
on eRecvBytes do (args: (from: machine, length: int)) {
var i: int;
print format("{0} requested to receive {1} bytes, sizeof(bytesRecved) {2}", args.from, args.length, sizeof(bytesRecved));
i = 0;
// serve as much of the request as possible from bytes that have already arrived
// NOTE(review): inserting at index `i` assumes recvedData is empty here, i.e. that no
// earlier receive request is still pending — confirm that callers issue one at a time.
while (i < args.length && sizeof(bytesRecved) > 0) {
recvedData += (i, bytesRecved[0]);
bytesRecved -= (0);
i = i + 1;
}
if (sizeof(recvedData) == args.length) {
send args.from, eRecvBytesResp, recvedData;
recvedData = default(tBytes);
} else {
// not enough data yet: remember the requester; ePollRecvCQReturn completes the request
userWaited = args.from;
recvLength = args.length;
}
send this, eNextPollCQIter;
}
on eSendBytes do (bytes: tBytes) {
var i: int;
i = 0;
// queue the bytes for sending, reducing each value modulo 256
while (i < sizeof(bytes)) {
bytesToSend += (sizeof(bytesToSend), bytes[i] % 256);
i = i + 1;
}
send this, eNextPollCQIter;
}
on eNextPollCQIter goto PollCQEvents;
}
}

View File

@@ -0,0 +1,418 @@
// Encodes `n` as 4 bytes, least significant byte first.
// `n` must be non-negative and fit in 4 bytes (n < 256^4); both are asserted.
fun ConvertToInt4Bytes(n: int): tBytes {
    var bytes: tBytes;
    var rest: int;
    var i: int;
    // A negative value would produce negative "bytes" and could still pass the final check.
    assert n >= 0, format("cannot encode negative value {0} as 4 bytes", n);
    rest = n;
    i = 0;
    while (i < 4) {
        bytes += (i, rest % 256);
        rest = rest / 256;
        i = i + 1;
    }
    // Anything left over means the value needs more than 4 bytes.
    assert rest == 0, format("value {0} does not fit in 4 bytes", n);
    return bytes;
}
// Decodes the first 4 elements of `bytes` as an integer, least significant byte first.
// Asserts that at least 4 elements are present.
fun Convert4BytesToInt(bytes: tBytes): int {
    var value: int;
    var idx: int;
    assert sizeof(bytes) >= 4;
    // Horner's scheme, starting from the most significant of the 4 bytes.
    value = 0;
    idx = 3;
    while (idx >= 0) {
        value = value * 256 + bytes[idx];
        idx = idx - 1;
    }
    return value;
}
// Requests exactly `length` bytes from `socket` on behalf of `user` and blocks until the
// socket answers with eRecvBytesResp. Returns the received bytes.
// NOTE(review): the response is addressed to `user`, while the blocking receive runs in the
// calling machine; every caller in this file passes `this` as `user`.
fun RecvBytes(user: machine, socket: RDMASocket, length: int): tBytes {
    var result: tBytes;
    send socket, eRecvBytes, (from = user, length = length);
    receive {
        case eRecvBytesResp: (bytes: tBytes) {
            assert sizeof(bytes) == length,
                format("received {0} bytes, expected {1}", sizeof(bytes), length);
            result = bytes;
        }
    }
    return result;
}
// Receives one length-prefixed message: a 4-byte length header followed by that many bytes.
// Returns the message body (empty when the decoded length is 0).
fun RecvMessage(user: machine, socket: RDMASocket): tBytes {
    var lenPrefix: tBytes;
    var bodyLen: int;
    var body: tBytes;
    lenPrefix = RecvBytes(user, socket, 4);
    bodyLen = Convert4BytesToInt(lenPrefix);
    print format("{0} try to receive message of length {1}", user, bodyLen);
    body = RecvBytes(user, socket, bodyLen);
    print format("{0} received message {1}", user, body);
    return body;
}
// Sends `message` over `socket`, prefixed with its length encoded as 4 bytes.
// `user` is only used for logging.
fun SendMessage(user: machine, socket: RDMASocket, message: tBytes) {
    var framed: tBytes;
    framed = Append(ConvertToInt4Bytes(sizeof(message)), message);
    send socket, eSendBytes, framed;
    print format("{0} sent message {1}", user, message);
}
// Configuration shared by the RDMA tests.
// bufSize, bufNum and numRecvBeforeAck are passed to each RDMASocket; numIters is the number
// of messages each sender sends; numSenders is the number of per-sender counters the
// AllIterationsProcessed spec tracks.
type tSystemConfig = (
bufSize: int,
bufNum: int,
numRecvBeforeAck: int,
numIters: int,
numSenders: int
);
// The two connected sockets and the network machine that links them.
type tNetworkSystem = (
sock: RDMASocket,
peer: RDMASocket,
net: Network
);
// Creates two sockets (ids 1 and 2) with the buffer settings from `config`, links them with
// a Network machine, and blocks until the network reports that both sides are connected.
// `user` is the machine that receives the eWaitConnectedResp notification.
fun CreateRDMASocketPair(user: machine, config: tSystemConfig): tNetworkSystem {
    var local: RDMASocket;
    var remote: RDMASocket;
    var link: Network;
    var created: tNetworkSystem;
    local = new RDMASocket((sockId = 1, bufSize = config.bufSize, bufNum = config.bufNum, numRecvBeforeAck = config.numRecvBeforeAck));
    remote = new RDMASocket((sockId = 2, bufSize = config.bufSize, bufNum = config.bufNum, numRecvBeforeAck = config.numRecvBeforeAck));
    link = new Network((sock = local, peer = remote));
    created = (sock = local, peer = remote, net = link);
    print format("network system created {0}", created);
    // Wait for the network to finish connecting both sockets.
    send link, eWaitConnected, user;
    receive {
        case eWaitConnectedResp: { }
    }
    print format("network system connected {0}", created);
    return created;
}
/* Ping-pong server and client */
// Echo server: sends every received message back unchanged and stops on an empty message.
machine PingPongServer {
    var socket: RDMASocket;

    start state Init {
        entry (args: (socket: RDMASocket)) {
            print format("server init {0}", this);
            socket = args.socket;
            print format("server started {0}", this);
            goto ProcessPing;
        }
    }

    state ProcessPing {
        entry {
            var request: tBytes;
            request = RecvMessage(this, socket);
            if (sizeof(request) > 0) {
                // Echo the message, then wait for the next one.
                SendMessage(this, socket, request);
                goto ProcessPing;
            } else {
                // An empty message means the client disconnected.
                goto Stopped;
            }
        }
    }

    state Stopped {
        entry {
            print format("{0} stopped", this);
        }
    }
}
// Ping-pong client: for config.numIters rounds, sends a message of random length filled with
// the round number (mod 256) and checks that the reply is identical. Sends an empty message
// at the end to signal disconnect.
machine PingPongClient {
    var config: tSystemConfig;
    var socket: RDMASocket;
    var server: PingPongServer;
    var currIter: int;
    var message: tBytes;
    var response: tBytes;

    start state Init {
        entry (args: (config: tSystemConfig, socket: RDMASocket, server: PingPongServer)) {
            print format("client init {0}", this);
            config = args.config;
            socket = args.socket;
            server = args.server;
            currIter = 0;
            print format("client init done {0}", this);
            goto SendPing;
        }
    }

    state SendPing {
        entry {
            var pingLen: int;
            currIter = currIter + 1;
            // Nondeterministic length between 1 and bufSize * bufNum * 2.
            pingLen = choose(config.bufSize * config.bufNum * 2) + 1;
            message = InitBytes(pingLen, currIter % 256);
            print format("#{0} message {1}", currIter, message);
            SendMessage(this, socket, message);
            goto WaitPong;
        }
    }

    state WaitPong {
        entry {
            var pos: int;
            response = RecvMessage(this, socket);
            assert sizeof(message) == sizeof(response);
            // The echo must match the ping byte for byte.
            pos = 0;
            while (pos < sizeof(response)) {
                assert response[pos] == message[pos] && message[pos] == currIter % 256;
                pos = pos + 1;
            }
            if (currIter >= config.numIters) {
                goto Stopped;
            } else {
                goto SendPing;
            }
        }
    }

    state Stopped {
        entry {
            SendMessage(this, socket, default(tBytes)); // disconnect
            print format("{0} stopped", this);
        }
    }
}
// Test driver for the ping-pong scenario: announces the configuration, builds the socket
// pair, then starts the server on one socket and the client on the other.
machine PingPongTest {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            var sockets: tNetworkSystem;
            var server: PingPongServer;
            var client: PingPongClient;
            print format("test init {0}", this);
            // numSenders = 2: both the client and the echoing server send messages.
            cfg = (bufSize = 16, bufNum = 10, numRecvBeforeAck = 4, numIters = 10, numSenders = 2);
            announce eSystemConfig, (config = cfg,);
            sockets = CreateRDMASocketPair(this, cfg);
            server = new PingPongServer((socket = sockets.peer,));
            client = new PingPongClient((config = cfg, socket = sockets.sock, server = server));
            print format("test init done {0}", this);
        }
    }
}
/* One-way communication */
// Receiver of the one-way scenario: keeps receiving messages until an empty one arrives.
machine OneWayReceiver {
    var socket: RDMASocket;

    start state Init {
        entry (args: (socket: RDMASocket)) {
            print format("receiver init {0}", this);
            socket = args.socket;
            print format("receiver started {0}", this);
            goto Receiving;
        }
    }

    state Receiving {
        entry {
            var incoming: tBytes;
            incoming = RecvMessage(this, socket);
            if (sizeof(incoming) > 0) {
                goto Receiving;
            } else {
                // An empty message means the sender disconnected.
                goto Stopped;
            }
        }
    }

    state Stopped {
        entry {
            print format("{0} stopped", this);
        }
    }
}
// Sender of the one-way scenario: sends config.numIters messages of random length, each
// filled with the iteration number (mod 256), then an empty message to signal disconnect.
machine OneWaySender {
    var config: tSystemConfig;
    var socket: RDMASocket;
    var receiver: OneWayReceiver;
    var currIter: int;
    var message: tBytes;
    var response: tBytes;

    start state Init {
        entry (args: (config: tSystemConfig, socket: RDMASocket, receiver: OneWayReceiver)) {
            print format("sender init {0}", this);
            config = args.config;
            socket = args.socket;
            receiver = args.receiver;
            currIter = 0;
            print format("sender init done {0}", this);
            goto Sending;
        }
    }

    state Sending {
        entry {
            var payloadLen: int;
            currIter = currIter + 1;
            // Nondeterministic length between 1 and bufSize * bufNum * 2.
            payloadLen = choose(config.bufSize * config.bufNum * 2) + 1;
            message = InitBytes(payloadLen, currIter % 256);
            print format("#{0} message {1}", currIter, message);
            SendMessage(this, socket, message);
            if (currIter >= config.numIters) {
                goto Stopped;
            } else {
                goto Sending;
            }
        }
    }

    state Stopped {
        entry {
            SendMessage(this, socket, default(tBytes)); // disconnect
            print format("{0} stopped", this);
        }
    }
}
// Test driver for the one-way scenario: announces the configuration, builds the socket pair,
// then starts the receiver on one socket and the sender on the other.
machine OneWayCommunication {
    start state Init {
        entry {
            var cfg: tSystemConfig;
            var sockets: tNetworkSystem;
            var receiver: OneWayReceiver;
            var sender: OneWaySender;
            // numSenders = 1: only one side sends messages.
            cfg = (bufSize = 16, bufNum = 10, numRecvBeforeAck = 4, numIters = 10, numSenders = 1);
            announce eSystemConfig, (config = cfg,);
            sockets = CreateRDMASocketPair(this, cfg);
            receiver = new OneWayReceiver((socket = sockets.peer,));
            sender = new OneWaySender((config = cfg, socket = sockets.sock, receiver = receiver));
        }
    }
}
/* Two-way communication */
// Test driver -> TwoWaySenderReceiver: tells the machine who its peer is and starts its sending.
event eSetPeer: (peer: TwoWaySenderReceiver);
// TwoWaySenderReceiver -> itself: run the next send iteration.
event eNextSendAndRecvIter;
// One side of the two-way scenario: sends config.numIters messages while receiving the
// peer's messages asynchronously through eRecvBytesResp in every state.
machine TwoWaySenderReceiver {
    var config: tSystemConfig;
    var socket: RDMASocket;
    var peer: TwoWaySenderReceiver;
    var currIter: int;
    // Length of the message body currently awaited; negative while a 4-byte header is awaited.
    var recvLen: int;
    var message: tBytes;
    var response: tBytes;

    // Receive-side step: alternates between reading a 4-byte length header and reading a
    // body of that length. A decoded length of 0 issues no further receive request.
    fun ProcessRecv (bytes: tBytes) {
        if (recvLen >= 0) {
            // A body arrived: check its size and ask for the next header.
            assert sizeof(bytes) == recvLen;
            recvLen = -1;
            send socket, eRecvBytes, (from = this, length = 4);
        } else {
            // A header arrived: decode the body length and ask for the body if there is one.
            assert sizeof(bytes) == 4;
            recvLen = Convert4BytesToInt(bytes);
            if (recvLen > 0) {
                send socket, eRecvBytes, (from = this, length = recvLen);
            }
        }
    }

    start state Init {
        entry (args: (config: tSystemConfig, socket: RDMASocket)) {
            print format("two-way sender/receiver init {0}", this);
            config = args.config;
            socket = args.socket;
            currIter = 0;
            recvLen = -1;
            print format("two-way sender/receiver init done {0}", this);
            // Ask for the first header right away.
            send socket, eRecvBytes, (from = this, length = 4);
            goto WaitPeer;
        }
    }

    state WaitPeer {
        on eSetPeer goto SendAndRecv with (args: (peer: TwoWaySenderReceiver)) {
            peer = args.peer;
        }
        on eRecvBytesResp do ProcessRecv;
    }

    state SendAndRecv {
        entry {
            var payloadLen: int;
            currIter = currIter + 1;
            // Nondeterministic length between 1 and bufSize * bufNum * 2.
            payloadLen = choose(config.bufSize * config.bufNum * 2) + 1;
            message = InitBytes(payloadLen, currIter % 256);
            print format("#{0} message {1}", currIter, message);
            SendMessage(this, socket, message);
            if (currIter != config.numIters) {
                // Schedule the next iteration through the event queue, so that pending
                // eRecvBytesResp events can be handled in between.
                send this, eNextSendAndRecvIter;
            } else {
                goto StopSend;
            }
        }
        on eNextSendAndRecvIter goto SendAndRecv;
        on eRecvBytesResp do ProcessRecv;
    }

    state StopSend {
        entry {
            SendMessage(this, socket, default(tBytes)); // disconnect
            print format("{0} stopped sending", this);
        }
        on eRecvBytesResp do ProcessRecv;
    }
}
// Test driver for the two-way scenario: announces the system configuration,
// creates an RDMA socket pair, starts a TwoWaySenderReceiver on each end and
// then introduces the two machines to each other.
machine TwoWayCommunication {
  start state Init {
    entry {
      var cfg: tSystemConfig;
      var pair: tNetworkSystem;
      var first: TwoWaySenderReceiver;
      var second: TwoWaySenderReceiver;

      cfg = (bufSize = 16, bufNum = 10, numRecvBeforeAck = 4, numIters = 10, numSenders = 2);
      announce eSystemConfig, (config = cfg,);
      pair = CreateRDMASocketPair(this, cfg);

      // one machine per end of the socket pair
      first = new TwoWaySenderReceiver((config = cfg, socket = pair.peer));
      second = new TwoWaySenderReceiver((config = cfg, socket = pair.sock));

      // sending begins in each machine once it learns its peer
      send first, eSetPeer, (peer = second,);
      send second, eSetPeer, (peer = first,);
    }
  }
}

View File

@@ -0,0 +1,11 @@
// All three test cases check the same monitors (RecvComplete,
// NoDuplicatePostedBuffers, AllIterationsProcessed) against the RDMANetwork
// module composed with the machines of one scenario.

// Ping-pong scenario, driven by PingPongTest.
test tcPingPong [main = PingPongTest]:
assert RecvComplete, NoDuplicatePostedBuffers, AllIterationsProcessed in
(union RDMANetwork, { PingPongServer, PingPongClient, PingPongTest });
// One-way scenario, driven by OneWayCommunication.
test tcOneWay [main = OneWayCommunication]:
assert RecvComplete, NoDuplicatePostedBuffers, AllIterationsProcessed in
(union RDMANetwork, { OneWayReceiver, OneWaySender, OneWayCommunication });
// Two-way scenario, driven by TwoWayCommunication.
test tcTwoWay [main = TwoWayCommunication]:
assert RecvComplete, NoDuplicatePostedBuffers, AllIterationsProcessed in
(union RDMANetwork, { TwoWaySenderReceiver, TwoWayCommunication });

View File

@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- .NET project for the RDMASocket P specification; build output goes to POutput/. -->
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<ApplicationIcon />
<OutputType>Exe</OutputType>
<StartupObject />
<LangVersion>latest</LangVersion>
<OutputPath>POutput/</OutputPath>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Coyote" Version="1.0.5"/>
<!-- NOTE(review): PCSharpRuntime uses a floating version ("*") here, while the DataStorage project file pins 1.1.15. Confirm whether this should be pinned as well. -->
<PackageReference Include="PCSharpRuntime" Version="*"/>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,10 @@
<!-- P project file for rdma socket -->
<Project>
<ProjectName>RDMASocket</ProjectName>
<InputFiles>
<PFile>./PSrc/</PFile>
<PFile>./PSpec/</PFile>
<PFile>./PTst/</PFile>
</InputFiles>
<OutputDir>./PGenerated/</OutputDir>
</Project>

71
specs/README.md Normal file
View File

@@ -0,0 +1,71 @@
# P specifications
## Build prerequisites
Follow the [official guide](https://p-org.github.io/P/getstarted/install/) to install the [P](https://github.com/p-org/P) framework.
Or, if `dotnet` is already installed, run the following command to restore the `p` command.
```
dotnet tool restore
```
## Run tests
A helper script [`RunTests.ps1`](RunTests.ps1), implemented in [PowerShell](https://learn.microsoft.com/en-us/powershell/scripting/install/installing-powershell), is used to run tests and summarize the results.
[`DataStorage`](DataStorage) specifies the CRAQ implementation in 3FS.
```powershell
PS > cd DataStorage
PS > ..\RunTests.ps1
...
-----------------------
Summary of test results
-----------------------
[02/26/2025 10:57:58] Elapsed time: 372.4s
test status seed schedules seconds min avg max
---- ------ ---- --------- ------- --- --- ---
tcOneClientWriteNoFailure[0] pass 1402445568 10 15.8 -1 -1 -1
tcTwoClientsWriteNoFailure[0] pass 189933208 10 23.6 -1 -1 -1
tcThreeClientsWriteNoFailure[0] pass 3060254145 10 40.7 -1 -1 -1
tcOneClientWriteUnreliableDetector[0] pass 2016460916 10 17.7 -1 -1 -1
tcTwoClientsWriteUnreliableDetector[0] pass 18777396 10 24.7 -1 -1 -1
tcOneClientWriteWithFailure[0] pass 2559323541 10 15.7 -1 -1 -1
tcTwoClientsWriteWithFailure[0] pass 1199246267 10 29.9 -1 -1 -1
tcOneClientWriteWithFailures[0] pass 672618818 10 15.4 -1 -1 -1
tcTwoClientsWriteWithFailures[0] pass 1908913074 10 32.3 -1 -1 -1
tcOneClientWriteShortChainWithFailure[0] pass 3031701162 10 6.3 -1 -1 -1
tcTwoClientsWriteShortChainWithFailures[0] pass 2907349611 10 16.6 -1 -1 -1
tcTwoClientsWriteLongChainWithFailures[0] pass 260515276 10 67.0 -1 -1 -1
[02/26/2025 10:57:58] All tests passed
```
[`RDMASocket`](RDMASocket) verifies the RDMA socket implementation in 3FS.
```powershell
PS > cd RDMASocket
PS > ..\RunTests.ps1
...
-----------------------
Summary of test results
-----------------------
[02/26/2025 11:19:22] Elapsed time: 40.6s
test status seed schedules seconds min avg max
---- ------ ---- --------- ------- --- --- ---
tcPingPong[0] pass 3776118231 10 9.8 -1 -1 -1
tcOneWay[0] pass 200216558 10 3.6 -1 -1 -1
tcTwoWay[0] pass 1923093627 10 7.1 -1 -1 -1
[02/26/2025 11:19:22] All tests passed
```
[`Timer`](Timer) includes modified portions of the following open-source project:
- The [original implementation](https://github.com/p-org/P/tree/master/Tutorial/Common/Timer) of `Timer` is part of [P tutorials](https://p-org.github.io/P/tutsoutline/) licensed under MIT License.

237
specs/RunTests.ps1 Normal file
View File

@@ -0,0 +1,237 @@
# Script parameters for the P test runner.
param (
# File filter used to locate the .pproj file in the current directory; the first match is used.
[String] $ProjectFilter = "*.pproj",
# Passed to the checker as --max-steps.
[Alias('ms')]
[Int] $MaxSteps = 200000,
# Passed to the checker as --schedules.
[Alias('i')]
[Int] $NumIters = 10,
# Number of checker processes started for each test.
[Alias('p')]
[Int] $Parallel = 1,
# Passed to the checker as --seed only when it is >= 0.
[Alias('s')]
[Int64] $Seed = -1,
# Scheduling strategy; passed to the checker as --sch-<value>.
[ValidateSet('random', 'pos', 'feedback', 'feedbackpos')]
[Alias('sch')]
[String] $Scheduling = "pos",
# Adds --verbose to the checker and copies each task log to "<test>.<task>.txt".
[Alias('v')]
[Switch] $Verbose,
# Tests to run; when empty, the list is obtained from "p check --list-tests".
[Alias('m')]
[String[]] $TestMethods = @(),
# Regular expression (-match); tests that do not match are skipped.
[Alias('t')]
[String] $TestFilter = ".*",
# Keep running the remaining tests after a test has failed.
[Alias('c')]
[Switch] $ContinueOnFailure,
# Skip the "p compile" step.
[Alias('k')]
[Switch] $SkipBuildProject,
# Sleep time before each checker process is started.
[Alias('w')]
[Int] $StartTaskDelayMilliSecs = 100,
# When > 0, running checker processes are stopped once a test has run this long.
[Alias('o')]
[Int] $TimeoutSecs = 0
);
# Locate the first project file matching $ProjectFilter in the current
# directory and read the project name from its XML.
try {
$projectPath = Get-ChildItem -Filter $ProjectFilter | Select-Object -First 1
$projectFolder = Split-Path -Parent $projectPath.FullName
[xml]$projectObj = Get-Content -Path $projectPath.FullName
$projectName = $projectObj.Project.ProjectName
} catch {
Write-Error "Cannot load project file: $ProjectFilter"
exit 1
}
# Compile the P project unless -SkipBuildProject was given, then abort with
# the compiler's exit code if the build failed.
if ($SkipBuildProject) {
Write-Host -ForegroundColor DarkYellow "Skip building project"
# no build was run, so make the exit-code check below pass
$LASTEXITCODE = 0
} else {
try {
Push-Location $projectFolder
Write-Host -ForegroundColor DarkYellow "Building project: $projectName"
dotnet tool run p compile --pproj $projectPath.FullName
} finally {
# always return to the caller's directory, even if the build throws
Pop-Location
}
}
if ($LASTEXITCODE -ne 0) {
exit $LASTEXITCODE
}
# When no tests were named, ask the checker for its test list and keep the
# output lines containing "tc" (the test cases in this repository are named tc*).
if ($TestMethods.Count -eq 0) {
$TestMethods = dotnet tool run p check --list-tests | Select-String -SimpleMatch tc
Write-Host -ForegroundColor Blue "Test methods: {$TestMethods}"
}
$exitCode = 0
$testResults = @();
$failedTasks = @();
$startTime = Get-Date
$outputRoot = Join-Path "PCheckerOutput" $startTime.ToString("yyyy-MM-dd_HH-mm-ss")
foreach ($testMethod in $TestMethods) {
if (!($testMethod -match $TestFilter)) {
Write-Host -ForegroundColor Blue "Skipped test: $testMethod"
continue;
}
$testParams = "--fail-on-maxsteps --max-steps $MaxSteps --schedules $NumIters --sch-$Scheduling --testcase $testMethod"
if ($Seed -ge 0) { $testParams += " --seed $Seed" }
if ($Verbose) { $testParams += " --verbose" }
Write-Host -ForegroundColor DarkYellow "Running test: $testMethod"
# start the test tasks
$testStart = Get-Date
$testTasks = @{}
$testOutputs = @{}
# Start $Parallel checker processes for the current test. Each one gets its
# own output directory and a test.log that captures its standard output.
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
# sleep before each start so the processes do not all launch at the same instant
Start-Sleep -Milliseconds $StartTaskDelayMilliSecs
$outputPath = Join-Path $outputRoot "$testMethod" "t$taskId"
New-Item -ItemType Directory -Force $outputPath | Out-Null
$testOutput = New-Item (Join-Path $outputPath "test.log")
Write-Host "Test task output: $($testOutput.FullName)"
$testTask = Start-Process -NoNewWindow -Passthru -RedirectStandardOutput $testOutput -FilePath "dotnet" -ArgumentList "tool run p check $testParams --outdir $outputPath"
# remember the process and its log file by task id
$testTasks[$taskId] = $testTask
$testOutputs[$taskId] = $testOutput
}
# Poll every 10 seconds until all checker processes of this test have exited.
# When $TimeoutSecs is set and exceeded, the processes are stopped forcibly;
# the next poll then finds no running tasks and leaves the loop.
while ($true) {
$runningTasks = @()
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
if (!$testTasks[$taskId].HasExited) { $runningTasks += $taskId }
}
if ($runningTasks.Count -eq 0) {
break;
}
Write-Host "[$(Get-Date)] Found $($runningTasks.Count) running test processes: $($runningTasks)"
Start-Sleep -Seconds 10
if ($TimeoutSecs -gt 0) {
$elapsedSecs = (New-TimeSpan -Start $testStart -End (Get-Date)).TotalSeconds
if ($elapsedSecs -ge $TimeoutSecs) {
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Elapsed time ($($elapsedSecs.ToString("#.#"))s) exceeds timeout ($($timeoutSecs)s), stopping tests..."
# stop every task of this test, not only the ones seen running above
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
Stop-Process -Force $testTasks[$taskId]
$testTasks[$taskId].WaitForExit()
}
}
}
}
$elapsedSecs = (New-TimeSpan -Start $testStart -End (Get-Date)).TotalSeconds
Write-Host "[$(Get-Date)] Test processes stopped after running for $($elapsedSecs.ToString("#.#"))s"
# print outputs of test processes
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
if ($Verbose) {
Copy-Item -Verbose -Force $testOutputs[$taskId] ($testMethod + ".$taskId.txt")
}
# Extract result fields from the task's log with regular expressions.
# Each value stays $null when its pattern does not occur in the log.

# scheduling-point statistics: the last "N (min), N (avg), N (max)" line
$numSchedPoints = Get-Content $testOutputs[$taskId] | Select-String -Pattern "(\d+) \(min\), (\d+) \(avg\), (\d+) \(max\)" |
Foreach-Object {
$min, $avg, $max = $_.Matches[0].Groups[1..3].Value
[PSCustomObject] @{
min = $min
avg = $avg
max = $max
}
} | Select-Object -Last 1
$taskName = "$testMethod[$taskId]"
# a task fails only if the log contains "Checker found a bug"
$foundBug = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Checker found a bug" |
Foreach-Object { ($_.Matches[0].Groups[0].Value) } | Select-Object -First 1
$testStatus = if ($null -eq $foundBug) { "pass" } else { "fail" }
# seed reported by the checker; reused below for the repro command
$testSeed = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Checker is using '[A-Za-z]+' strategy \(seed:(\d+)\)" |
Foreach-Object { [Int64]($_.Matches[0].Groups[1].Value) } | Select-Object -First 1
$testSchedules = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Explored ([\d.]+) schedules" |
Foreach-Object { [Int64]($_.Matches[0].Groups[1].Value) } | Select-Object -First 1
$testSeconds = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Elapsed ([\d.]+) sec" |
Foreach-Object { [Double]($_.Matches[0].Groups[1].Value) } | Select-Object -First 1
# Append one summary row for this task.
# Fields parsed from the log are $null when the checker never printed them
# (for example when the process was stopped by the timeout). Calling
# .ToString() on a $null $testSeconds would throw and drop the whole row, so
# "seconds" falls back to -1 in the same way as min/avg/max.
$testResults += [PSCustomObject] @{
    test = $taskName
    status = $testStatus
    seed = $testSeed
    schedules = $testSchedules
    seconds = if ($null -ne $testSeconds) { $testSeconds.ToString("0.0") } else { -1 }
    min = if ($null -ne $numSchedPoints) { $numSchedPoints.min } else { -1 }
    avg = if ($null -ne $numSchedPoints) { $numSchedPoints.avg } else { -1 }
    max = if ($null -ne $numSchedPoints) { $numSchedPoints.max } else { -1 }
};
# Failure handling for a task whose log reported a bug: print the log, write a
# filtered debug log and a repro script next to the checker's output file, and
# mark the run as failed.
if ($null -ne $foundBug) {
Write-Host -ForegroundColor DarkYellow "Test process output #${taskId}/${Parallel}:"
Get-Content $testOutputs[$taskId]
# take the first ".txt" path from the checker's "Writing <path>" lines
# NOTE(review): $checkerOutputPath is $null if no such line exists, and Get-Item below would then fail - confirm the checker always writes a .txt file on failure
$checkerOutputPath = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Writing (.*)" |
Foreach-Object { ($_.Matches[0].Groups[1].Value) } | Select-String -SimpleMatch -Pattern ".txt" | Select-Object -First 1
# filter key messages from output
$checkerOutputFile = Get-Item $checkerOutputPath
$debugLogFile = (Join-Path $checkerOutputFile.DirectoryName $checkerOutputFile.BaseName) + ".$testMethod" + ".csv"
# substrings (simple, case-sensitive match) of the lines kept in the debug log
$logPattern = @(
"sent event 'eWriteWork with payload", "sent event 'eWriteWorkDone with payload",
"sent event 'eCommitWork with payload", "sent event 'eCommitWorkDone with payload",
"sent event 'eWriteReq with payload", "sent event 'eWriteResp with payload",
"sent event 'eUpdateMsg with payload", "sent event 'eCommitMsg with payload",
"sent event 'eGetTargetSyncInfoResult with payload",
"sent event 'eSyncStartResp with payload",
"sent event 'eSyncDoneResp with payload",
"sent event 'eHaltReq with payload",
"sent event 'eShutDown with payload",
"sent event 'eRestart with payload",
# "dequeued event 'eUpdateTargetStateMsg with payload",
"dequeued event 'eNewRoutingInfo with payload",
"set its targets offline",
"replication chain updated",
"start write process",
"updatesOfChunkReplica",
# "aliveStorageServices",
"<ErrorLog>"
)
# keep at most the first 100000 matching lines
Select-String -Path $checkerOutputFile -SimpleMatch -CaseSensitive -Pattern $logPattern |
Select-Object -Property 'Line' -First 100000 |
Out-File -Width 10000 -Encoding utf8 $debugLogFile
Write-Host -ForegroundColor DarkYellow "Debug log: $debugLogFile"
# save reprod script to file
$outputPath = Join-Path $outputRoot "$testMethod" "t$taskId"
# same checker parameters as the failed run, plus the seed parsed from its log
$reprodCmdstr = "dotnet tool run p check $testParams -s $testSeed --outdir $outputPath"
$reprodScriptFile = (Join-Path $checkerOutputFile.DirectoryName $checkerOutputFile.BaseName) + ".$testMethod" + ".ps1"
Set-Content -Path $reprodScriptFile -Value $reprodCmdstr
Write-Host -ForegroundColor DarkYellow "Reprod script: $reprodScriptFile"
Write-Host -ForegroundColor DarkYellow "Reprod command: $reprodCmdstr"
# set exit code to indicate the failure
$exitCode = -1
$failedTasks += $taskName
}
}
if (($exitCode -ne 0) -and (-not $ContinueOnFailure)) {
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Test $testMethod failed, stopping..."
break;
}
}
# Print the summary table, save it as test_results.csv under the output root,
# and exit with 0 when every task passed or -1 when any task failed.
$elapsedTime = New-TimeSpan -Start $startTime -End (Get-Date)
Write-Host -ForegroundColor DarkYellow "-----------------------"
Write-Host -ForegroundColor DarkYellow "Summary of test results"
Write-Host -ForegroundColor DarkYellow "-----------------------"
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Elapsed time: $($elapsedTime.TotalSeconds.ToString(`"#.#`"))s"
Format-Table -AutoSize -InputObject $testResults
$testResults | Export-Csv -NoTypeInformation -Path (Join-Path $outputRoot "test_results.csv")
if ($exitCode -eq 0) {
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] All tests passed"
} else {
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Failed test tasks: $failedTasks"
}
exit $exitCode

20
specs/Timer/LICENSE.txt Normal file
View File

@@ -0,0 +1,20 @@
The MIT License
Copyright (c) 2015 P Developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

64
specs/Timer/PSrc/Timer.p Normal file
View File

@@ -0,0 +1,64 @@
/*****************************************************************************************
The timer state machine models the non-deterministic behavior of an OS timer
******************************************************************************************/
// Timer machine: after eStartTimer it eventually sends eTimeOut to its client,
// unless it is cancelled first. The delay is modelled by a nondeterministic
// choice that is re-evaluated each time TimerStarted is entered.
machine Timer
{
// user of the timer
var client: machine;
start state Init {
// the payload is the machine that will receive eTimeOut
entry (_client : machine){
client = _client;
goto WaitForTimerRequests;
}
}
// Idle: waits for a start request. Cancel requests and leftover
// eDelayedTimeOut events are dropped here.
state WaitForTimerRequests {
on eStartTimer goto TimerStarted;
ignore eCancelTimer, eDelayedTimeOut;
}
// Running: a start request received while running is deferred.
state TimerStarted {
defer eStartTimer;
entry {
// nondeterministic choice: fire now, or delay and decide again later
if($)
{
send client, eTimeOut;
goto WaitForTimerRequests;
}
else
{
// self-sent event; handling it re-enters this state and re-runs the choice
send this, eDelayedTimeOut;
}
}
on eDelayedTimeOut goto TimerStarted;
on eCancelTimer goto WaitForTimerRequests;
}
}
/************************************************
Events used to interact with the timer machine
************************************************/
// sent to the timer by StartTimer to start it
event eStartTimer;
// sent to the timer by CancelTimer to cancel it
event eCancelTimer;
// sent by the timer to its client when the timer fires
event eTimeOut;
// sent by the timer to itself to postpone the time-out decision
event eDelayedTimeOut;
/************************************************
Functions or API's to interact with the OS Timer
*************************************************/
// create timer
// Creates a new Timer machine that sends eTimeOut to `client`, and returns it.
fun CreateTimer(client: machine) : Timer
{
return new Timer(client);
}
// start timer
// Sends eStartTimer to `timer`.
fun StartTimer(timer: Timer)
{
send timer, eStartTimer;
}
// cancel timer
// Sends eCancelTimer to `timer`.
fun CancelTimer(timer: Timer)
{
send timer, eCancelTimer;
}

View File

@@ -0,0 +1,2 @@
/* Create the timer module which consists of only the timer machine */
module Timer = { Timer };

6
specs/Timer/README.md Normal file
View File

@@ -0,0 +1,6 @@
# Timer
This project includes modified portions of the following open-source project:
- [**P**](https://github.com/p-org/P) Licensed under the [MIT License](LICENSE.txt).
- Source: the original implementation of [`Timer`](https://github.com/p-org/P/tree/master/Tutorial/Common/Timer).

8
specs/Timer/Timer.pproj Normal file
View File

@@ -0,0 +1,8 @@
<!-- P Project file for the Timer Module-->
<Project>
<ProjectName>Timer</ProjectName>
<InputFiles>
<PFile>./PSrc/</PFile>
</InputFiles>
<OutputDir>./PGenerated/</OutputDir>
</Project>