mirror of
https://github.com/deepseek-ai/3FS
synced 2025-06-26 18:16:45 +00:00
Initial commit
This commit is contained in:
13
specs/.config/dotnet-tools.json
Normal file
13
specs/.config/dotnet-tools.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"version": 1,
|
||||
"isRoot": true,
|
||||
"tools": {
|
||||
"p": {
|
||||
"version": "2.3.2",
|
||||
"commands": [
|
||||
"p"
|
||||
],
|
||||
"rollForward": false
|
||||
}
|
||||
}
|
||||
}
|
||||
7
specs/.gitignore
vendored
Normal file
7
specs/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
PCheckerOutput/
|
||||
PGenerated/
|
||||
POutput/
|
||||
obj/
|
||||
*.toolbox/
|
||||
Test.cs
|
||||
*.log
|
||||
15
specs/DataStorage/DataStorage.csproj
Normal file
15
specs/DataStorage/DataStorage.csproj
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
<ApplicationIcon />
|
||||
<OutputType>Exe</OutputType>
|
||||
<StartupObject />
|
||||
<LangVersion>latest</LangVersion>
|
||||
<OutputPath>POutput/</OutputPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Coyote" Version="1.0.5"/>
|
||||
<PackageReference Include="PCSharpRuntime" Version="1.1.15"/>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
12
specs/DataStorage/DataStorage.pproj
Normal file
12
specs/DataStorage/DataStorage.pproj
Normal file
@@ -0,0 +1,12 @@
|
||||
<!-- P project file for data storage -->
|
||||
<Project>
|
||||
<ProjectName>DataStorage</ProjectName>
|
||||
<InputFiles>
|
||||
<PFile>./PSrc/</PFile>
|
||||
<PFile>./PSpec/</PFile>
|
||||
<PFile>./PTst/</PFile>
|
||||
</InputFiles>
|
||||
<OutputDir>./PGenerated/</OutputDir>
|
||||
<!-- Add external dependencies -->
|
||||
<IncludeProject>../Timer/Timer.pproj</IncludeProject>
|
||||
</Project>
|
||||
492
specs/DataStorage/PSpec/SystemSpec.p
Normal file
492
specs/DataStorage/PSpec/SystemSpec.p
Normal file
@@ -0,0 +1,492 @@
|
||||
// Liveness monitor: every client write request that has been observed must
// eventually be answered by a successful write response.
//
// The monitor sits in the hot state PendingWrites while at least one client
// request is still unanswered and in the cold state NoPendingWrites otherwise.
spec WriteComplete observes eWriteReq, eWriteResp {
  // request tag -> true once a successful response for that tag was observed
  var completedWriteReqTags: map[tMessageTag, bool];
  // number of tags in completedWriteReqTags that are still mapped to false
  var numPendingWriteReqs: int;

  // Prints the current bookkeeping; shared by the entry actions of both states.
  fun ReportPendingWrites() {
    print format("numPendingWriteReqs: {0}, completedWriteReqTags: {1}", numPendingWriteReqs, completedWriteReqTags);
  }

  // Registers a write request the first time its tag is seen.
  // Requests not flagged as coming from a client and already-known tags are skipped.
  fun OnWriteReq(writeReq: tWriteReq) {
    if (!writeReq.fromClient) {
      return;
    }

    if (writeReq.tag in completedWriteReqTags) {
      return;
    }

    completedWriteReqTags[writeReq.tag] = false;
    numPendingWriteReqs = numPendingWriteReqs + 1;

    if (numPendingWriteReqs > 0) {
      goto PendingWrites;
    }
  }

  // Marks a request as completed on its first successful response.
  // A response whose tag was never registered violates the spec.
  fun OnWriteResp(writeResp: tWriteResp) {
    assert writeResp.tag in completedWriteReqTags;

    // failed responses do not complete the request
    if (writeResp.status != ErrorCode_SUCCESS) {
      return;
    }

    // duplicate successful responses are counted only once
    if (completedWriteReqTags[writeResp.tag]) {
      return;
    }

    completedWriteReqTags[writeResp.tag] = true;
    numPendingWriteReqs = numPendingWriteReqs - 1;

    if (numPendingWriteReqs == 0) {
      goto NoPendingWrites;
    }
  }

  start cold state NoPendingWrites {
    entry {
      ReportPendingWrites();
      assert numPendingWriteReqs == 0, format("{0} pending writes not equal to zero", numPendingWriteReqs);
    }

    on eWriteReq do OnWriteReq;

    on eWriteResp do OnWriteResp;
  }

  hot state PendingWrites {
    entry {
      ReportPendingWrites();
    }

    on eWriteReq do OnWriteReq;

    on eWriteResp do OnWriteResp;
  }
}
|
||||
|
||||
event eSystemConfig: (config: tSystemConfig);
|
||||
event eStorageSystem: (system: tStorageSystem);
|
||||
|
||||
// Liveness monitor: every client must receive config.numIters successful write
// responses, and this must happen for config.numClients distinct clients.
//
// The monitor stays in the hot state SendingWriteReqs until that is the case and
// then moves to the cold state Done.
spec AllWriteItersProcessed observes eWriteWork, eWriteReq, eWriteResp, eSystemConfig, eStorageSystem {
  var config: tSystemConfig;
  var mgmtService: MgmtService;
  var storageServices: tStorageServiceMap;

  // client node id -> tags of the write requests observed for that client
  var seenWriteRequestTags: map[tNodeId, set[tMessageTag]];
  // client node id -> tags of the successful write responses observed for that client
  var seenWriteResponseTags: map[tNodeId, set[tMessageTag]];
  // request tag -> target -> update version -> senders of the observed write work
  var seenWriteProcs: map[tMessageTag, map[tTargetId, map[tChunkVer, set[machine]]]];
  // clients that have collected config.numIters successful responses
  var clientDone: set[tNodeId];

  // Remembers the handles of the system under test (shared by Init and SendingWriteReqs).
  fun OnStorageSystem(args: (system: tStorageSystem)) {
    mgmtService = args.system.mgmt;
    storageServices = args.system.storages;
  }

  start state Init {
    on eSystemConfig goto SendingWriteReqs with (args: (config: tSystemConfig)) {
      config = args.config;
    }

    on eStorageSystem do OnStorageSystem;
  }

  hot state SendingWriteReqs {
    entry {
      var tag: tMessageTag;

      foreach (tag in keys(seenWriteProcs)) {
        print format("write request tag: {0}, seenWriteProcs: {1}", tag, seenWriteProcs[tag]);
      }

      print format("seenWriteRequestTags: {0}", seenWriteRequestTags);
      print format("seenWriteResponseTags: {0}", seenWriteResponseTags);
    }

    // Self-transition so that the entry action prints the state after every write work.
    on eWriteWork goto SendingWriteReqs with (writeWork: tWriteWork) {
      if (!(writeWork.tag in seenWriteProcs)) {
        seenWriteProcs += (writeWork.tag, default(map[tTargetId, map[tChunkVer, set[machine]]]));
      }

      if (!(writeWork.targetId in seenWriteProcs[writeWork.tag])) {
        seenWriteProcs[writeWork.tag] += (writeWork.targetId, default(map[tChunkVer, set[machine]]));
      }

      if (!(writeWork.updateVer in seenWriteProcs[writeWork.tag][writeWork.targetId])) {
        seenWriteProcs[writeWork.tag][writeWork.targetId] += (writeWork.updateVer, default(set[machine]));
      }

      seenWriteProcs[writeWork.tag][writeWork.targetId][writeWork.updateVer] += (writeWork.from);
    }

    on eStorageSystem do OnStorageSystem;

    on eWriteReq do (writeReq: tWriteReq) {
      if (!(writeReq.tag.nodeId in seenWriteRequestTags)) {
        seenWriteRequestTags += (writeReq.tag.nodeId, default(set[tMessageTag]));
      }

      seenWriteRequestTags[writeReq.tag.nodeId] += (writeReq.tag);
    }

    on eWriteResp do (writeResp: tWriteResp) {
      var clientId: tNodeId;

      // failed responses do not count towards the iteration total
      if (writeResp.status != ErrorCode_SUCCESS) {
        return;
      }

      clientId = writeResp.tag.nodeId;

      // Check the client key first: indexing the map with an unknown client would
      // abort with a map lookup error instead of a readable spec violation.
      assert clientId in seenWriteRequestTags,
        format("write response {0} from a client without any observed write request, seenWriteRequestTags: {1}",
          writeResp.tag, seenWriteRequestTags);
      assert writeResp.tag in seenWriteRequestTags[clientId],
        format("write response {0} has no matching write request, seenWriteRequestTags[{1}]: {2}",
          writeResp.tag, clientId, seenWriteRequestTags[clientId]);

      if (!(clientId in seenWriteResponseTags)) {
        seenWriteResponseTags += (clientId, default(set[tMessageTag]));
      }

      seenWriteResponseTags[clientId] += (writeResp.tag);

      if (sizeof(seenWriteResponseTags[clientId]) == config.numIters) {
        clientDone += (clientId);

        if (sizeof(clientDone) == config.numClients) {
          goto Done;
        }
      }
    }
  }

  cold state Done {
    ignore eWriteWork, eWriteReq, eWriteResp;

    entry {
      print format("all iterations processed {0}", clientDone);
    }
  }
}
|
||||
|
||||
// Safety monitor: on each (chunk, target) replica, a newly committed version must
// be strictly greater than every version already committed on that replica.
//
// The history of a replica is dropped when a full-chunk-replace write finishes
// successfully or when a remove commit succeeds.
spec MonotoneIncreasingVersionNumber observes eWriteOpFinishResult, eCommitOpResult {
  // (chunk id, target id) -> committed version -> commit work that produced it
  var chunkReplicaCommits: map[(tChunkId, tTargetId), map[tChunkVer, tCommitWork]];

  start state WaitForResponses {

    on eWriteOpFinishResult do (writeFinishRes: tWriteOpFinishResult) {
      var replicaKey: (tChunkId, tTargetId);

      if (writeFinishRes.status != ErrorCode_SUCCESS) {
        return;
      }

      if (!writeFinishRes.writeWork.fullChunkReplace) {
        return;
      }

      // a successful full chunk replace resets the commit history of the replica
      replicaKey = (writeFinishRes.writeWork.key.chunkId, writeFinishRes.writeWork.targetId);
      chunkReplicaCommits -= (replicaKey);
    }

    on eCommitOpResult do (commitOpResult: tCommitOpResult) {
      var work: tCommitWork;
      var newVer: tChunkVer;
      var oldVer: tChunkVer;
      var replicaKey: (tChunkId, tTargetId);

      if (commitOpResult.status != ErrorCode_SUCCESS) {
        return;
      }

      work = commitOpResult.commitWork;
      newVer = commitOpResult.commitVer;
      replicaKey = (work.key.chunkId, work.targetId);

      if (!(replicaKey in chunkReplicaCommits)) {
        chunkReplicaCommits[replicaKey] = default(map[tChunkVer, tCommitWork]);
      }

      if (commitOpResult.removeChunk) {
        print format("remove request {0} committed, clear chunkReplicaCommits[{1}]: {2}",
          work.commitMsg.tag, replicaKey, chunkReplicaCommits[replicaKey]);
        chunkReplicaCommits -= (replicaKey);
        return;
      }

      // a version that is already recorded is not checked again
      if (newVer in chunkReplicaCommits[replicaKey]) {
        return;
      }

      // all existing commits should have smaller version
      foreach (oldVer in keys(chunkReplicaCommits[replicaKey])) {
        assert newVer > oldVer,
          format ("current commit version {0} <= previous version {1} found in chunkReplicaCommits[chunkId:{2}]: {3}, commit result: {4}",
            newVer, oldVer, replicaKey, chunkReplicaCommits[replicaKey], commitOpResult);
      }

      chunkReplicaCommits[replicaKey][newVer] = work;
    }
  }
}
|
||||
|
||||
// DONE: check chunk content after each update
|
||||
|
||||
spec AllReplicasOnChainUpdated observes eReadWorkDone, eWriteWorkDone, eCommitWorkDone, eNewRoutingInfo {
|
||||
var seenRoutingVers: set[tRoutingVer];
|
||||
var replicaChains: map[tVersionedChainId, tReplicaChain];
|
||||
var chunkVersionOnTarget: map[tChunkId, map[tTargetId, tChunkVer]];
|
||||
var chunkContentOnTarget: map[tChunkId, map[tChunkVer, map[tTargetId, tBytes]]];
|
||||
var updatesOfChunkReplica: map[tChunkId, map[tChunkVer, map[tTargetId, seq[tWriteWorkDone]]]];
|
||||
|
||||
// Raises the version recorded for (chunkId, targetId) to updateVer.
// Missing entries are created with version 0 first; the recorded version is
// never lowered.
fun updateChunkVersionOnTarget(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer) {
  if (!(chunkId in chunkVersionOnTarget)) {
    chunkVersionOnTarget[chunkId] = default(map[tTargetId, tChunkVer]);
  }

  if (!(targetId in chunkVersionOnTarget[chunkId])) {
    chunkVersionOnTarget[chunkId][targetId] = 0;
  }

  if (updateVer > chunkVersionOnTarget[chunkId][targetId]) {
    chunkVersionOnTarget[chunkId][targetId] = updateVer;
  }
}
|
||||
|
||||
// Records chunkContent as the content of chunkId at updateVer on targetId.
// If different content was already recorded for the same (chunk, version, target)
// the mismatch is only printed; the new content then overwrites the old one.
fun updateChunkContentOnTarget(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer, chunkContent: tBytes) {
  var knownContents: map[tTargetId, tBytes];

  if (!(chunkId in chunkContentOnTarget)) {
    chunkContentOnTarget[chunkId] = default(map[tChunkVer, map[tTargetId, tBytes]]);
  }

  if (!(updateVer in chunkContentOnTarget[chunkId])) {
    chunkContentOnTarget[chunkId][updateVer] = default(map[tTargetId, tBytes]);
  }

  // snapshot of what is known for this version, taken before the overwrite below
  knownContents = chunkContentOnTarget[chunkId][updateVer];

  if (targetId in knownContents) {
    if (knownContents[targetId] != chunkContent) {
      print format("find different chunk content {0} than chunkContentOnTarget[chunkId:{1}][updateVer:{2}] {3}",
        chunkContent, chunkId, updateVer, knownContents);
    }
  }

  chunkContentOnTarget[chunkId][updateVer][targetId] = chunkContent;
}
|
||||
|
||||
// Appends writeWorkDone to the list of updates recorded for
// (chunkId, updateVer, targetId), creating the nested maps on demand.
fun addUpdateOfChunkReplica(chunkId: tChunkId, targetId: tTargetId, updateVer: tChunkVer, writeWorkDone: tWriteWorkDone) {
  var nextIdx: int;

  if (!(chunkId in updatesOfChunkReplica)) {
    updatesOfChunkReplica[chunkId] = default(map[tChunkVer, map[tTargetId, seq[tWriteWorkDone]]]);
  }

  if (!(updateVer in updatesOfChunkReplica[chunkId])) {
    updatesOfChunkReplica[chunkId][updateVer] = default(map[tTargetId, seq[tWriteWorkDone]]);
  }

  if (!(targetId in updatesOfChunkReplica[chunkId][updateVer])) {
    updatesOfChunkReplica[chunkId][updateVer][targetId] = default(seq[tWriteWorkDone]);
  }

  // inserting at index == current length appends to the sequence
  nextIdx = sizeof(updatesOfChunkReplica[chunkId][updateVer][targetId]);
  updatesOfChunkReplica[chunkId][updateVer][targetId] += (nextIdx, writeWorkDone);
}
|
||||
|
||||
// Single state of the monitor: collects per-replica versions, contents and write
// works, and prints them whenever a commit is observed. The consistency
// assertions on commit are currently commented out.
start state WaitForUpdates {
  on eReadWorkDone do (readWorkDone: tReadWorkDone) {
    var chunkId: tChunkId;
    var targetId: tTargetId;
    var updateVer: tChunkVer;

    chunkId = readWorkDone.chunkMetadata.chunkId;
    targetId = readWorkDone.targetId;
    updateVer = readWorkDone.chunkMetadata.updateVer;

    // Full-chunk successful reads are recognised but the bookkeeping is disabled.
    if (readWorkDone.status == ErrorCode_SUCCESS && sizeof(readWorkDone.dataBytes) == readWorkDone.chunkMetadata.chunkSize) {
      // updateChunkVersionOnTarget(chunkId, targetId, updateVer);
      // updateChunkContentOnTarget(chunkId, targetId, updateVer, readWorkDone.dataBytes);
    }
  }

  on eWriteWorkDone do (writeWorkDone: tWriteWorkDone) {
    // Only successful writes are recorded. Every other status, including
    // ErrorCode_CHUNK_COMMITTED_UPDATE and ErrorCode_CHUNK_STALE_UPDATE,
    // leaves the bookkeeping untouched.
    if (writeWorkDone.status != ErrorCode_SUCCESS) {
      return;
    }

    updateChunkVersionOnTarget(writeWorkDone.key.chunkId, writeWorkDone.targetId, writeWorkDone.updateVer);
    updateChunkContentOnTarget(writeWorkDone.key.chunkId, writeWorkDone.targetId, writeWorkDone.updateVer, writeWorkDone.currentChunkContent);
    addUpdateOfChunkReplica(writeWorkDone.key.chunkId, writeWorkDone.targetId, writeWorkDone.updateVer, writeWorkDone);
  }

  on eCommitWorkDone do (commitWorkDone: tCommitWorkDone) {
    var chunkId: tChunkId;
    var targetId: tTargetId;
    var targetIdx: tTargetId;
    var commitVer: tChunkVer;
    var chunkVer: tChunkVer;
    var chunkContent: tBytes;       // only used by the disabled checks below
    var replicaChain: tReplicaChain;
    var commitMsg: tCommitMsg;      // only used by the disabled checks below
    var writeWorkDone: tWriteWorkDone;
    var writeWorkIdx: int;

    if (commitWorkDone.status != ErrorCode_SUCCESS &&
        commitWorkDone.status != ErrorCode_CHUNK_STALE_COMMIT)
    {
      return;
    }

    targetId = commitWorkDone.targetId;
    chunkId = commitWorkDone.key.chunkId;
    commitVer = commitWorkDone.commitVer;
    replicaChain = replicaChains[commitWorkDone.key.vChainId];
    commitMsg = commitWorkDone.commitMsg;

    // this is a special commit to remove an old chunk from a returning target
    if (commitWorkDone.removeChunk && commitWorkDone.commitVer == 0) {
      return;
    }

    if (chunkId in updatesOfChunkReplica) {
      // print all write works on the chunk
      foreach (chunkVer in keys(updatesOfChunkReplica[chunkId])) {
        foreach (targetIdx in keys(updatesOfChunkReplica[chunkId][chunkVer])) {
          writeWorkIdx = 0;
          foreach (writeWorkDone in updatesOfChunkReplica[chunkId][chunkVer][targetIdx]) {
            print format("updatesOfChunkReplica[chunkId:{0}][updateVer:{1}][targetIdx:{2}][#{3}][chainVer:{4}][remove:{5}][commit:{6}]: {7}",
              chunkId, chunkVer, targetIdx, writeWorkIdx, writeWorkDone.chainVer, writeWorkDone.currentChunkContent == default(tBytes), chunkVer <= commitVer, writeWorkDone);
            writeWorkIdx = writeWorkIdx + 1;
          }
        }
      }
    }

    if (chunkId in chunkVersionOnTarget) {
      // print all versions of the chunk
      print format("chunkVersionOnTarget[chunkId:{0}]: {1}", chunkId, chunkVersionOnTarget[chunkId]);
    }

    if (chunkId in chunkContentOnTarget) {
      // print all contents of the chunk
      print format("chunkContentOnTarget[chunkId:{0}]: {1}", chunkId, chunkContentOnTarget[chunkId]);
    }

    if (commitWorkDone.removeChunk) {
      // forget everything recorded for this chunk on the committing target
      if (chunkId in chunkVersionOnTarget && targetId in chunkVersionOnTarget[chunkId]) {
        print format("remove versions of chunk {0}: {1}", chunkId, chunkVersionOnTarget[chunkId]);
        chunkVersionOnTarget[chunkId] -= (targetId);
      }

      if (chunkId in chunkContentOnTarget) {
        print format("remove contents of chunk {0}: {1}", chunkId, chunkContentOnTarget[chunkId]);
        foreach (chunkVer in keys(chunkContentOnTarget[chunkId])) {
          chunkContentOnTarget[chunkId][chunkVer] -= (targetId);
        }
      }

      return;
    }

    // foreach (targetId in replicaChain.targets) {
    //   if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
    //     assert chunkId in chunkVersionOnTarget && targetId in chunkVersionOnTarget[chunkId] && chunkVersionOnTarget[chunkId][targetId] >= commitVer,
    //       format("missing update, tag:{0}, chunkId:{1}, targetId:{2}, commitVer:{3}, chunkVersionOnTarget: {4}, replica chain: {5}",
    //         commitMsg.tag, chunkId, targetId, commitVer, chunkVersionOnTarget[chunkId], replicaChain);
    //   }
    // }

    // foreach (targetId in replicaChain.targets) {
    //   if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
    //     if (sizeof(chunkContent) == 0) {
    //       assert chunkId in chunkContentOnTarget && commitVer in chunkContentOnTarget[chunkId] && targetId in chunkContentOnTarget[chunkId][commitVer],
    //         format("missing chunk content, chunkId:{0}, commitVer:{1}, targetId:{2}, chunkContentOnTarget: {3}",
    //           chunkId, commitVer, targetId, chunkContentOnTarget);
    //       chunkContent = chunkContentOnTarget[chunkId][commitVer][targetId];
    //     } else {
    //       assert chunkContentOnTarget[chunkId][commitVer][targetId] == chunkContent,
    //         format("inconsistent replica, chunkContentOnTarget[chunkId:{0}][commitVer:{1}] {2}",
    //           chunkId, commitVer, chunkContentOnTarget[chunkId][commitVer]);
    //     }
    //   }
    // }
  }

  on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
    var replicaChain: tReplicaChain;

    // each routing version is processed once
    if (routingInfo.routingVer in seenRoutingVers) {
      return;
    }

    seenRoutingVers += (routingInfo.routingVer);

    foreach (replicaChain in values(routingInfo.replicaChains)) {
      replicaChains[replicaChain.vChainId] = replicaChain;
    }
  }
}
|
||||
}
|
||||
|
||||
event eStopMonitorTargetStates;
|
||||
|
||||
// Liveness monitor over routing info: the monitor is in the hot state
// SomeTargetsUnavailable while the most recently processed routing info contained
// a new or newer chain with a target that is neither SERVING nor LASTSRV, and in
// the cold state AllTargetsAvailable otherwise. eStopMonitorTargetStates ends the
// monitoring.
spec AllReplicasInServingState observes eNewRoutingInfo, eSyncStartReq, eSyncDoneResp, eStopMonitorTargetStates {
  // chain id -> latest chain version seen so far
  var knownReplicaChains: tReplicaChainMap;
  // targets found unavailable in the last processed routing info, with their state
  var unavailableTargets: map[tTargetId, tPublicTargetState];
  // target id -> machines that sent a sync start request for it
  var syncWorkers: map[tTargetId, set[machine]];

  // Rebuilds unavailableTargets from the chains in routingInfo that are unknown or
  // carry a higher chain version than the known one, then switches state.
  fun checkForUnavailableTargets(routingInfo: tRoutingInfo) {
    var targetId: tTargetId;
    var replicaChain: tReplicaChain;
    var chainId: tChainId;
    var isNewer: bool;
    var state: tPublicTargetState;

    unavailableTargets = default(map[tTargetId, tPublicTargetState]);

    foreach (replicaChain in values(routingInfo.replicaChains)) {
      chainId = replicaChain.vChainId.chainId;

      // a chain is processed if it is unknown or has a higher version
      isNewer = !(chainId in knownReplicaChains);
      if (!isNewer) {
        isNewer = replicaChain.vChainId.chainVer > knownReplicaChains[chainId].vChainId.chainVer;
      }

      if (!isNewer) {
        continue;
      }

      foreach (targetId in replicaChain.targets) {
        state = replicaChain.states[targetId];

        if (state != PublicTargetState_SERVING && state != PublicTargetState_LASTSRV) {
          unavailableTargets[targetId] = state;
        }
      }

      knownReplicaChains[chainId] = replicaChain;
      print format("added a new chain: {0}, unavailableTargets: {1}", replicaChain, unavailableTargets);
    }

    if (sizeof(unavailableTargets) == 0) {
      goto AllTargetsAvailable;
    } else {
      goto SomeTargetsUnavailable;
    }
  }

  // Sync completion is observed but currently not checked.
  fun onSyncDone(syncDoneResp: tSyncDoneResp) {
    // assert syncDoneResp.targetId in unavailableTargets,
    //   format("sync target {0} not found in unavailableTargets: {1}", syncDoneResp, unavailableTargets);
  }

  // Records the sender of a sync start request under the target being synced.
  fun onSyncStart(syncStartReq: tSyncStartReq) {
    if (!(syncStartReq.targetId in syncWorkers)) {
      syncWorkers[syncStartReq.targetId] = default(set[machine]);
    }

    syncWorkers[syncStartReq.targetId] += (syncStartReq.from);
  }

  start cold state AllTargetsAvailable {
    on eNewRoutingInfo do checkForUnavailableTargets;

    on eSyncDoneResp do onSyncDone;

    on eSyncStartReq do onSyncStart;

    on eStopMonitorTargetStates goto Done;
  }

  hot state SomeTargetsUnavailable {
    entry {
      var replicaChain: tReplicaChain;

      print format("unavailable targets: {0}, sync workers: {1}", unavailableTargets, syncWorkers);

      // foreach (replicaChain in values(knownReplicaChains)) {
      //   print format("known replica chain: {0}", replicaChain);
      // }
    }

    on eNewRoutingInfo do checkForUnavailableTargets;

    on eSyncDoneResp do onSyncDone;

    on eSyncStartReq do onSyncStart;

    on eStopMonitorTargetStates goto Done;
  }

  cold state Done {
    ignore eNewRoutingInfo, eSyncStartReq, eSyncDoneResp;
  }
}
|
||||
30
specs/DataStorage/PSrc/Common.p
Normal file
30
specs/DataStorage/PSrc/Common.p
Normal file
@@ -0,0 +1,30 @@
|
||||
// Returns the smaller of x and y.
fun Min(x: int, y: int): int {
  if (y <= x) {
    return y;
  }

  return x;
}
|
||||
|
||||
// Returns the larger of x and y.
fun Max(x: int, y: int): int {
  if (y >= x) {
    return y;
  }

  return x;
}
|
||||
|
||||
// Computes the bitwise AND of x and y by walking both numbers bit by bit from the
// least significant end.
// The loop only runs while both operands are positive, so the result is 0 as
// soon as either argument is zero or negative.
fun BitwiseAnd(x: int, y: int): int {
  var lhs: int;
  var rhs: int;
  var bitValue: int;
  var result: int;

  lhs = x;
  rhs = y;
  bitValue = 1;
  result = 0;

  while (lhs > 0 && rhs > 0) {
    // both operands are positive here, so a non-zero remainder means the low bit is set
    if (lhs % 2 > 0 && rhs % 2 > 0) {
      result = result + bitValue;
    }

    lhs = lhs / 2;
    rhs = rhs / 2;
    bitValue = bitValue * 2;
  }

  return result;
}
|
||||
75
specs/DataStorage/PSrc/MgmtClient.p
Normal file
75
specs/DataStorage/PSrc/MgmtClient.p
Normal file
@@ -0,0 +1,75 @@
|
||||
type tHeartbeatConns = (mgmtClient: MgmtClient, mgmtService: MgmtService);
|
||||
event eSendHeartbeat: tHeartbeatConns;
|
||||
event eNewRoutingInfo: tRoutingInfo;
|
||||
|
||||
// Management client embedded in a host machine.
//
// On every timer tick it optionally asks its host to send a heartbeat and polls
// the management service for routing info; routing info with a higher version is
// forwarded to the host. eShutDown takes the client offline and eRestart brings
// it back.
machine MgmtClient {
  var nodeId: tNodeId;
  var clientHost: machine;
  var mgmtService: MgmtService;
  var sendHeartbeats: bool;
  var timer: Timer;
  var nextRequestId: tRequestId;
  var routingInfo: tRoutingInfo;

  // Returns a fresh message tag for this node; request ids start at 1.
  fun newMessageTag(): tMessageTag {
    var tag: tMessageTag;

    nextRequestId = nextRequestId + 1;
    tag = (nodeId = nodeId, requestId = nextRequestId);
    return tag;
  }

  start state Init {
    entry (args: (nodeId: tNodeId, clientHost: machine, mgmtService: MgmtService, sendHeartbeats: bool)) {
      print format("{0} init: {1}", this, args);

      nodeId = args.nodeId;
      clientHost = args.clientHost;
      mgmtService = args.mgmtService;
      sendHeartbeats = args.sendHeartbeats;
      timer = CreateTimer(this);

      goto SendHeartbeats;
    }
  }

  state SendHeartbeats {
    entry {
      var tag: tMessageTag;

      if (sendHeartbeats) {
        print format("{0} of {1} sends heartbeat to {2}", this, clientHost, mgmtService);
        send clientHost, eSendHeartbeat, (mgmtClient = this, mgmtService = mgmtService);
      }

      // poll for routing info, reporting the version this client already has
      tag = newMessageTag();
      send mgmtService, eGetRoutingInfoReq, (from = this, tag = tag, routingVer = routingInfo.routingVer);
      StartTimer(timer);
    }

    // re-enter the state so that the entry action runs once per timer period
    on eTimeOut goto SendHeartbeats;

    on eShutDown goto Offline with (from: machine) {
      print format("{0} of node {1} is going to shutdown", this, nodeId);
      CancelTimer(timer);
    }

    on eGetRoutingInfoResp do (getRoutingInfoResp: tGetRoutingInfoResp) {
      var latestRoutingInfo: tRoutingInfo;

      if (getRoutingInfoResp.status != ErrorCode_SUCCESS) {
        return;
      }

      latestRoutingInfo = getRoutingInfoResp.routingInfo;

      // only strictly newer routing info is adopted and forwarded
      if (latestRoutingInfo.routingVer <= routingInfo.routingVer) {
        return;
      }

      print format("{0}: routing info version {1} is greater than: {2}", this, latestRoutingInfo.routingVer, routingInfo.routingVer);
      routingInfo = latestRoutingInfo;
      send clientHost, eNewRoutingInfo, routingInfo;
    }
  }

  state Offline {
    ignore eTimeOut, eShutDown, eGetRoutingInfoResp;

    entry {
      print format("{0} #{1} is offline, client host: {2}", this, nodeId, clientHost);
      // the cached routing info is discarded while offline
      routingInfo = default(tRoutingInfo);
    }

    on eRestart goto SendHeartbeats with (from: machine) {
      print format("{0} #{1} is restarted by {2}", this, nodeId, from);
    }
  }
}
|
||||
623
specs/DataStorage/PSrc/MgmtService.p
Normal file
623
specs/DataStorage/PSrc/MgmtService.p
Normal file
@@ -0,0 +1,623 @@
|
||||
enum tPublicTargetState {
|
||||
PublicTargetState_INVALID = 0, // invalid state
|
||||
PublicTargetState_SERVING = 1, // online and serving client requests
|
||||
PublicTargetState_LASTSRV = 2, // offline but it was the last serving target
|
||||
PublicTargetState_SYNCING = 4, // online and syncing updates
|
||||
PublicTargetState_WAITING = 8, // online and waiting to join the chain
|
||||
PublicTargetState_OFFLINE = 16 // crashed or stopped
|
||||
}
|
||||
|
||||
// Returns true when the public target state is SERVING or SYNCING.
fun IsActiveTargetState(targetState: tPublicTargetState): bool {
  if (targetState == PublicTargetState_SERVING) {
    return true;
  }

  return targetState == PublicTargetState_SYNCING;
}
|
||||
|
||||
// Returns the display name of every public target state except
// PublicTargetState_INVALID, which has no entry.
fun AllPublicTargetStates(): map[tPublicTargetState, string] {
  var names: map[tPublicTargetState, string];

  names[PublicTargetState_SERVING] = "SERVING";
  names[PublicTargetState_LASTSRV] = "LASTSRV";
  names[PublicTargetState_SYNCING] = "SYNCING";
  names[PublicTargetState_WAITING] = "WAITING";
  names[PublicTargetState_OFFLINE] = "OFFLINE";

  return names;
}
|
||||
|
||||
// Renders a bit mask of public target states as "NAME+NAME(x)".
//
// x is treated as a combination of tPublicTargetState values. Each state found
// in the mask contributes its name from AllPublicTargetStates(), the names are
// joined with "+", and the original numeric value is appended in parentheses.
// A value <= 0 yields just "(x)".
//
// Bits that match no known state are rendered once as "UNKNOWN:<remaining value>".
// Without that case the loop could never reduce the remaining value and would
// not terminate (for example for x = 32).
fun PublicTargetStateToString(x: int): string {
  var states: map[tPublicTargetState, string];
  var s: tPublicTargetState;
  var y: int;
  var str: string;
  var matched: bool;

  states = AllPublicTargetStates();
  y = x;

  while (y > 0) {
    if (str != "") {
      str = str + "+";
    }

    // strip one known state from the remaining mask per iteration
    matched = false;
    foreach (s in keys(states)) {
      if (BitwiseAnd(y, (s to int)) == (s to int)) {
        str = str + states[s];
        y = y - (s to int);
        matched = true;
        break;
      }
    }

    // nothing matched: the rest of the mask is unknown, report it and stop
    if (!matched) {
      str = str + format("UNKNOWN:{0}", y);
      break;
    }
  }

  return str + format("({0})", x);
}
|
||||
|
||||
// Renders a target-state map as "<target->STATE(n)>, <target->STATE(n)>, ...".
// An empty map yields the empty string.
fun PublicTargetStatesToString(targetStates: map[tTargetId, tPublicTargetState]): string {
  var id: tTargetId;
  var rendered: string;
  var separator: string;

  // empty before the first entry, ", " afterwards
  separator = "";

  foreach (id in keys(targetStates)) {
    rendered = rendered + separator + format("<{0}->{1}>", id, PublicTargetStateToString(targetStates[id] to int));
    separator = ", ";
  }

  return rendered;
}
|
||||
|
||||
type tLocalTargetMap = map[tTargetId, StorageTarget];
|
||||
type tGlobalTargetMap = map[tNodeId, tLocalTargetMap];
|
||||
type tReplicaChainMap = map[tChainId, tReplicaChain];
|
||||
type tStorageClientMap = map[tNodeId, StorageClient];
|
||||
type tStorageServiceMap = map[tNodeId, StorageService];
|
||||
|
||||
type tRoutingVer = int;
|
||||
type tRoutingInfo = (routingVer: tRoutingVer, replicaChains: tReplicaChainMap, storageServices: tStorageServiceMap, offlineServices: set[tNodeId]);
|
||||
|
||||
type tGetRoutingInfoReq = (from: machine, tag: tMessageTag, routingVer: tRoutingVer);
|
||||
type tGetRoutingInfoResp = (tag: tMessageTag, status: tErrorCode, routingInfo: tRoutingInfo);
|
||||
event eGetRoutingInfoReq : tGetRoutingInfoReq;
|
||||
event eGetRoutingInfoResp : tGetRoutingInfoResp;
|
||||
|
||||
type tUpdateTargetStateMsg = (from: machine, tag: tMessageTag, routingVer: tRoutingVer, nodeId: tNodeId, targetStates: tLocalTargetStateMap, localTargets: tLocalTargetMap, storageService: StorageService);
|
||||
event eUpdateTargetStateMsg : tUpdateTargetStateMsg;
|
||||
|
||||
type tRegisterClientMsg = (from: machine, nodeId: tNodeId, storageClient: StorageClient);
|
||||
event eRegisterClientMsg : tRegisterClientMsg;
|
||||
|
||||
event eStopFindNewFailures : int;
|
||||
|
||||
event eStartNextHeartbeatRound;
|
||||
|
||||
// DONE: remove failed storage targets from replication chains
|
||||
// DONE: re-send pending write requests to successor
|
||||
// DONE: let failed targets resync and return
|
||||
// TODO: allow targets moved from one node to another
|
||||
// TODO: leader election among multiple mgmt services
|
||||
// TODO: create C++ interfaces from the spec
|
||||
|
||||
machine MgmtService {
|
||||
var nodeId: tNodeId;
|
||||
var nextRequestId: tRequestId;
|
||||
var routingVer: tRoutingVer;
|
||||
|
||||
var numStorageServices: int;
|
||||
// var mgmtClients: set[machine];
|
||||
var fullReplicaChains: tReplicaChainMap;
|
||||
// var knownStorageClients: tStorageClientMap;
|
||||
var knownStorageServices: tStorageServiceMap;
|
||||
var nodeTargetStates: map[tNodeId, tLocalTargetStateMap];
|
||||
var storageTargets: map[tTargetId, StorageTarget]; // for debug only
|
||||
var delayedRoutingReqs: map[(machine, tRoutingVer), tGetRoutingInfoReq];
|
||||
|
||||
// num of ping attempts made
|
||||
var numAttempts: int;
|
||||
var maxAttempts: int;
|
||||
var stopFindNewFailures: int;
|
||||
// set of offline storage services
|
||||
var offlineStorageServices: set[tNodeId];
|
||||
// nodes that have responded in the current round
|
||||
var aliveStorageServices: set[tNodeId];
|
||||
// timer to wait for responses from nodes
|
||||
var timer: Timer;
|
||||
|
||||
fun newMessageTag(): tMessageTag {
|
||||
nextRequestId = nextRequestId + 1;
|
||||
return (nodeId = nodeId, requestId = nextRequestId);
|
||||
}
|
||||
|
||||
// fun registerClient(registerClientMsg: tRegisterClientMsg) {
|
||||
// var nodeId: tNodeId;
|
||||
// var storageClient: StorageClient;
|
||||
|
||||
// nodeId = registerClientMsg.nodeId;
|
||||
// storageClient = registerClientMsg.storageClient;
|
||||
|
||||
// assert !(nodeId in knownStorageClients && knownStorageClients[nodeId] != storageClient);
|
||||
|
||||
// knownStorageClients[nodeId] = storageClient;
|
||||
// mgmtClients += (registerClientMsg.from);
|
||||
|
||||
// print format("added client {0}", nodeId);
|
||||
// }
|
||||
|
||||
fun updateLocalTargetState(nodeId: tNodeId, localTargetStates: tLocalTargetStateMap, localTargets: tLocalTargetMap) {
|
||||
var targetId: tTargetId;
|
||||
|
||||
if (!(nodeId in nodeTargetStates)) {
|
||||
nodeTargetStates += (nodeId, default(tLocalTargetStateMap));
|
||||
}
|
||||
|
||||
foreach (targetId in keys(nodeTargetStates[nodeId])) {
|
||||
if (!(targetId in localTargetStates)) {
|
||||
nodeTargetStates[nodeId] -= (targetId);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (targetId in keys(localTargetStates)) {
|
||||
nodeTargetStates[nodeId][targetId] = localTargetStates[targetId];
|
||||
storageTargets[targetId] = localTargets[targetId];
|
||||
}
|
||||
}
|
||||
|
||||
fun setLocalTargetState(nodeId: tNodeId, targetState: tLocalTargetState) {
|
||||
var targetId: tTargetId;
|
||||
|
||||
if (!(nodeId in nodeTargetStates)) {
|
||||
nodeTargetStates += (nodeId, default(tLocalTargetStateMap));
|
||||
}
|
||||
|
||||
foreach (targetId in keys(nodeTargetStates[nodeId])) {
|
||||
nodeTargetStates[nodeId][targetId] = targetState;
|
||||
}
|
||||
}
|
||||
|
||||
// Applies one target-state report: forwards the sender's node id, target
// states and target machines to updateLocalTargetState.
fun processUpdateTargetStateMsg(updateTargetStateMsg: tUpdateTargetStateMsg) {
    // mgmtClients += (updateTargetStateMsg.from);
    updateLocalTargetState(
        updateTargetStateMsg.nodeId,
        updateTargetStateMsg.targetStates,
        updateTargetStateMsg.localTargets);
}
|
||||
|
||||
// Returns `replicaChain` with `targetId` appended at the tail, together with
// its public state, hosting node and that node's storage service (looked up
// in knownStorageServices). A target already present leaves the chain as is.
fun appendTargetToChain(replicaChain: tReplicaChain, targetId: tTargetId, nodeId: tNodeId, targetState: tPublicTargetState): tReplicaChain {
    var tailIndex: int;

    if (!(targetId in replicaChain.targets)) {
        tailIndex = sizeof(replicaChain.targets);

        replicaChain.targets += (tailIndex, targetId);
        replicaChain.states += (targetId, targetState);
        replicaChain.nodes += (targetId, nodeId);
        replicaChain.services += (targetId, knownStorageServices[nodeId]);
    }

    return replicaChain;
}
|
||||
|
||||
// Returns `chain` followed by every target of `other` that `chain` does not
// contain yet, in `other`'s order, carrying over node and public state.
fun extendChain(chain: tReplicaChain, other: tReplicaChain): tReplicaChain {
    var candidate: tTargetId;

    foreach (candidate in other.targets) {
        if (!(candidate in chain.targets)) {
            chain = appendTargetToChain(chain, candidate, other.nodes[candidate], other.states[candidate]);
        }
    }

    return chain;
}
|
||||
|
||||
// Tries one public-state transition for `targetId` of chain `chainId`.
//
// The transition applies only when the target's recorded local state equals
// `expectedLocalState` and its current public state (as stored in
// fullReplicaChains) has a bit in common with the `fromPublicState` mask.
// When it applies, the target is appended to `replicaChain` with
// `toPublicState`; if that differs from the current public state, the chain
// version of `replicaChain` and the global routing version are bumped.
//
// Returns the (possibly extended) `replicaChain`.
fun updatePublicTargetState(
    replicaChain: tReplicaChain,
    chainId: tChainId,
    targetId: tTargetId,
    expectedLocalState: tLocalTargetState,
    fromPublicState: int,
    toPublicState: tPublicTargetState): tReplicaChain
{
    var nodeId: tNodeId;
    var localState: tLocalTargetState;
    var publicState: tPublicTargetState;

    nodeId = fullReplicaChains[chainId].nodes[targetId];
    localState = nodeTargetStates[nodeId][targetId];
    publicState = fullReplicaChains[chainId].states[targetId];

    // guard: local state must match
    if (localState != expectedLocalState) {
        return replicaChain;
    }

    // guard: current public state must be one of the accepted source states
    if (!(BitwiseAnd(publicState to int, fromPublicState to int) > 0)) {
        return replicaChain;
    }

    replicaChain = appendTargetToChain(replicaChain, targetId, nodeId, toPublicState);

    if (publicState == toPublicState) {
        // the target keeps its public state: no version change
        print format("chain {0}, {1} #{2}: public state untouched, from state {3}, to state {4}, local state: {5}",
            chainId, storageTargets[targetId], targetId,
            PublicTargetStateToString(fromPublicState to int),
            PublicTargetStateToString(toPublicState to int),
            LocalTargetStateToString(expectedLocalState));
        return replicaChain;
    }

    // a real state change: bump chain version and routing version
    replicaChain.vChainId.chainVer = replicaChain.vChainId.chainVer + 1;
    routingVer = routingVer + 1;

    print format("chain {0}, {1} #{2}: public state updated {3} ==> {4}, local state: {5}, routing version: {6}",
        replicaChain.vChainId, storageTargets[targetId], targetId,
        PublicTargetStateToString(fromPublicState to int),
        PublicTargetStateToString(toPublicState to int),
        LocalTargetStateToString(localState),
        routingVer);

    return replicaChain;
}
|
||||
|
||||
/* transitions of public target states

   One table per local target state (<up-to-date>, <online>, <offline>).
   Rows: current public state; columns: next public state.
   y = unconditional transition, c = conditional transition
   (column layout reconstructed from the transitions in updateOneReplicaChain).

   <up-to-date>
               serving  syncing  waiting  lastsrv  offline
     serving      y
     syncing      y
     waiting                        y
     lastsrv      y
     offline                        y

   <online>
               serving  syncing  waiting  lastsrv  offline
     serving      y
     syncing               c        c
     waiting               c        c
     lastsrv      y
     offline                        y

   <offline>
               serving  syncing  waiting  lastsrv  offline
     serving                                 c        c
     syncing                                          y
     waiting                                          y
     lastsrv                                 y
     offline                                          y

*/
|
||||
|
||||
// Recomputes chain `chainId` from the current local target states.
//
// Every target of the chain is offered a series of public-state transitions
// (see updatePublicTargetState). Targets are collected into one group per
// resulting public state; the groups are then concatenated in the key order
// of AllPublicTargetStates() to form the new chain. The new chain version is
// the old version plus the number of state changes counted in the groups.
//
// The order of the passes below matters: later passes test the sizes and
// membership of groups filled by earlier passes.
fun updateOneReplicaChain(chainId: tChainId): tReplicaChain {
    var allStates: map[tPublicTargetState, string];
    var groups: map[tPublicTargetState, tReplicaChain];
    var emptyGroup: tReplicaChain;
    var merged: tReplicaChain;
    var tid: tTargetId;
    var st: tPublicTargetState;
    var servingBit: int;
    var syncingBit: int;
    var waitingBit: int;
    var lastsrvBit: int;
    var offlineBit: int;

    servingBit = PublicTargetState_SERVING to int;
    syncingBit = PublicTargetState_SYNCING to int;
    waitingBit = PublicTargetState_WAITING to int;
    lastsrvBit = PublicTargetState_LASTSRV to int;
    offlineBit = PublicTargetState_OFFLINE to int;

    allStates = AllPublicTargetStates();

    // one empty group per public state, each tagged with the chain id
    emptyGroup.vChainId.chainId = chainId;
    foreach (st in keys(allStates)) {
        groups[st] = emptyGroup;
    }

    // state transitions to serving

    foreach (tid in fullReplicaChains[chainId].targets) {
        groups[PublicTargetState_SERVING] = updatePublicTargetState(
            groups[PublicTargetState_SERVING],
            chainId, tid,
            LocalTargetState_UPTODATE,
            servingBit + syncingBit + lastsrvBit,
            PublicTargetState_SERVING);
    }

    foreach (tid in fullReplicaChains[chainId].targets) {
        groups[PublicTargetState_SERVING] = updatePublicTargetState(
            groups[PublicTargetState_SERVING],
            chainId, tid,
            LocalTargetState_ONLINE,
            servingBit + lastsrvBit,
            PublicTargetState_SERVING);
    }

    // state transitions to lastsrv

    foreach (tid in fullReplicaChains[chainId].targets) {
        // a serving target that went offline becomes lastsrv only while the
        // chain has neither a serving nor a lastsrv target yet
        if (sizeof(groups[PublicTargetState_SERVING].targets) == 0 &&
            sizeof(groups[PublicTargetState_LASTSRV].targets) == 0) {
            groups[PublicTargetState_LASTSRV] = updatePublicTargetState(
                groups[PublicTargetState_LASTSRV],
                chainId, tid,
                LocalTargetState_OFFLINE,
                servingBit,
                PublicTargetState_LASTSRV);
        }

        groups[PublicTargetState_LASTSRV] = updatePublicTargetState(
            groups[PublicTargetState_LASTSRV],
            chainId, tid,
            LocalTargetState_OFFLINE,
            lastsrvBit,
            PublicTargetState_LASTSRV);
    }

    // state transitions to syncing

    foreach (tid in fullReplicaChains[chainId].targets) {
        if (sizeof(groups[PublicTargetState_SERVING].targets) > 0) {
            groups[PublicTargetState_SYNCING] = updatePublicTargetState(
                groups[PublicTargetState_SYNCING],
                chainId, tid,
                LocalTargetState_ONLINE,
                syncingBit,
                PublicTargetState_SYNCING);
        }
    }

    foreach (tid in fullReplicaChains[chainId].targets) {
        // a waiting target is promoted only while nothing is syncing
        if (sizeof(groups[PublicTargetState_SERVING].targets) > 0 &&
            sizeof(groups[PublicTargetState_SYNCING].targets) == 0) {
            groups[PublicTargetState_SYNCING] = updatePublicTargetState(
                groups[PublicTargetState_SYNCING],
                chainId, tid,
                LocalTargetState_ONLINE,
                waitingBit,
                PublicTargetState_SYNCING);
        }
    }

    // state transitions to waiting

    foreach (tid in fullReplicaChains[chainId].targets) {
        if (sizeof(groups[PublicTargetState_SERVING].targets) == 0) {
            groups[PublicTargetState_WAITING] = updatePublicTargetState(
                groups[PublicTargetState_WAITING],
                chainId, tid,
                LocalTargetState_ONLINE,
                syncingBit,
                PublicTargetState_WAITING);
        }

        if (!(tid in groups[PublicTargetState_SYNCING].targets)) {
            groups[PublicTargetState_WAITING] = updatePublicTargetState(
                groups[PublicTargetState_WAITING],
                chainId, tid,
                LocalTargetState_ONLINE,
                waitingBit,
                PublicTargetState_WAITING);
        }

        groups[PublicTargetState_WAITING] = updatePublicTargetState(
            groups[PublicTargetState_WAITING],
            chainId, tid,
            LocalTargetState_UPTODATE,
            offlineBit + waitingBit,
            PublicTargetState_WAITING);

        groups[PublicTargetState_WAITING] = updatePublicTargetState(
            groups[PublicTargetState_WAITING],
            chainId, tid,
            LocalTargetState_ONLINE,
            offlineBit,
            PublicTargetState_WAITING);
    }

    // state transitions to offline

    foreach (tid in fullReplicaChains[chainId].targets) {
        if (!(tid in groups[PublicTargetState_LASTSRV].targets)) {
            groups[PublicTargetState_OFFLINE] = updatePublicTargetState(
                groups[PublicTargetState_OFFLINE],
                chainId, tid,
                LocalTargetState_OFFLINE,
                servingBit,
                PublicTargetState_OFFLINE);
        }

        groups[PublicTargetState_OFFLINE] = updatePublicTargetState(
            groups[PublicTargetState_OFFLINE],
            chainId, tid,
            LocalTargetState_OFFLINE,
            syncingBit + waitingBit + offlineBit,
            PublicTargetState_OFFLINE);
    }

    // print format("chain {0}, targets group by state: {1}", chainId, targetsGroupbyState);

    // concatenate the groups; each group's chainVer holds its change count
    merged.vChainId = fullReplicaChains[chainId].vChainId;

    foreach (st in keys(allStates)) {
        merged = extendChain(merged, groups[st]);
        merged.vChainId.chainVer = merged.vChainId.chainVer + groups[st].vChainId.chainVer;
    }

    return merged;
}
|
||||
|
||||
// Recomputes every replica chain from the current local target states and
// stores the result in fullReplicaChains. If any public state changed (the
// routing version moved), the delayed routing queries are answered.
//
// Asserts that each updated chain still has a SERVING or LASTSRV target and
// the same number of targets as before.
fun updateRoutingInfo() {
    var updatedReplicaChain: tReplicaChain;
    var localTargetStates: map[tTargetId, tLocalTargetState];
    var chainId: tChainId;
    var targetId: tTargetId;
    // Dedicated local: assigning to `nodeId` here would overwrite the
    // machine-level node id that Init sets and newMessageTag reads.
    var targetNodeId: tNodeId;
    var prevRoutingVer: tRoutingVer;

    prevRoutingVer = routingVer;

    foreach (chainId in keys(fullReplicaChains)) {
        // collect the local states of this chain's targets, for logging only
        localTargetStates = default(map[tTargetId, tLocalTargetState]);

        foreach (targetId in fullReplicaChains[chainId].targets) {
            targetNodeId = fullReplicaChains[chainId].nodes[targetId];
            localTargetStates += (targetId, nodeTargetStates[targetNodeId][targetId]);
        }

        print format("start to update chain {0}, public states: {1}, local states: {2}",
            fullReplicaChains[chainId].vChainId,
            PublicTargetStatesToString(fullReplicaChains[chainId].states),
            LocalTargetStatesToString(localTargetStates));

        updatedReplicaChain = updateOneReplicaChain(chainId);

        if (updatedReplicaChain.vChainId != fullReplicaChains[chainId].vChainId) {
            print format("replication chain updated: {0}, updated states: {1}, services: {2}",
                updatedReplicaChain.vChainId,
                PublicTargetStatesToString(updatedReplicaChain.states),
                updatedReplicaChain.services);
        }

        assert PublicTargetState_SERVING in values(updatedReplicaChain.states) ||
               PublicTargetState_LASTSRV in values(updatedReplicaChain.states),
            format("no serving target: {0}", ReplicaChainToString(updatedReplicaChain));
        assert sizeof(updatedReplicaChain.targets) == sizeof(fullReplicaChains[chainId].targets),
            format("updated chain {0} has different number of targets {1} than the old chain {2}",
                chainId, updatedReplicaChain.targets, fullReplicaChains[chainId].targets);

        fullReplicaChains[chainId] = updatedReplicaChain;
    }

    // routingVer is bumped inside updatePublicTargetState on every state change
    if (routingVer != prevRoutingVer) {
        print format("routing info updated to version {0}, process delayed routing queries: {1}", routingVer, delayedRoutingReqs);
        processDelayedRoutingReqs();
    }
}
|
||||
|
||||
// Re-runs every parked routing query. Each query is removed from
// delayedRoutingReqs before it is handed to replyWithRoutingInfo, which may
// park it again if its version still equals the current routing version.
fun processDelayedRoutingReqs() {
    var parked: tGetRoutingInfoReq;

    foreach (parked in values(delayedRoutingReqs)) {
        // un-park first: replyWithRoutingInfo only re-adds absent keys
        delayedRoutingReqs -= (parked.from, parked.routingVer);
        replyWithRoutingInfo(parked);
    }
}
|
||||
|
||||
// Answers a routing query. A requester whose routing version differs from
// the current one gets the full routing info right away. A requester that is
// already at the current version is parked in delayedRoutingReqs (keyed by
// sender and version) until the routing info changes.
fun replyWithRoutingInfo(getRoutingInfo: tGetRoutingInfoReq) {
    var snapshot: tRoutingInfo;

    if (getRoutingInfo.routingVer != routingVer) {
        snapshot = (
            routingVer = routingVer,
            replicaChains = fullReplicaChains,
            storageServices = knownStorageServices,
            offlineServices = offlineStorageServices);
        send getRoutingInfo.from, eGetRoutingInfoResp, (tag = getRoutingInfo.tag, status = ErrorCode_SUCCESS, routingInfo = snapshot);
        return;
    }

    // nothing new for this requester: park the query once
    if ((getRoutingInfo.from, getRoutingInfo.routingVer) in delayedRoutingReqs) {
        return;
    }

    delayedRoutingReqs += ((getRoutingInfo.from, getRoutingInfo.routingVer), getRoutingInfo);
}
|
||||
|
||||
// Returns the known storage services that are not in aliveStorageServices.
//
// stopFindNewFailures switches detection off: at 2 the result is always
// empty; at 1 it moves to 2 (and the result is empty) as soon as the number
// of alive services equals the number of known services.
fun computeOfflineStorageServices() : set[tNodeId] {
    var candidate: tNodeId;
    var missing: set[tNodeId];

    if (stopFindNewFailures == 2) {
        return missing;
    }

    if (stopFindNewFailures == 1 && sizeof(knownStorageServices) == sizeof(aliveStorageServices)) {
        // wait until all storage services are alive and then stop finding new failures
        stopFindNewFailures = 2;
        return missing;
    }

    foreach (candidate in keys(knownStorageServices)) {
        if (candidate in aliveStorageServices) continue;
        missing += (candidate);
    }

    return missing;
}
|
||||
|
||||
// Start state: copies the construction arguments into the machine fields,
// sets the initial routing version, creates the timer and moves to Bootstrap.
start state Init {
    entry (args: (nodeId: tNodeId, maxAttempts: int, numStorageServices: int, replicaChains: tReplicaChainMap)) {
        routingVer = 10001;

        nodeId = args.nodeId;
        maxAttempts = args.maxAttempts;
        numStorageServices = args.numStorageServices;
        fullReplicaChains = args.replicaChains;

        timer = CreateTimer(this);

        goto Bootstrap;
    }
}
|
||||
|
||||
// Waits until numStorageServices distinct storage services have reported
// their target states. Routing queries and client registrations are
// deferred meanwhile. Once all services are known they are all marked alive,
// the routing info is computed and the machine moves to WaitForHeartbeats.
state Bootstrap {
    defer eGetRoutingInfoReq, eRegisterClientMsg;

    on eUpdateTargetStateMsg do (report: tUpdateTargetStateMsg) {
        var knownId: tNodeId;

        processUpdateTargetStateMsg(report);

        knownStorageServices[report.nodeId] = report.storageService;

        // still waiting for more services to show up
        if (sizeof(knownStorageServices) != numStorageServices) {
            return;
        }

        foreach (knownId in keys(knownStorageServices)) {
            aliveStorageServices += (knownId);
        }

        updateRoutingInfo();

        print format("mgmt service started");

        goto WaitForHeartbeats;
    }
}
|
||||
|
||||
// Steady state: answers routing queries, records heartbeats and, after
// maxAttempts timer expirations, declares the services that sent no
// heartbeat offline and recomputes the routing info.
state WaitForHeartbeats {
    entry {
        // start wait timer to wait for responses
        StartTimer(timer);
    }

    on eGetRoutingInfoReq do replyWithRoutingInfo;

    // on eRegisterClientMsg do registerClient;

    on eStopFindNewFailures do (value: int) {
        stopFindNewFailures = value;
    }

    on eUpdateTargetStateMsg do (heartbeat: tUpdateTargetStateMsg) {
        if (heartbeat.routingVer >= routingVer) {
            processUpdateTargetStateMsg(heartbeat);
            aliveStorageServices += (heartbeat.nodeId);
            print format("#{0}: {1} added to aliveStorageServices {2}", numAttempts, heartbeat.nodeId, aliveStorageServices);
        } else {
            // heartbeat was produced against an older routing version
            print format("#{0}: ignore stale heartbeat (routingVer < {1}): {2} ", numAttempts, routingVer, heartbeat);
        }
    }

    on eTimeOut do {
        var downNode: tNodeId;

        // one more attempt finished
        numAttempts = numAttempts + 1;
        print format("#{0}: aliveStorageServices: {1}", numAttempts, aliveStorageServices);

        if (numAttempts >= maxAttempts) {
            // set storage targets to offline state
            offlineStorageServices = computeOfflineStorageServices();

            foreach (downNode in offlineStorageServices) {
                print format("detected node {0} {1} is down, set its targets offline: {2}",
                    downNode, knownStorageServices[downNode], keys(nodeTargetStates[downNode]));
                setLocalTargetState(downNode, LocalTargetState_OFFLINE);
            }

            updateRoutingInfo();

            // lets reset and restart the failure detection
            aliveStorageServices = default(set[tNodeId]);
            numAttempts = 0;
        }

        // send this, eStartNextHeartbeatRound;
        StartTimer(timer);
    }

    // on eStartNextHeartbeatRound goto WaitForHeartbeats;

    on eShutDown goto Offline with (from: machine) {
        print format("{0} is going to shutdown", this);
        send from, eStopped, this;
    }
}
|
||||
|
||||
// Terminal state entered on shutdown: cancels the timer and ignores every
// event that may still arrive.
state Offline {
    // detection has finished; these are all delayed messages and must be ignored
    ignore eGetRoutingInfoReq, eUpdateTargetStateMsg, eRegisterClientMsg, eTimeOut, eStartNextHeartbeatRound;

    entry {
        print format("stop failure detection");
        CancelTimer(timer);
    }
}
|
||||
}
|
||||
315
specs/DataStorage/PSrc/StorageClient.p
Normal file
315
specs/DataStorage/PSrc/StorageClient.p
Normal file
@@ -0,0 +1,315 @@
|
||||
/* Storage Client */
|
||||
|
||||
// Payload of eSubmitWrite: the submitting machine (`from`, which later
// receives eWriteComplete) and the chunk id, offset, length and data to write.
// The eSubmitWrite handler treats an all-default `dataBytes` as a chunk removal.
type tWriteArgs = (from: machine, chunkId: tChunkId, offset: int, length: int, dataBytes: tBytes);
|
||||
// Payload of eSubmitRead: the submitting machine (`from`, which later
// receives eReadComplete) and the chunk id, offset and length to read.
type tReadArgs = (from: machine, chunkId: tChunkId, offset: int, length: int);
|
||||
// Payload of eWriteComplete, filled from the storage service's write response.
type tWriteRes = (status: tErrorCode, chunkId: tChunkId, commitVer: tChunkVer);
|
||||
// Payload of eReadComplete, filled from the storage service's read response.
type tReadRes = (status: tErrorCode, chunkId: tChunkId, chunkMetadata: tChunkMetadata, dataBytes: tBytes);
|
||||
|
||||
// user -> StorageClient: submit a write
event eSubmitWrite : tWriteArgs;
|
||||
// user -> StorageClient: submit a read
event eSubmitRead : tReadArgs;
|
||||
// StorageClient -> user: final result of a submitted write
event eWriteComplete : tWriteRes;
|
||||
// StorageClient -> user: final result of a submitted read
event eReadComplete : tReadRes;
|
||||
// user -> StorageClient: payload is the machine to notify with
// eClientConnected once the client has received routing info
event eWaitConnected : machine;
|
||||
// StorageClient -> user: the client has routing info and accepts requests
event eClientConnected;
|
||||
|
||||
// Client-side front end of the storage system.
//
// The client obtains routing info through its own MgmtClient, turns
// eSubmitWrite / eSubmitRead into requests sent to the head target of the
// responsible replica chain, reissues in-flight requests when the chain
// version changes or the service asks for a retry, and reports the final
// result to the submitter with eWriteComplete / eReadComplete.
machine StorageClient {
    var clientId: tNodeId;
    var mgmtService: MgmtService;
    var mgmtClient: MgmtClient;
    // var timer: Timer;

    // last routing info accepted from the mgmt side
    var routingVer: tRoutingVer;
    var replicaChains: tReplicaChainMap;

    var nextRequestId: tRequestId;

    // machines waiting for eClientConnected
    var clientUsers: set[machine];
    // original submit arguments, kept to find the submitter of a response
    var submittedWrites: map[tMessageTag, tWriteArgs];
    var submittedReads: map[tMessageTag, tReadArgs];
    // requests not yet answered with a final status
    var inflightWriteReqs: map[tMessageTag, tWriteReq];
    var inflightReadReqs: map[tMessageTag, tReadReq];

    // Returns a fresh message tag made of this client's id and the next
    // request id.
    fun newMessageTag(): tMessageTag {
        nextRequestId = nextRequestId + 1;
        return (nodeId = clientId, requestId = nextRequestId);
    }

    // Maps a chunk id onto a replica chain (chunk id modulo the number of
    // known chains) and returns the key made of that chain's versioned id
    // and the chunk id. Requires replicaChains to be non-empty.
    fun calcGlobalKeyFromChunkId(chunkId: tChunkId): tGlobalKey {
        var chainIds: seq[tChainId];
        var targetChain: tChainId;
        var replicaChain: tReplicaChain;

        chainIds = keys(replicaChains);
        targetChain = chainIds[chunkId % sizeof(chainIds)];
        replicaChain = replicaChains[targetChain];

        return (vChainId = replicaChain.vChainId, chunkId = chunkId);
    }

    // Adopts `routingInfo` if it is newer than the routing version held by
    // this client; older or equal versions are logged and dropped. Chains
    // that disappeared are removed, all others are overwritten.
    fun processRoutingInfo(routingInfo: tRoutingInfo) {
        var newRoutingVer: tRoutingVer;
        var newReplicaChains: tReplicaChainMap;
        var chainId: tChainId;

        newRoutingVer = routingInfo.routingVer;
        newReplicaChains = routingInfo.replicaChains;

        if (routingVer > newRoutingVer) {
            print format("{0}: error: routingVer {1} > newRoutingVer {2}", this, routingVer, newRoutingVer);
            return;
        } else if (routingVer == newRoutingVer) {
            print format("{0}: ignore: routingVer {1} == newRoutingVer {2}", this, routingVer, newRoutingVer);
            return;
        }

        print format("{0}: updating replica chains from version {1} to {2}", this, routingVer, newRoutingVer);
        routingVer = newRoutingVer;

        foreach (chainId in keys(replicaChains)) {
            if (!(chainId in newReplicaChains))
                replicaChains -= (chainId);
        }

        foreach (chainId in keys(newReplicaChains)) {
            replicaChains[chainId] = newReplicaChains[chainId];

            print format("{0}: new replica chain {1}, targets: {2}, services: {3}",
                this, newReplicaChains[chainId].vChainId, newReplicaChains[chainId].targets, newReplicaChains[chainId].services);
        }
    }

    // fun onSendHeartbeatEvent(heartbeatConns: tHeartbeatConns) {
    //   send heartbeatConns.mgmtService, eRegisterClientMsg, (from = heartbeatConns.mgmtClient, nodeId = clientId, storageClient = this);
    // }

    // Returns the id of the chain's head target if that target is publicly
    // SERVING, otherwise 0 (callers treat 0 as "no target to send to").
    fun chooseServingTarget(replicaChain: tReplicaChain): tTargetId {
        var targetId: tTargetId;

        // an empty chain has no head to route to
        if (sizeof(replicaChain.targets) == 0) {
            return 0;
        }

        targetId = replicaChain.targets[0];

        if (replicaChain.states[targetId] == PublicTargetState_SERVING) {
            return targetId;
        }

        return 0;
    }

    // Sends `writeReq` to the serving head of its chain, stamped with the
    // chain's latest versioned id. Nothing is sent when the head is not
    // serving; the request then stays in flight until it is reissued.
    // NOTE(review): `writeReq` is a by-value copy, so the refreshed vChainId
    // is not written back to inflightWriteReqs; the stored request keeps its
    // submit-time chain version - confirm this is intended.
    fun sendWriteReq(writeReq: tWriteReq) {
        var replicaChain: tReplicaChain;
        var targetId: tTargetId;
        var targetService: StorageService;

        // get the latest chain and update versioned chain id
        replicaChain = replicaChains[writeReq.key.vChainId.chainId];
        writeReq.key.vChainId = replicaChain.vChainId;

        targetId = chooseServingTarget(replicaChain);
        if (targetId > 0) {
            print format("{0}: send write request #{1}: {2}", this, writeReq.retries, writeReq);
            targetService = replicaChain.services[targetId];
            send targetService, eWriteReq, writeReq;
        }
    }

    // Bumps the retry counter of an in-flight write and sends it again.
    fun reissueWriteReq(reqTag: tMessageTag) {
        inflightWriteReqs[reqTag].retries = inflightWriteReqs[reqTag].retries + 1;
        sendWriteReq(inflightWriteReqs[reqTag]);
    }

    // Read counterpart of sendWriteReq: sends `readReq` to the serving head
    // of its chain, stamped with the chain's latest versioned id.
    fun sendReadReq(readReq: tReadReq) {
        var replicaChain: tReplicaChain;
        var targetId: tTargetId;
        var targetService: StorageService;

        // get the latest chain and update versioned chain id
        replicaChain = replicaChains[readReq.key.vChainId.chainId];
        readReq.key.vChainId = replicaChain.vChainId;

        targetId = chooseServingTarget(replicaChain);
        if (targetId > 0) {
            print format("{0}: send read request #{1}: {2}", this, readReq.retries, readReq);
            targetService = replicaChain.services[targetId];
            send targetService, eReadReq, readReq;
        }
    }

    // Bumps the retry counter of an in-flight read and sends it again.
    // The counter is incremented before sending (as in reissueWriteReq) so
    // the request on the wire carries the current retry number.
    fun reissueReadReq(reqTag: tMessageTag) {
        inflightReadReqs[reqTag].retries = inflightReadReqs[reqTag].retries + 1;
        sendReadReq(inflightReadReqs[reqTag]);
    }

    // Reissues every in-flight write whose stored chain version differs from
    // the current version of its chain.
    fun processInflightWriteReqs() {
        var oldChainId: tVersionedChainId;
        var newChainId: tVersionedChainId;
        var writeReq: tWriteReq;

        foreach (writeReq in values(inflightWriteReqs)) {
            oldChainId = writeReq.key.vChainId;
            newChainId = replicaChains[oldChainId.chainId].vChainId;
            if (oldChainId != newChainId) {
                print format("{0}: chain version updated: {1} --> {2}, reissuing request {3}", this, oldChainId, newChainId, writeReq);
                reissueWriteReq(writeReq.tag);
            }
        }
    }

    // Reissues every in-flight read whose stored chain version differs from
    // the current version of its chain.
    fun processInflightReadReqs() {
        var oldChainId: tVersionedChainId;
        var newChainId: tVersionedChainId;
        var readReq: tReadReq;

        foreach (readReq in values(inflightReadReqs)) {
            oldChainId = readReq.key.vChainId;
            newChainId = replicaChains[oldChainId.chainId].vChainId;
            if (oldChainId != newChainId) {
                print format("{0}: chain version updated: {1} --> {2}, reissuing request {3}", this, oldChainId, newChainId, readReq);
                reissueReadReq(readReq.tag);
            }
        }
    }

    // Creates the MgmtClient and waits for the first routing info; users
    // that ask to be notified are remembered until then.
    start state Init {
        ignore eSendHeartbeat;

        entry (args: (clientId: tNodeId, mgmtService: MgmtService)) {
            clientId = args.clientId;
            mgmtService = args.mgmtService;
            mgmtClient = new MgmtClient((nodeId = clientId, clientHost = this, mgmtService = mgmtService, sendHeartbeats = false));
            // timer = new Timer(this);
        }

        on eWaitConnected do (user: machine) {
            clientUsers += (user);
        }

        // on eSendHeartbeat do onSendHeartbeatEvent;

        on eNewRoutingInfo goto WaitForReqs with processRoutingInfo;
    }

    // Serving state: accepts submissions, tracks responses and follows
    // routing updates.
    state WaitForReqs {
        ignore eSendHeartbeat;

        entry {
            var user: machine;
            // release everybody who was waiting for the first routing info
            foreach (user in clientUsers) {
                send user, eClientConnected;
            }
            // StartTimer(timer);
        }

        on eWaitConnected do (user: machine) {
            send user, eClientConnected;
        }

        on eShutDown goto Stopped with (from: machine) {
            print format("{0} is going to shutdown", this);
            send mgmtClient, eShutDown, this;
        }

        // on eSendHeartbeat do onSendHeartbeatEvent;

        on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
            processRoutingInfo(routingInfo);
            processInflightWriteReqs();
            processInflightReadReqs();
        }

        // on eTimeOut do {
        //   processInflightWriteReqs();
        //   processInflightReadReqs();
        //   StartTimer(timer);
        // }

        on eSubmitWrite do (writeArgs: tWriteArgs) {
            var writeReq: tWriteReq;

            writeReq = (from = this,
                        retries = 1,
                        tag = newMessageTag(),
                        key = calcGlobalKeyFromChunkId(writeArgs.chunkId),
                        updateVer = 0,
                        commitChainVer = 0,
                        fullChunkReplace = false,
                        // an all-default payload means "remove the chunk"
                        removeChunk = writeArgs.dataBytes == default(tBytes),
                        fromClient = true,
                        offset = writeArgs.offset, length = writeArgs.length,
                        dataBytes = writeArgs.dataBytes);

            sendWriteReq(writeReq);

            submittedWrites += (writeReq.tag, writeArgs);
            inflightWriteReqs += (writeReq.tag, writeReq);
        }

        on eSubmitRead do (readArgs: tReadArgs) {
            var readReq: tReadReq;

            readReq = (from = this,
                       retries = 1,
                       tag = newMessageTag(),
                       key = calcGlobalKeyFromChunkId(readArgs.chunkId),
                       offset = readArgs.offset, length = readArgs.length);

            sendReadReq(readReq);

            submittedReads += (readReq.tag, readArgs);
            inflightReadReqs += (readReq.tag, readReq);
        }

        on eWriteResp do (writeResp: tWriteResp) {
            // a response for a request that already completed
            if (!(writeResp.tag in inflightWriteReqs)) {
                print format("{0}: got response for completed write request: {1}", this, writeResp.key);
                return;
            }

            if (writeResp.status == ErrorCode_CHAIN_VERION_MISMATCH) {
                print format("{0}: retry write request: {1}", this, writeResp.key);
                reissueWriteReq(writeResp.tag);
                return;
            }

            print format("{0}: write response {1}", this, writeResp);

            send submittedWrites[writeResp.tag].from, eWriteComplete, (status = writeResp.status, chunkId = writeResp.key.chunkId, commitVer = writeResp.commitVer);

            submittedWrites -= (writeResp.tag);
            inflightWriteReqs -= (writeResp.tag);
        }

        on eReadResp do (readResp: tReadResp) {
            // a response for a request that already completed
            if (!(readResp.tag in inflightReadReqs)) {
                return;
            }

            if ((readResp.status == ErrorCode_CHAIN_VERION_MISMATCH) || (readResp.status == ErrorCode_CHUNK_NOT_COMMIT)) {
                print format("{0}: retry read request: {1}", this, readResp.key);
                reissueReadReq(readResp.tag);
                return;
            }

            print format("{0}: read response {1}", this, readResp);

            send submittedReads[readResp.tag].from, eReadComplete, (status = readResp.status, chunkId = readResp.key.chunkId,
                chunkMetadata = readResp.chunkMetadata, dataBytes = readResp.dataBytes);

            submittedReads -= (readResp.tag);
            inflightReadReqs -= (readResp.tag);
        }
    }

    // Terminal state: late responses and routing updates are ignored.
    state Stopped {
        ignore eReadResp, eWriteResp, eSendHeartbeat, eNewRoutingInfo, eTimeOut;

        entry {
            // CancelTimer(timer);
            print format("{0} stopped", this);
        }
    }
}
|
||||
9
specs/DataStorage/PSrc/StorageModules.p
Normal file
9
specs/DataStorage/PSrc/StorageModules.p
Normal file
@@ -0,0 +1,9 @@
|
||||
// The storage system module: the set of machines that together make up the
// system under test (test clients, mgmt client/service, storage
// client/service, the read/write/sync workers, targets and chunk replicas,
// plus the system monitor and the timer).
|
||||
module StorageSystem = {
|
||||
TestClient,
|
||||
MgmtClient, MgmtService,
|
||||
StorageClient, StorageService,
|
||||
ReadProcess, WriteProcess,
|
||||
SyncWorker, StorageTarget, ChunkReplica,
|
||||
SystemMonitor, Timer
|
||||
};
|
||||
2243
specs/DataStorage/PSrc/StorageService.p
Normal file
2243
specs/DataStorage/PSrc/StorageService.p
Normal file
File diff suppressed because it is too large
Load Diff
804
specs/DataStorage/PTst/TestDriver.p
Normal file
804
specs/DataStorage/PTst/TestDriver.p
Normal file
@@ -0,0 +1,804 @@
|
||||
// Configuration of one test run. SetUpStorageSystem prints it, announces it
// with eSystemConfig and asserts that
//   failStorageServices <= numStorageServices,
//   numStorageServices >= numReplicas, and
//   numChains * numReplicas is a multiple of numStorageServices.
type tSystemConfig = (
|
||||
chunkSize: int,
|
||||
numChains: int,
|
||||
numReplicas: int,
|
||||
numStorageServices: int,
|
||||
failStorageServices: int,
|
||||
failDetectionMaxAttempts: int,
|
||||
numClients: int,
|
||||
numIters: int
|
||||
);
|
||||
|
||||
// Bundle of the machines of one storage system: the mgmt service, a map of
// storage services and a map of test clients.
// NOTE(review): presumably both maps are keyed by node/client id - confirm
// against the declarations of tStorageServiceMap and tTestClientMap.
type tStorageSystem = (
|
||||
mgmt: MgmtService,
|
||||
storages: tStorageServiceMap,
|
||||
clients: tTestClientMap
|
||||
);
|
||||
|
||||
// Creates `numTargetsPerNode` StorageTarget machines for each of the nodes
// 1..numNodes and returns them grouped by node id.
// Target ids are nodeId * 100 + 1, nodeId * 100 + 2, ...; this is why
// numTargetsPerNode must stay below 100.
fun BuildNodeTargetMap(chunkSize: int, numNodes: int, numTargetsPerNode: int)
    : tGlobalTargetMap
{
    var node: tNodeId;
    var nextTargetId: tTargetId;
    var target: StorageTarget;
    var targetsOfNode: tLocalTargetMap;
    var allTargets: tGlobalTargetMap;

    assert numTargetsPerNode < 100;

    node = 1;
    while (node <= numNodes) {
        targetsOfNode = default(tLocalTargetMap);
        nextTargetId = node * 100 + 1;

        while (sizeof(targetsOfNode) < numTargetsPerNode) {
            target = new StorageTarget((targetId = nextTargetId, chunkSize = chunkSize));
            targetsOfNode += (nextTargetId, target);
            nextTargetId = nextTargetId + 1;
        }

        allTargets += (node, targetsOfNode);
        node = node + 1;
    }

    return allTargets;
}
|
||||
|
||||
// Builds chains 1..numChains with `numReplicas` targets each, all at chain
// version 1 and in public state SERVING.
// Targets are taken round-robin over the nodes of `nodeTargets`: each pick
// consumes the first remaining target of the next node, so every node must
// still have a target left when its turn comes.
fun BuildRepliaChainMap(numChains: int, numReplicas: int, nodeTargets: tGlobalTargetMap)
    : tReplicaChainMap
{
    var chainKey: tVersionedChainId;
    var pickedTarget: tTargetId;
    var hostNode: tNodeId;
    var chain: tReplicaChain;
    var chains: tReplicaChainMap;
    var hostIds: seq[tNodeId];
    var cursor: int;

    cursor = 0;
    hostIds = keys(nodeTargets);
    chainKey = (chainId = 1, chainVer = 1);

    while (chainKey.chainId <= numChains) {
        chain = default(tReplicaChain);
        chain.vChainId = chainKey;

        while (sizeof(chain.targets) < numReplicas) {
            // next node in round-robin order, then its first unused target
            hostNode = hostIds[cursor % sizeof(hostIds)];
            pickedTarget = keys(nodeTargets[hostNode])[0];
            nodeTargets[hostNode] -= (pickedTarget);
            print format("chain {0} added target {1} from node {2}", chainKey.chainId, pickedTarget, hostNode);

            chain.targets += (sizeof(chain.targets), pickedTarget);
            chain.nodes += (pickedTarget, hostNode);
            chain.states += (pickedTarget, PublicTargetState_SERVING);
            cursor = cursor + 1;
        }

        print format("create new replica chain: {0}", chain);
        chains += (chainKey.chainId, chain);
        chainKey.chainId = chainKey.chainId + 1;
    }

    return chains;
}
|
||||
|
||||
// Creates one StorageService per node of `nodeTargets`, handing each its
// local targets and the mgmt service, and returns the services by node id.
fun CreateStorageServices(nodeTargets: tGlobalTargetMap, mgmtService: MgmtService)
    : tStorageServiceMap
{
    var nodeId: tNodeId;
    var service: StorageService;
    var storageServices: tStorageServiceMap;

    foreach (nodeId in keys(nodeTargets)) {
        service = new StorageService((nodeId = nodeId, localTargets = nodeTargets[nodeId], mgmtService = mgmtService));
        storageServices += (nodeId, service);
    }

    return storageServices;
}
|
||||
|
||||
// Creates `numClients` TestClient machines with client ids 1..numClients.
// Every client gets the same chunk id range [789001, 789000 + numChains * 2), the same
// iteration count and the same failure-injection budget.
// Returns a map from client id to the created client.
fun CreateTestClients(numClients: int, numChains: int, numIters: int, failStorageServices: int, mgmtService: MgmtService, storageServices: tStorageServiceMap, systemMonitor: SystemMonitor)
  : tTestClientMap
{
  var nextId: tNodeId;
  var spawned: TestClient;
  var clients: tTestClientMap;
  var firstChunkId: int;
  var endChunkId: int;

  firstChunkId = 789001;
  endChunkId = 789000 + numChains * 2;

  // One entry is added per iteration, so the map size doubles as the loop counter.
  while (sizeof(clients) < numClients) {
    nextId = sizeof(clients) + 1;
    spawned = new TestClient((
      clientId = nextId,
      chunkIdBegin = firstChunkId,
      chunkIdEnd = endChunkId,
      numIters = numIters,
      failStorageServices = failStorageServices,
      mgmtService = mgmtService,
      storageServices = storageServices,
      systemMonitor = systemMonitor));
    clients += (nextId, spawned);
  }

  return clients;
}
|
||||
|
||||
// Validates `config`, then creates the whole system under test: storage targets, replica
// chains, the management service, storage services, the system monitor and the test clients.
// Announces eSystemConfig before validation and eStorageSystem once everything is created.
//
// testDriver: the calling test-driver machine (not used by this function).
// config:     system configuration to validate and instantiate.
fun SetUpStorageSystem(testDriver: machine, config: tSystemConfig) {
  var numTargetsPerNode: int;
  var nodeTargets: tGlobalTargetMap;
  var replicaChains: tReplicaChainMap;
  var storageServices: tStorageServiceMap;
  var mgmtService: MgmtService;
  var testClients: tTestClientMap;
  var storageSystem: tStorageSystem;
  var systemMonitor: SystemMonitor;

  print format("system config: {0}", config);
  announce eSystemConfig, (config = config,);

  // Checked first: numStorageServices is used as a divisor below.
  assert config.numStorageServices > 0,
    format("numStorageServices {0} must be positive", config.numStorageServices);
  assert config.failStorageServices <= config.numStorageServices,
    format("failStorageServices {0} > numStorageServices {1}", config.failStorageServices, config.numStorageServices);
  assert config.numStorageServices >= config.numReplicas,
    format("numStorageServices {0} < numReplicas {1}", config.numStorageServices, config.numReplicas);
  // Targets must be spread evenly over the storage services.
  assert (config.numChains * config.numReplicas) % config.numStorageServices == 0,
    format("numChains {0} * numReplicas {1} is not a multiple of numStorageServices {2}",
      config.numChains, config.numReplicas, config.numStorageServices);
  assert config.chunkSize > config.numClients * config.numIters,
    format("chunkSize {0} <= numClients {1} * numIters {2}", config.chunkSize, config.numClients, config.numIters);

  numTargetsPerNode = config.numChains * config.numReplicas / config.numStorageServices;

  nodeTargets = BuildNodeTargetMap(config.chunkSize, config.numStorageServices, numTargetsPerNode);
  print format("init nodeTargets {0}", nodeTargets);

  replicaChains = BuildRepliaChainMap(config.numChains, config.numReplicas, nodeTargets);
  print format("init replicaChains {0}", replicaChains);

  mgmtService = new MgmtService((nodeId = 9001, maxAttempts = config.failDetectionMaxAttempts,
    numStorageServices = config.numStorageServices, replicaChains = replicaChains));

  storageServices = CreateStorageServices(nodeTargets, mgmtService);
  systemMonitor = new SystemMonitor((nodeId = 9002, numClients = config.numClients, mgmtService = mgmtService, storageServices = storageServices));
  testClients = CreateTestClients(config.numClients, config.numChains, config.numIters, config.failStorageServices, mgmtService, storageServices, systemMonitor);

  storageSystem = (mgmt = mgmtService, storages = storageServices, clients = testClients);
  announce eStorageSystem, (system = storageSystem,);
}
|
||||
|
||||
// Returns a byte sequence of length `size` in which every element equals `value`.
// A non-positive `size` yields an empty sequence.
fun InitBytes(size: int, value: int): tBytes {
  var filled: tBytes;

  // Append at the end until the requested length is reached.
  while (sizeof(filled) < size) {
    filled += (sizeof(filled), value);
  }
  return filled;
}
|
||||
|
||||
/* Service Monitor */
|
||||
|
||||
// Lifecycle events; each carries a machine reference as payload.

// Sent by SystemMonitor to an offline storage service; the payload is the monitor.
event eRestart: machine;
// NOTE(review): eStarted and eStartUp are not used in this part of the file;
// presumably the start-up counterparts of eStopped / eShutDown — confirm.
event eStarted: machine;
event eStartUp: machine;

// Asks the receiving machine to shut down.
event eShutDown: machine;
// Received by SystemMonitor after it asks the management service to shut down.
event eStopped: machine;
|
||||
|
||||
// Test-harness monitor. It tracks offline targets reported through routing info, restarts
// the storage services hosting them whenever its timer fires, and shuts the system down
// once all test clients are done and no service is considered offline.
machine SystemMonitor {
  var nodeId: tNodeId;
  var numClients: int;
  var mgmtService: MgmtService;
  var storageServices: tStorageServiceMap;
  var failStorageServices: int; // never assigned; the assignment in Init is commented out
  var mgmtClient: MgmtClient;
  var timer: Timer;

  var stoppedClients: set[tNodeId];
  var offlineTargets: set[tTargetId];
  var offlineServices: set[tNodeId];
  var restartedServices: set[tNodeId];

  // Updates offlineTargets / offlineServices from new routing info and clears
  // restartedServices.
  // NOTE(review): a service is removed from offlineServices as soon as one of its targets
  // comes back; with several targets per node this may be premature — confirm.
  fun processRoutingInfo(routingInfo: tRoutingInfo) {
    var chain: tReplicaChain;
    var tid: tTargetId;

    restartedServices = default(set[tNodeId]);

    foreach (chain in values(routingInfo.replicaChains)) {
      print format("{0}: replication chain: {1}", this, ReplicaChainToString(chain));
      foreach (tid in chain.targets) {
        if (tid in offlineTargets) {
          // Known-offline target: forget it once it is reported in an online state.
          if (chain.states[tid] == PublicTargetState_SERVING ||
              chain.states[tid] == PublicTargetState_SYNCING ||
              chain.states[tid] == PublicTargetState_WAITING)
          {
            offlineTargets -= (tid);
            offlineServices -= (chain.nodes[tid]);
          }
        } else {
          // Not yet tracked: record it if it is reported in an offline state.
          if (chain.states[tid] == PublicTargetState_OFFLINE ||
              chain.states[tid] == PublicTargetState_LASTSRV)
          {
            offlineTargets += (tid);
            offlineServices += (chain.nodes[tid]);
          }
        }
      }
    }
  }

  // Sends eRestart to every service currently considered offline and records it.
  fun restartOfflineServices() {
    var downNode: tNodeId;

    foreach (downNode in offlineServices) {
      send storageServices[downNode], eRestart, this;
      restartedServices += (downNode);
    }
  }

  start state Init {
    entry (args: (nodeId: tNodeId, numClients: int, mgmtService: MgmtService, storageServices: tStorageServiceMap)) {
      nodeId = args.nodeId;
      numClients = args.numClients;
      mgmtService = args.mgmtService;
      storageServices = args.storageServices;
      // failStorageServices = args.failStorageServices;
      mgmtClient = new MgmtClient((nodeId = nodeId, clientHost = this, mgmtService = mgmtService, sendHeartbeats = false));
      timer = CreateTimer(this);
      goto WaitUntilTestDone;
    }
  }

  // Runs while test clients are still active.
  state WaitUntilTestDone {
    ignore eSendHeartbeat;

    entry {
      print format("wait until test done: offlineTargets {0}, offlineServices {1}, restartedServices {2}, stoppedClients {3}",
        offlineTargets, offlineServices, restartedServices, stoppedClients);
      StartTimer(timer);
    }

    // on eSendHeartbeat do (heartbeatConns: tHeartbeatConns) {
    //   send heartbeatConns.mgmtService, eRegisterClientMsg, (from = heartbeatConns.mgmtClient, nodeId = nodeId, storageClient = this);
    // }

    on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
      processRoutingInfo(routingInfo);
      if (sizeof(offlineServices) > 0) {
        StartTimer(timer);
      }
    }

    on eTimeOut do {
      restartOfflineServices();
      if (sizeof(offlineServices) == 0) {
        CancelTimer(timer);
      } else {
        StartTimer(timer);
      }
    }

    on eTestClientDone do (clientId: tNodeId) {
      stoppedClients += (clientId);
      if (sizeof(stoppedClients) == numClients) {
        print format("all test clients stopped");
        send mgmtService, eStopFindNewFailures, 1;
        goto WaitUntilSyncDone;
      }
    }
  }

  // Runs after all clients stopped, until no service is considered offline.
  state WaitUntilSyncDone {
    ignore eSendHeartbeat;

    entry {
      print format("wait until sync done: offlineTargets {0}, offlineServices {1}, restartedServices {2}, stoppedClients {3}",
        offlineTargets, offlineServices, restartedServices, stoppedClients);
      StartTimer(timer);
    }

    on eNewRoutingInfo do (routingInfo: tRoutingInfo) {
      processRoutingInfo(routingInfo);
      if (sizeof(offlineServices) == 0) {
        goto ShutdownSystem;
      } else {
        StartTimer(timer);
      }
    }

    on eTimeOut do {
      restartOfflineServices();
      if (sizeof(offlineServices) == 0) {
        goto ShutdownSystem;
      } else {
        StartTimer(timer);
      }
    }
  }

  // Stops the management client and service first, then every storage service.
  state ShutdownSystem {
    ignore eSendHeartbeat, eNewRoutingInfo, eTimeOut;

    entry {
      var svc: StorageService;

      print format("{0}: all done, restartedServices: {1}", this, restartedServices);
      announce eStopMonitorTargetStates;
      CancelTimer(timer);

      send mgmtClient, eShutDown, this;
      send mgmtService, eShutDown, this;
      // Wait for the management service to confirm before stopping the storage services.
      receive {
        case eStopped: (mgmt: machine) {
          assert mgmt == mgmtService;
        }
      }

      foreach (svc in values(storageServices)) {
        send svc, eShutDown, this;
      }
    }
  }
}
|
||||
|
||||
/* Test Client */
|
||||
|
||||
// DONE: write a different part of the chunk on each write to detect any error
|
||||
// DONE: stop a storage service more than once during test (stop it when it's syncing)
|
||||
// TODO: [new test] stop mgmt client of an alive storage service to simulate network partition
|
||||
// TODO: [new test] shut down storage service and then restart
|
||||
// TODO: [new test] make storage service crash during syncing
|
||||
|
||||
// Map from client id to its TestClient machine.
type tTestClientMap = map[tNodeId, TestClient];
// type tTestStatus = (nodeId: tNodeId, done: bool);

// Sent by a TestClient to the SystemMonitor when it has finished; carries the client's id.
event eTestClientDone : tNodeId;
// event eTestStatusReq : tTestStatus;
// event eTestStatusResp : tTestStatus;
|
||||
|
||||
// Test client. For every chunk id in [chunkIdBegin, chunkIdEnd) it issues `numIters`
// writes into its own region of the chunk, removes the chunk halfway through the
// iterations, and may shut down randomly chosen storage services along the way.
// It reports eTestClientDone to the system monitor when finished.
machine TestClient {
  var clientId: tNodeId;
  // NOTE(review): declared as tChainId although the values are chunk ids; presumably both
  // aliases are int — confirm before retyping.
  var chunkIdBegin: tChainId;
  var chunkIdEnd: tChainId;
  var numIters: int;
  var failStorageServices: int; // remaining number of shutdowns this client may inject

  var storageClient: StorageClient;
  var storageServices: tStorageServiceMap;
  var systemMonitor: SystemMonitor;

  var nextWritePos: int; // number of writes issued so far, over all chunks
  var currIter: int;     // 1-based iteration within the current chunk
  var currChunkId: tChunkId;
  var lastChunkVer: map[tChunkId, tChunkVer];

  // Start offset of this client's region inside a chunk. Client ids are the configured
  // id plus 8000 (see Init), so the first client (8001) starts at offset 0.
  fun ClientRegionOffset(): int {
    return (clientId - 8001) * numIters;
  }

  // Builds write arguments for `length` bytes, all equal to `value`, at `offset`.
  fun CreateNewWrite(chunkId: tChunkId, offset: int, length: int, value: int): tWriteArgs {
    var dataBytes: tBytes;
    var writeArgs: tWriteArgs;

    dataBytes = InitBytes(length, value);
    print format("data bytes size {0}", sizeof(dataBytes));

    writeArgs = (from = this, chunkId = chunkId, offset = offset, length = sizeof(dataBytes), dataBytes = dataBytes);
    print format("{0}: created a new write: {1}", this, writeArgs);

    return writeArgs;
  }

  // Builds the arguments of a remove request: a write with zero length and no data.
  fun CreateNewRemove(chunkId: tChunkId): tWriteArgs {
    var writeArgs: tWriteArgs;

    writeArgs = (from = this, chunkId = chunkId, offset = 0, length = 0, dataBytes = default(tBytes));
    print format("{0}: created a new remove: {1}", this, writeArgs);

    return writeArgs;
  }

  // Builds read arguments for `length` bytes at `offset`.
  fun CreateNewRead(chunkId: tChunkId, offset: int, length: int): tReadArgs {
    var readArgs: tReadArgs;

    readArgs = (from = this, chunkId = chunkId, offset = offset, length = length);
    print format("{0}: created a new read: {1}", this, readArgs);

    return readArgs;
  }

  start state Init {
    // defer eTestStatusReq;

    entry (args: (clientId: tNodeId, chunkIdBegin: int, chunkIdEnd: int, numIters: int, failStorageServices: int, mgmtService: MgmtService, storageServices: tStorageServiceMap, systemMonitor: SystemMonitor)) {
      assert args.chunkIdBegin < args.chunkIdEnd;
      clientId = args.clientId + 8000;
      chunkIdBegin = args.chunkIdBegin;
      chunkIdEnd = args.chunkIdEnd;
      numIters = args.numIters;
      failStorageServices = args.failStorageServices;
      storageServices = args.storageServices;
      systemMonitor = args.systemMonitor;
      nextWritePos = 0;
      storageClient = new StorageClient((clientId = clientId, mgmtService = args.mgmtService));
      send storageClient, eWaitConnected, this;
    }

    on eClientConnected goto SendingWriteReq;
  }

  state SendingWriteReq {
    entry {
      var offset: int;
      var length: int;
      var machineToFail: machine;

      /* ---------------------------------------------------------------------
                                    currChunkId
         ---------------------------------------------------------------------
           client 8001    |   client 8002    |   client 8003    |  ......
         ---------------------------------------------------------------------
         <currIter> bytes | <currIter> bytes | <currIter> bytes |  ......
         ---------------------------------------------------------------------
                   ^             ^
                   |             |
         offset----|<---length-->|
      */

      currChunkId = chunkIdBegin + nextWritePos / numIters;
      currIter = nextWritePos % numIters + 1;
      // Iteration k writes value k to the tail of the client's region, starting at k - 1.
      offset = ClientRegionOffset() + currIter - 1;
      length = numIters - currIter + 1;
      nextWritePos = nextWritePos + 1;

      if (!(currChunkId in lastChunkVer)) {
        lastChunkVer += (currChunkId, 0);
      }

      send storageClient, eSubmitWrite, CreateNewWrite(currChunkId, offset, length, currIter);

      // Failure injection: nondeterministically shut down one storage service.
      if (failStorageServices > 0 && choose()) {
        machineToFail = choose(values(storageServices));
        send machineToFail, eShutDown, machineToFail;
        failStorageServices = failStorageServices - 1;
      }
    }

    on eWriteComplete do (writeRes: tWriteRes) {
      assert writeRes.status == ErrorCode_SUCCESS, format("error: {0}", writeRes);
      // assert lastChunkVer[writeRes.chunkId] < writeRes.commitVer,
      //   format("error: last chunk version {0} >= commit version {1}", lastChunkVer[writeRes.chunkId], writeRes.commitVer);

      // lastChunkVer[writeRes.chunkId] = writeRes.commitVer;

      if (nextWritePos >= numIters * (chunkIdEnd - chunkIdBegin)) {
        goto Done;
      } else if (nextWritePos % numIters == numIters / 2) {
        goto SendingRemoveReq;
      } else {
        goto SendingWriteReq;
      }
    }

    // on eTestStatusReq do (from: machine) {
    //   send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
    // }
  }

  // No other state in this machine transitions here.
  state SendingReadReq {
    entry {
      var offset: int;
      var length: int;

      // Read back this client's whole region, using the same base as the writes.
      offset = ClientRegionOffset();
      length = numIters;

      send storageClient, eSubmitRead, CreateNewRead(currChunkId, offset, length);
    }

    on eReadComplete do (readRes: tReadRes) {
      var i: int;

      if (readRes.status == ErrorCode_CHUNK_NOT_FOUND) {
        print format("{0} chunk {1} removed by other client, re-create the chunk", this, currChunkId);
        goto SendingWriteReq;
      } else if (readRes.status == ErrorCode_TARGET_OFFLINE) {
        // Retry the read.
        goto SendingReadReq;
      } else {
        assert readRes.status == ErrorCode_SUCCESS, format("readRes.status {0}", readRes.status);
        assert readRes.chunkId == currChunkId, format("readRes.chunkId {0} != currChunkId {1}", readRes.chunkId, currChunkId);
        // assert lastChunkVer[currChunkId] <= readRes.chunkMetadata.commitVer,
        //   format("lastChunkVer[currChunkId:{0}] {1} > readRes.chunkMetadata.commitVer {2}",
        //     currChunkId, lastChunkVer[currChunkId], readRes.chunkMetadata.commitVer);

        // if (lastChunkVer[currChunkId] == readRes.chunkMetadata.commitVer) {
        i = 0;
        while (i < sizeof(readRes.dataBytes)) {
          // Byte i is written by iterations 1..i+1, so it can hold at most min(currIter, i + 1).
          assert readRes.dataBytes[i] <= Min(currIter, i + 1),
            format("readRes.dataBytes[i:{0}] {1} > {2}, nextWritePos {3}, currIter {4}",
              i, readRes.dataBytes[i], Min(currIter, i + 1), nextWritePos, currIter);
          i = i + 1;
        }
        // }

        if (nextWritePos % numIters == 0) {
          goto SendingRemoveReq;
        } else {
          goto SendingWriteReq;
        }
      }
    }

    // on eTestStatusReq do (from: machine) {
    //   send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
    // }
  }

  state SendingRemoveReq {
    entry {
      send storageClient, eSubmitWrite, CreateNewRemove(currChunkId);
    }

    on eWriteComplete do (writeRes: tWriteRes) {
      assert writeRes.status == ErrorCode_SUCCESS, format("error: {0}", writeRes);
      // assert lastChunkVer[writeRes.chunkId] < writeRes.commitVer,
      //   format("error: last chunk version {0} >= commit version {1}", lastChunkVer[writeRes.chunkId], writeRes.commitVer);

      // lastChunkVer -= (writeRes.chunkId);

      // check if the chunk removed or re-created
      send storageClient, eSubmitRead, CreateNewRead(currChunkId, 0, numIters);
      receive {
        case eReadComplete: (readRes: tReadRes) {
          if (readRes.status == ErrorCode_CHUNK_NOT_FOUND) {
            print format("Chunk {0} removed, result: {1}", currChunkId, readRes);
          } else {
            print format("Chunk {0} re-created, result: {1}", currChunkId, readRes);
          }
        }
      }

      if (nextWritePos < numIters * (chunkIdEnd - chunkIdBegin)) {
        goto SendingWriteReq;
      } else {
        goto Done;
      }
    }

    // on eTestStatusReq do (from: machine) {
    //   send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = false);
    // }
  }

  state Done {
    entry {
      print format("{0}: all done", this);
      send systemMonitor, eTestClientDone, clientId;
      send storageClient, eShutDown, this;
    }

    // on eTestStatusReq do (from: machine) {
    //   send from, eTestStatusResp, (nodeId = clientId, nextWritePos = nextWritePos, done = true);
    // }
  }
}
|
||||
|
||||
// no failure
|
||||
|
||||
// Test driver: one client, one chain of three replicas on three storage services,
// no failure injection.
machine OneClientWriteNoFailure {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 0, failDetectionMaxAttempts = 11,
             numClients = 1, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: two clients, one chain of three replicas on three storage services,
// no failure injection.
machine TwoClientsWriteNoFailure {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 0, failDetectionMaxAttempts = 11,
             numClients = 2, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: three clients, one chain of three replicas on three storage services,
// no failure injection.
machine ThreeClientsWriteNoFailure {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 0, failDetectionMaxAttempts = 11,
             numClients = 3, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// unreliable failure detector
|
||||
|
||||
// Test driver: one client that may shut down one storage service, with
// failDetectionMaxAttempts lowered to 7.
machine OneClientWriteUnreliableDetector {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 1, failDetectionMaxAttempts = 7,
             numClients = 1, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: two clients, each allowed to shut down one storage service, with
// failDetectionMaxAttempts lowered to 7.
machine TwoClientsWriteUnreliableDetector {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 1, failDetectionMaxAttempts = 7,
             numClients = 2, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// with failures
|
||||
|
||||
// Test driver: one client that may shut down one storage service; three replicas on
// three storage services.
machine OneClientWriteWithFailure {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 1, failDetectionMaxAttempts = 11,
             numClients = 1, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: two clients, each allowed to shut down one storage service; three
// replicas on three storage services.
machine TwoClientsWriteWithFailure {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 1, failDetectionMaxAttempts = 11,
             numClients = 2, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: one client that may shut down storage services up to three times; three
// replicas on three storage services.
machine OneClientWriteWithFailures {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 3, failDetectionMaxAttempts = 11,
             numClients = 1, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: two clients, each allowed to shut down storage services up to three
// times; three replicas on three storage services.
machine TwoClientsWriteWithFailures {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 3, numStorageServices = 3,
             failStorageServices = 3, failDetectionMaxAttempts = 11,
             numClients = 2, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// short chain: two replicas
|
||||
|
||||
// Test driver: short chain of two replicas on two storage services; one client that may
// shut down one storage service.
machine OneClientWriteShortChainWithFailure {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 2, numStorageServices = 2,
             failStorageServices = 1, failDetectionMaxAttempts = 11,
             numClients = 1, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// Test driver: short chain of two replicas on two storage services; two clients, each
// allowed to shut down storage services up to two times.
machine TwoClientsWriteShortChainWithFailures {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 2, numStorageServices = 2,
             failStorageServices = 2, failDetectionMaxAttempts = 11,
             numClients = 2, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
|
||||
// long chain: four replicas
|
||||
|
||||
// Test driver: long chain of four replicas on four storage services; two clients, each
// allowed to shut down storage services up to two times; failDetectionMaxAttempts is 23.
machine TwoClientsWriteLongChainWithFailures {
  start state Init {
    entry {
      var cfg: tSystemConfig;

      cfg = (chunkSize = 16, numChains = 1, numReplicas = 4, numStorageServices = 4,
             failStorageServices = 2, failDetectionMaxAttempts = 23,
             numClients = 2, numIters = 2);
      SetUpStorageSystem(this, cfg);
    }
  }
}
|
||||
57
specs/DataStorage/PTst/TestScript.p
Normal file
57
specs/DataStorage/PTst/TestScript.p
Normal file
@@ -0,0 +1,57 @@
|
||||
// Test cases for the data storage model. Each case runs one test-driver machine together
// with the StorageSystem module and checks the listed specifications.

// ---- no failure ----

test tcOneClientWriteNoFailure [main = OneClientWriteNoFailure]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { OneClientWriteNoFailure };

test tcTwoClientsWriteNoFailure [main = TwoClientsWriteNoFailure]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { TwoClientsWriteNoFailure };

test tcThreeClientsWriteNoFailure [main = ThreeClientsWriteNoFailure]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { ThreeClientsWriteNoFailure };

// ---- unreliable failure detector ----
// These cases do not check AllReplicasInServingState.

test tcOneClientWriteUnreliableDetector [main = OneClientWriteUnreliableDetector]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated in
    union StorageSystem, { OneClientWriteUnreliableDetector };

test tcTwoClientsWriteUnreliableDetector [main = TwoClientsWriteUnreliableDetector]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated in
    union StorageSystem, { TwoClientsWriteUnreliableDetector };

// ---- with failures ----

test tcOneClientWriteWithFailure [main = OneClientWriteWithFailure]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { OneClientWriteWithFailure };

test tcTwoClientsWriteWithFailure [main = TwoClientsWriteWithFailure]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { TwoClientsWriteWithFailure };

test tcOneClientWriteWithFailures [main = OneClientWriteWithFailures]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { OneClientWriteWithFailures };

test tcTwoClientsWriteWithFailures [main = TwoClientsWriteWithFailures]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { TwoClientsWriteWithFailures };

// ---- short chain ----

test tcOneClientWriteShortChainWithFailure [main = OneClientWriteShortChainWithFailure]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { OneClientWriteShortChainWithFailure };

test tcTwoClientsWriteShortChainWithFailures [main = TwoClientsWriteShortChainWithFailures]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { TwoClientsWriteShortChainWithFailures };

// ---- long chain ----

test tcTwoClientsWriteLongChainWithFailures [main = TwoClientsWriteLongChainWithFailures]:
  assert WriteComplete, MonotoneIncreasingVersionNumber, AllReplicasOnChainUpdated, AllReplicasInServingState in
    union StorageSystem, { TwoClientsWriteLongChainWithFailures };
|
||||
163
specs/RDMASocket/PSpec/SystemSpec.p
Normal file
163
specs/RDMASocket/PSpec/SystemSpec.p
Normal file
@@ -0,0 +1,163 @@
|
||||
// Liveness/safety monitor for byte transfer: it counts sent bytes, received bytes and
// outstanding receive requests, and may only settle in the cold state AllDataRecved,
// which is entered when no receive is pending and received bytes equal sent bytes.
spec RecvComplete observes eSendBytes, eRecvBytes, eRecvBytesResp {
  var pendingRecv: int;      // number of outstanding receive requests
  var pendingRecvBytes: int; // bytes requested by outstanding receives
  var sentBytes: int;
  var recvBytes: int;

  // Accounts for a sent payload.
  fun AddSendBytes(payload: tBytes) {
    sentBytes = sentBytes + sizeof(payload);
  }

  // Accounts for a newly posted receive request.
  fun AddRecvBytes(request: (from: machine, length: int)) {
    pendingRecvBytes = pendingRecvBytes + request.length;
    pendingRecv = pendingRecv + 1;
  }

  start hot state NoPendingRecv {
    entry {
      assert pendingRecvBytes == 0, format("{0} pending recv bytes not equal to zero", pendingRecvBytes);
      assert recvBytes <= sentBytes, format("error: {0} recv bytes > {1} sent bytes", recvBytes, sentBytes);

      if (sentBytes == recvBytes) {
        goto AllDataRecved;
      }
    }

    on eSendBytes do AddSendBytes;

    on eRecvBytes goto PendingRecv with AddRecvBytes;
  }

  hot state PendingRecv {
    on eSendBytes do AddSendBytes;

    on eRecvBytes do AddRecvBytes;

    on eRecvBytesResp do (bytes: tBytes) {
      var got: int;

      got = sizeof(bytes);
      recvBytes = recvBytes + got;
      pendingRecvBytes = pendingRecvBytes - got;
      pendingRecv = pendingRecv - 1;
      if (pendingRecv == 0) {
        goto NoPendingRecv;
      }
    }
  }

  cold state AllDataRecved {
    entry {
      print format("all data received");
    }

    on eSendBytes do AddSendBytes;

    on eRecvBytes goto PendingRecv with AddRecvBytes;
  }
}
|
||||
|
||||
// Safety monitor: a buffer index may be posted at most once until its completion is
// polled, and every polled completion must refer to a buffer that is currently posted.
// Send requests/completions with a negative index carry no buffer and are only checked
// for their opcode and a positive immediate value.
spec NoDuplicatePostedBuffers observes ePostSend, ePostRecv, ePollSendCQReturn, ePollRecvCQReturn {
  var postedRecvBufs: set[int];
  var postedSendBufs: set[int];

  start state Init {
    on ePostRecv do (wr: tWorkRequest) {
      assert wr.wrIdx >= 0, format("buffer index {0} < 0", wr.wrIdx);
      assert !(wr.wrIdx in postedRecvBufs), format("buffer with index {0} already posted", wr.wrIdx);
      postedRecvBufs += (wr.wrIdx);
    }

    on ePostSend do (wr: tWorkRequest) {
      if (wr.wrIdx < 0) {
        assert wr.opcode == WROpCode_SEND_WITH_IMM && wr.imm > 0;
      } else {
        assert !(wr.wrIdx in postedSendBufs), format("buffer with index {0} already posted", wr.wrIdx);
        postedSendBufs += (wr.wrIdx);
      }
    }

    on ePollRecvCQReturn do (wc: tWorkComplete) {
      assert wc.wrIdx >= 0, format("buffer index {0} < 0", wc.wrIdx);
      assert wc.wrIdx in postedRecvBufs, format("unexpected buffer index {0} returned", wc.wrIdx);
      postedRecvBufs -= (wc.wrIdx);
    }

    on ePollSendCQReturn do (wc: tWorkComplete) {
      if (wc.wrIdx < 0) {
        assert wc.opcode == WCOpCode_SEND && wc.imm > 0;
      } else {
        assert wc.wrIdx in postedSendBufs, format("unexpected buffer index {0} returned", wc.wrIdx);
        postedSendBufs -= (wc.wrIdx);
      }
    }
  }
}
|
||||
|
||||
// Announces the system configuration to the specification monitors.
event eSystemConfig: (config: tSystemConfig);

// Liveness monitor: keeps one send counter and one receive counter per sender and reaches
// the cold state Done once every counter equals config.numIters. The last element of each
// observed byte sequence is taken as that message's iteration number.
spec AllIterationsProcessed observes eSendBytes, eRecvBytesResp, eSystemConfig {
  var config: tSystemConfig;
  var sendIters: tBytes;
  var recvIters: tBytes;

  // Returns the last element of `bytes`. Asserts that the sequence is non-empty, so an
  // empty payload is reported explicitly instead of failing on index -1.
  fun LastValue(bytes: tBytes): int {
    assert sizeof(bytes) > 0, "observed an empty byte sequence; cannot determine its iteration number";
    return bytes[sizeof(bytes) - 1];
  }

  // Returns true when every counter in `iters` equals config.numIters.
  fun CheckStopCondition(iters: tBytes): bool {
    var i: int;
    i = 0;
    while (i < sizeof(iters)) {
      if (iters[i] != config.numIters) {
        return false;
      }
      i = i + 1;
    }
    return true;
  }

  // Advances the first counter whose next value is `expected` and returns the updated
  // counters. If no counter matches, prints a message and returns `iters` unchanged.
  fun UpdateIters(iters: tBytes, expected: int): tBytes {
    var i: int;
    i = 0;
    while (i < sizeof(iters)) {
      if (expected == iters[i] + 1) {
        iters[i] = iters[i] + 1;
        return iters;
      }
      i = i + 1;
    }
    print format("failed to update iters to {0}", expected);
    return iters;
  }

  start state Init {
    on eSystemConfig goto Communicating with (args: (config: tSystemConfig)) {
      var i: int;
      i = 0;
      config = args.config;
      // One zeroed send counter and one zeroed receive counter per sender.
      while (i < config.numSenders) {
        print format("i {0}/{1}", i, config.numSenders);
        sendIters += (i, 0);
        recvIters += (i, 0);
        i = i + 1;
      }
    }
  }

  hot state Communicating {
    on eSendBytes do (bytes: tBytes) {
      sendIters = UpdateIters(sendIters, LastValue(bytes));
      if (CheckStopCondition(sendIters) && CheckStopCondition(recvIters)) {
        goto Done;
      }
    }

    on eRecvBytesResp do (bytes: tBytes) {
      recvIters = UpdateIters(recvIters, LastValue(bytes));
      if (CheckStopCondition(sendIters) && CheckStopCondition(recvIters)) {
        goto Done;
      }
    }
  }

  cold state Done {
    ignore eSendBytes, eRecvBytesResp;
    entry {
      print format("all iterations processed");
    }
  }
}
|
||||
2
specs/RDMASocket/PSrc/RDMAModules.p
Normal file
2
specs/RDMASocket/PSrc/RDMAModules.p
Normal file
@@ -0,0 +1,2 @@
|
||||
// The RDMA network module: groups the Network, QueuePair and RDMASocket machines.
module RDMANetwork = { Network, QueuePair, RDMASocket };
|
||||
556
specs/RDMASocket/PSrc/RDMASocket.p
Normal file
556
specs/RDMASocket/PSrc/RDMASocket.p
Normal file
@@ -0,0 +1,556 @@
|
||||
type tBytes = seq[int];
|
||||
|
||||
// Builds a sequence of `size` elements, each equal to `value`.
// A non-positive `size` yields an empty sequence.
fun InitBytes(size: int, value: int): tBytes {
  var filled: tBytes;

  // keep appending at the tail until the requested length is reached
  while (sizeof(filled) < size) {
    filled += (sizeof(filled), value);
  }
  return filled;
}
|
||||
|
||||
// Returns the elements of `a` followed by the elements of `b`.
fun Append(a: tBytes, b: tBytes): tBytes {
  var joined: tBytes;
  var src: int;

  joined = a;
  src = 0;
  while (src < sizeof(b)) {
    joined += (sizeof(joined), b[src]);
    src = src + 1;
  }
  return joined;
}
|
||||
|
||||
enum tWROpCode {
|
||||
WROpCode_INVALID = 100,
|
||||
WROpCode_SEND = 101,
|
||||
WROpCode_SEND_WITH_IMM = 102
|
||||
}
|
||||
|
||||
enum tWCOpCode {
|
||||
WCOpCode_INVALID = 200,
|
||||
WCOpCode_SEND = 201,
|
||||
WCOpCode_RECV = 202,
|
||||
WCOpCode_RECV_WITH_IMM = 203 // no such opcode in ibv APIs, this is added to indicate a recv completion with wc_flags = IBV_WC_WITH_IMM
|
||||
}
|
||||
|
||||
// Maps the opcode of a sent work request to the completion opcode reported
// on the receiving side; any other opcode maps to WCOpCode_INVALID.
fun ConvertWRToWCOpCode(opcode: tWROpCode): tWCOpCode {
  var converted: tWCOpCode;

  converted = WCOpCode_INVALID;
  if (opcode == WROpCode_SEND) {
    converted = WCOpCode_RECV;
  } else if (opcode == WROpCode_SEND_WITH_IMM) {
    converted = WCOpCode_RECV_WITH_IMM;
  }
  return converted;
}
|
||||
|
||||
enum tStatus {
|
||||
Status_OK,
|
||||
Status_ERR,
|
||||
Status_AGAIN
|
||||
}
|
||||
|
||||
type tXmitPacket = (opcode: tWROpCode, payload: tBytes, length: int, imm: int);
|
||||
type tGetPacketResp = (from: QueuePair, status: tStatus, packet: tXmitPacket);
|
||||
|
||||
event ePutPacket: tXmitPacket;
|
||||
event eGetPacket: Network;
|
||||
event eGetPacketResp: tGetPacketResp;
|
||||
event eWaitConnected: machine;
|
||||
event eWaitConnectedResp;
|
||||
event eNextExchangeIter;
|
||||
|
||||
// Models the link between the queue pairs of two connected sockets.
// It connects to both sockets to obtain their queue pairs, then repeatedly
// asks each queue pair for packets and forwards every packet it gets to the
// queue pair on the other side.
machine Network {
  // queue pairs of the endpoints: index 0 belongs to `sock`, index 1 to `peer`
  var qps: seq[QueuePair];
  // machine to notify when the connection is established (null if none)
  var user: machine;

  start state Init {
    entry (args: (sock: RDMASocket, peer: RDMASocket)) {
      send args.sock, eConnect, this;
      receive {
        case eConnectResp: (qp: QueuePair) { qps += (0, qp); }
      }

      send args.peer, eConnect, this;
      receive {
        case eConnectResp: (qp: QueuePair) { qps += (1, qp); }
      }

      print format("network connected {0}", qps);

      if (user != null)
        send user, eWaitConnectedResp;

      goto ExchangePackets;
    }

    on eWaitConnected do (from: machine) {
      user = from;
    }
  }

  state ExchangePackets {
    entry {
      var i: int;
      var n: int;
      i = 0;
      while (i < sizeof(qps)) {
        // request a nondeterministic number of packets between 1..3
        // (choose(3) yields 0, 1 or 2)
        n = choose(3) + 1;
        while (n > 0) {
          send qps[i], eGetPacket, this;
          n = n - 1;
        }
        i = i + 1;
      }
    }

    // the connection already exists in this state, so answer immediately
    on eWaitConnected do (from: machine) {
      send from, eWaitConnectedResp;
    }

    on eGetPacketResp do (resp: tGetPacketResp) {
      var i: int;

      if (resp.status == Status_OK) {
        // the destination is the first queue pair that is not the sender
        i = 0;
        while (i < sizeof(qps)) {
          if (qps[i] != resp.from)
            break;
          i = i + 1;
        }
        assert i < sizeof(qps), "no destination queue pair for packet";
        send qps[i], ePutPacket, resp.packet;
      }

      // re-enter this state to issue another round of packet requests
      send this, eNextExchangeIter;
    }

    on eNextExchangeIter goto ExchangePackets;
  }
}
|
||||
|
||||
type tWorkComplete = (wrIdx: int, opcode: tWCOpCode, payload: tBytes, length: int, imm: int, status: tStatus);
|
||||
type tWorkRequest = (wrIdx: int, opcode: tWROpCode, payload: tBytes, length: int, imm: int);
|
||||
|
||||
event ePostRecv: tWorkRequest;
|
||||
event ePostSend: tWorkRequest;
|
||||
event ePollRecvCQ: RDMASocket;
|
||||
event ePollSendCQ: RDMASocket;
|
||||
|
||||
// Models a queue pair: posted send/receive work requests, the two completion
// queues, and the bookkeeping for requesters (network, socket) that asked for
// something that was not yet available.
machine QueuePair {
  var maxNumSendWRs: int;
  var maxNumRecvWRs: int;
  var postedRecvWRs: seq[tWorkRequest];
  var postedSendWRs: seq[tWorkRequest];
  var sendCompQueue: seq[tWorkComplete];
  var recvCompQueue: seq[tWorkComplete];
  var outboundQueue: seq[tXmitPacket];
  var inboundQueue: seq[tXmitPacket];

  // users waiting on events
  var network: Network;
  var sockPollSendCQ: RDMASocket;
  var sockPollRecvCQ: RDMASocket;
  var pendingGetPkt: int;
  var pendingPollSend: int;
  var pendingPollRecv: int;

  // Takes the oldest posted send request, records its send completion
  // (handing it to a waiting send-CQ poller if there is one) and delivers the
  // request's contents to `net` as a packet.
  fun PushPacketToNetwork(net: Network) {
    var request: tWorkRequest;
    var completion: tWorkComplete;
    var outgoing: tXmitPacket;

    request = postedSendWRs[0];
    postedSendWRs -= (0);
    print format("{0} -sizeof postedSendWRs {1}", this, sizeof(postedSendWRs));

    outgoing = (opcode = request.opcode,
                payload = request.payload,
                length = request.length,
                imm = request.imm);

    completion = (wrIdx = request.wrIdx,
                  opcode = WCOpCode_SEND,
                  payload = request.payload,
                  length = request.length,
                  imm = request.imm,
                  status = Status_OK);
    sendCompQueue += (sizeof(sendCompQueue), completion);
    print format("{0} +sizeof sendCompQueue {1}", this, sizeof(sendCompQueue));

    if (pendingPollSend > 0) {
      NotifySendCQ(sockPollSendCQ);
      pendingPollSend = pendingPollSend - 1;
      if (pendingPollSend == 0) {
        sockPollSendCQ = default(RDMASocket);
      }
    }

    send net, eGetPacketResp, (from = this, status = Status_OK, packet = outgoing);
  }

  // Pops the oldest receive completion and sends it to `sock`.
  fun NotifyRecvCQ(sock: RDMASocket) {
    var head: tWorkComplete;
    head = recvCompQueue[0];
    recvCompQueue -= (0);
    print format("{0} -sizeof recvCompQueue {1}", this, sizeof(recvCompQueue));
    send sock, ePollRecvCQReturn, head;
  }

  // Pops the oldest send completion and sends it to `sock`.
  fun NotifySendCQ(sock: RDMASocket) {
    var head: tWorkComplete;
    head = sendCompQueue[0];
    sendCompQueue -= (0);
    print format("{0} -sizeof sendCompQueue {1}", this, sizeof(sendCompQueue));
    send sock, ePollSendCQReturn, head;
  }

  start state Init {
    entry (args: (maxNumSendWRs: int, maxNumRecvWRs: int)) {
      print format("qp init start {0}", this);
      maxNumRecvWRs = args.maxNumRecvWRs;
      maxNumSendWRs = args.maxNumSendWRs;
      print format("qp init done {0}", this);
      goto WaitForEvents;
    }
  }

  state WaitForEvents {
    on ePostRecv do (request: tWorkRequest) {
      assert sizeof(postedRecvWRs) < maxNumRecvWRs;
      postedRecvWRs += (sizeof(postedRecvWRs), request);
      print format("{0} +sizeof postedRecvWRs {1}", this, sizeof(postedRecvWRs));
    }

    on ePostSend do (request: tWorkRequest) {
      assert sizeof(postedSendWRs) < maxNumSendWRs;
      postedSendWRs += (sizeof(postedSendWRs), request);
      print format("{0} +sizeof postedSendWRs {1}", this, sizeof(postedSendWRs));

      // serve one outstanding packet request from the network, if any
      if (pendingGetPkt > 0) {
        PushPacketToNetwork(network);
        pendingGetPkt = pendingGetPkt - 1;
        if (pendingGetPkt == 0) {
          network = default(Network);
        }
      }
    }

    on ePutPacket do (packet: tXmitPacket) {
      var request: tWorkRequest;
      var completion: tWorkComplete;
      var pos: int;

      assert sizeof(postedRecvWRs) > 0, "error: receive not ready";
      request = postedRecvWRs[0];
      postedRecvWRs -= (0);
      print format("{0} -sizeof postedRecvWRs {1}", this, sizeof(postedRecvWRs));

      assert packet.length <= request.length;

      completion = (wrIdx = request.wrIdx,
                    opcode = ConvertWRToWCOpCode(packet.opcode),
                    payload = request.payload,
                    length = packet.length,
                    imm = packet.imm,
                    status = Status_OK);

      // copy the packet contents into the posted receive buffer
      pos = 0;
      while (pos < packet.length) {
        completion.payload[pos] = packet.payload[pos];
        pos = pos + 1;
      }

      recvCompQueue += (sizeof(recvCompQueue), completion);
      print format("{0} +sizeof recvCompQueue {1}", this, sizeof(recvCompQueue));

      // serve one outstanding receive-CQ poll, if any
      if (pendingPollRecv > 0) {
        NotifyRecvCQ(sockPollRecvCQ);
        pendingPollRecv = pendingPollRecv - 1;
        if (pendingPollRecv == 0) {
          sockPollRecvCQ = default(RDMASocket);
        }
      }
    }

    on eGetPacket do (net: Network) {
      if (sizeof(postedSendWRs) > 0) {
        PushPacketToNetwork(net);
      } else {
        // nothing to send yet: remember the request instead of replying
        // send net, eGetPacketResp, (from = this, status = Status_AGAIN, packet = default(tXmitPacket));
        if (pendingGetPkt > 0) {
          assert network == net;
        } else {
          network = net;
        }
        pendingGetPkt = pendingGetPkt + 1;
      }
    }

    on ePollRecvCQ do (sock: RDMASocket) {
      if (sizeof(recvCompQueue) > 0) {
        NotifyRecvCQ(sock);
      } else {
        // queue is empty: remember the poller, all polls must come from one socket
        if (pendingPollRecv > 0) {
          assert sockPollRecvCQ == sock;
        } else {
          sockPollRecvCQ = sock;
        }
        pendingPollRecv = pendingPollRecv + 1;
      }
    }

    on ePollSendCQ do (sock: RDMASocket) {
      if (sizeof(sendCompQueue) > 0) {
        NotifySendCQ(sock);
      } else {
        // queue is empty: remember the poller, all polls must come from one socket
        if (pendingPollSend > 0) {
          assert sockPollSendCQ == sock;
        } else {
          sockPollSendCQ = sock;
        }
        pendingPollSend = pendingPollSend + 1;
      }
    }
  }
}
|
||||
|
||||
type tTaggedBuffer = (bufIdx: int, payload: tBytes, length: int);
|
||||
|
||||
event ePollRecvCQReturn: tWorkComplete;
|
||||
event ePollSendCQReturn: tWorkComplete;
|
||||
event eRecvBytes: (from: machine, length: int);
|
||||
event eSendBytes: tBytes;
|
||||
event eRecvBytesResp: tBytes;
|
||||
event eConnect: Network;
|
||||
event eConnectResp: QueuePair;
|
||||
event eNextPollCQIter;
|
||||
|
||||
// Byte-stream socket built on a QueuePair. Outgoing bytes are cut into
// buffer-sized sends, limited by the number of receive buffers the remote side
// is believed to have posted; incoming data is buffered until a user asks for
// it. Every `numRecvBeforeAck` data receives, an immediate-carrying send tells
// the remote side how many buffers were re-posted.
machine RDMASocket {
  var qp: QueuePair;
  var sockId: int;
  var bufSize: int;
  var bufNum: int; // assume the numbers of local and remote send/recv buffers are the same
  var flowCtrlBufNum: int;

  var unusedSendBufs: seq[tTaggedBuffer];
  var remotePostedBufNum: int;
  var effectiveSendBufNum: int;
  var numRecvBeforeAck: int;
  var numRecvSinceLastAck: int;

  var bytesToSend: tBytes;
  var bytesRecved: tBytes;

  // pending recv
  var userWaited: machine;
  var recvedData: tBytes;
  var recvLength: int;

  var pendingPollSendCQ: int;
  var pendingPollRecvCQ: int;

  start state Init {
    entry (args: (sockId: int, bufSize: int, bufNum: int, numRecvBeforeAck: int)) {
      print format("socket init start {0}", this);

      // ceil(bufNum / numRecvBeforeAck) extra work requests for flow control
      flowCtrlBufNum = (args.bufNum + args.numRecvBeforeAck - 1) / args.numRecvBeforeAck;
      qp = new QueuePair((
        maxNumSendWRs = args.bufNum + flowCtrlBufNum,
        maxNumRecvWRs = args.bufNum + flowCtrlBufNum));

      sockId = args.sockId;
      bufNum = args.bufNum;
      bufSize = args.bufSize;

      numRecvBeforeAck = args.numRecvBeforeAck;
      numRecvSinceLastAck = 0;
      effectiveSendBufNum = args.bufNum;
      remotePostedBufNum = args.bufNum;

      print format("socket init done {0} with qp {1}", this, qp);
      goto BeforeConnect;
    }
  }

  state BeforeConnect {
    entry {
      var n: int;
      var firstIdx: int;

      // buffer indices of this socket start at sockId * bufNum * 2
      firstIdx = sockId * bufNum * 2;

      print format("post {0} recv buffers in {1}", bufNum, this);

      n = 0;
      while (n < bufNum + flowCtrlBufNum) {
        send qp, ePostRecv, (wrIdx = firstIdx + n, opcode = WROpCode_INVALID, payload = InitBytes(bufSize, 0), length = bufSize, imm = 0);
        n = n + 1;
      }

      print format("create {0} send buffers", effectiveSendBufNum);

      n = 0;
      while (n < effectiveSendBufNum) {
        unusedSendBufs += (n, (bufIdx = firstIdx + n, payload = InitBytes(bufSize, 0), length = bufSize));
        n = n + 1;
      }

      goto WaitConnect;
    }
  }

  state WaitConnect {
    on eConnect do (net: Network) {
      print format("{0} connected to {1}", this, net);
      send net, eConnectResp, qp;
      goto PollCQEvents;
    }
  }

  state PollCQEvents {
    entry {
      var filled: int;
      var sendBuf: tTaggedBuffer;

      print format("{0} sizeof(bytesToSend) {1} && sizeof(unusedSendBufs) {2} && remotePostedBufNum {3}",
        this, sizeof(bytesToSend), sizeof(unusedSendBufs), remotePostedBufNum);

      // report why pending data cannot be sent right now, if that is the case
      if (sizeof(bytesToSend) > 0) {
        if (remotePostedBufNum == 0) {
          print format("{0}: remote side not posted any recv buffer", this);
        }
        if (sizeof(unusedSendBufs) == 0) {
          print format("{0}: local side does not have send buffer", this);
        }
      }

      // post as many sends as data, local buffers and remote credits allow
      while (sizeof(bytesToSend) > 0 && sizeof(unusedSendBufs) > 0 && remotePostedBufNum > 0) {
        remotePostedBufNum = remotePostedBufNum - 1;
        sendBuf = unusedSendBufs[0];
        unusedSendBufs -= (0);

        // move up to bufSize bytes from the pending stream into the buffer
        filled = 0;
        while (filled < bufSize && sizeof(bytesToSend) > 0) {
          sendBuf.payload[filled] = bytesToSend[0];
          bytesToSend -= (0);
          filled = filled + 1;
        }

        send qp, ePostSend, (wrIdx = sendBuf.bufIdx, opcode = WROpCode_SEND, payload = sendBuf.payload, length = filled, imm = 0);
      }

      // keep bufNum + flowCtrlBufNum receive-CQ polls outstanding
      while (pendingPollRecvCQ < bufNum + flowCtrlBufNum) {
        send qp, ePollRecvCQ, this;
        pendingPollRecvCQ = pendingPollRecvCQ + 1;
      }

      // keep bufNum send-CQ polls outstanding
      while (pendingPollSendCQ < bufNum) {
        send qp, ePollSendCQ, this;
        pendingPollSendCQ = pendingPollSendCQ + 1;
      }
    }

    on ePollRecvCQReturn do (wc: tWorkComplete) {
      var pos: int;
      var copied: int;
      var isData: bool;

      if (wc.status != Status_OK) {
        // Status_AGAIN is silently tolerated, anything else is an error
        assert wc.status == Status_AGAIN, "Unexpected wc status";
        return;
      }

      assert wc.opcode == WCOpCode_RECV_WITH_IMM || wc.opcode == WCOpCode_RECV;
      isData = (wc.opcode == WCOpCode_RECV);

      if (!isData) {
        // flow-control packet: the immediate value adds remote receive credits
        remotePostedBufNum = remotePostedBufNum + wc.imm;
        print format("{0} received flow control packet with imm {1}, remotePostedBufNum {2}", this, wc.imm, remotePostedBufNum);
      } else {
        // data packet: append the received bytes to the local stream
        pos = 0;
        while (pos < wc.length) {
          bytesRecved += (sizeof(bytesRecved), wc.payload[pos]);
          pos = pos + 1;
        }

        print format("recv cq returned, user {0} waited", userWaited);

        if (userWaited != null) {
          print format("recvLength {0}, sizeof(recvedData) {1}, sizeof(bytesRecved) {2}",
            recvLength, sizeof(recvedData), sizeof(bytesRecved));

          // continue filling the pending user request from the stream
          copied = 0;
          while (sizeof(recvedData) < recvLength && sizeof(bytesRecved) > 0) {
            recvedData += (sizeof(recvedData), bytesRecved[0]);
            bytesRecved -= (0);
            copied = copied + 1;
          }

          print format("copy recv data, copy length {0}, recvLength {1}, recvedData {2}", copied, recvLength, recvedData);

          if (sizeof(recvedData) == recvLength) {
            send userWaited, eRecvBytesResp, recvedData;
            userWaited = default(machine);
            recvedData = default(tBytes);
            recvLength = 0;
          }
        }
      }

      // hand the buffer back to the queue pair
      send qp, ePostRecv, (wrIdx = wc.wrIdx, opcode = WROpCode_INVALID, payload = wc.payload, length = bufSize, imm = 0);

      if (isData) {
        numRecvSinceLastAck = numRecvSinceLastAck + 1;
        if (numRecvSinceLastAck == numRecvBeforeAck) {
          // tell the remote side how many buffers were re-posted
          send qp, ePostSend, (wrIdx = -1, opcode = WROpCode_SEND_WITH_IMM, payload = default(tBytes), length = 0, imm = numRecvSinceLastAck);
          numRecvSinceLastAck = 0;
        }
      }

      assert pendingPollRecvCQ > 0;
      pendingPollRecvCQ = pendingPollRecvCQ - 1;
      send this, eNextPollCQIter;
    }

    on ePollSendCQReturn do (wc: tWorkComplete) {
      var released: tTaggedBuffer;

      if (wc.status != Status_OK) {
        // Status_AGAIN is silently tolerated, anything else is an error
        assert wc.status == Status_AGAIN, "Unexpected wc status";
        return;
      }

      assert wc.opcode == WCOpCode_SEND, "Unexpected wc opcode";

      // completions with a negative index do not return a send buffer
      if (wc.wrIdx >= 0) {
        released = (bufIdx = wc.wrIdx, payload = wc.payload, length = bufSize);
        unusedSendBufs += (sizeof(unusedSendBufs), released);
      }

      assert pendingPollSendCQ > 0;
      pendingPollSendCQ = pendingPollSendCQ - 1;
      send this, eNextPollCQIter;
    }

    on eRecvBytes do (args: (from: machine, length: int)) {
      var taken: int;

      print format("{0} requested to receive {1} bytes, sizeof(bytesRecved) {2}", args.from, args.length, sizeof(bytesRecved));

      // serve as much as possible from the bytes already received
      taken = 0;
      while (taken < args.length && sizeof(bytesRecved) > 0) {
        recvedData += (taken, bytesRecved[0]);
        bytesRecved -= (0);
        taken = taken + 1;
      }

      if (sizeof(recvedData) != args.length) {
        // not enough data yet: remember the request until more arrives
        userWaited = args.from;
        recvLength = args.length;
      } else {
        send args.from, eRecvBytesResp, recvedData;
        recvedData = default(tBytes);
      }
      send this, eNextPollCQIter;
    }

    on eSendBytes do (bytes: tBytes) {
      var pos: int;

      // queue the data, reducing every element modulo 256
      pos = 0;
      while (pos < sizeof(bytes)) {
        bytesToSend += (sizeof(bytesToSend), bytes[pos] % 256);
        pos = pos + 1;
      }
      send this, eNextPollCQIter;
    }

    on eNextPollCQIter goto PollCQEvents;
  }
}
|
||||
418
specs/RDMASocket/PTst/TestDriver.p
Normal file
418
specs/RDMASocket/PTst/TestDriver.p
Normal file
@@ -0,0 +1,418 @@
|
||||
// Encodes `n` as four base-256 digits, least significant digit first.
// Asserts that nothing is left over after four digits.
fun ConvertToInt4Bytes(n: int): tBytes {
  var encoded: tBytes;
  var rest: int;

  rest = n;
  while (sizeof(encoded) < 4) {
    encoded += (sizeof(encoded), rest % 256);
    rest = rest / 256;
  }
  assert rest == 0;
  return encoded;
}
|
||||
|
||||
// Decodes the first four elements of `bytes` as base-256 digits, least
// significant digit first. Requires at least four elements.
fun Convert4BytesToInt(bytes: tBytes): int {
  var value: int;
  var digit: int;

  assert sizeof(bytes) >= 4;

  // Horner evaluation from the most significant digit down
  value = 0;
  digit = 3;
  while (digit >= 0) {
    value = value * 256 + bytes[digit];
    digit = digit - 1;
  }
  return value;
}
|
||||
|
||||
// Asks `socket` for exactly `length` bytes on behalf of `user` and blocks
// until the eRecvBytesResp arrives. Asserts that the response has the
// requested size and returns it.
fun RecvBytes(user: machine, socket: RDMASocket, length: int): tBytes {
  var result: tBytes;

  send socket, eRecvBytes, (from = user, length = length);
  receive {
    case eRecvBytesResp: (bytes: tBytes) {
      assert sizeof(bytes) == length;
      result = bytes;
    }
  }

  return result;
}
|
||||
|
||||
// Receives one length-prefixed message: a 4-element header holding the body
// length, followed by the body itself. Returns the body.
fun RecvMessage(user: machine, socket: RDMASocket): tBytes {
  var bodyLen: int;
  var body: tBytes;

  bodyLen = Convert4BytesToInt(RecvBytes(user, socket, 4));
  print format("{0} try to receive message of length {1}", user, bodyLen);

  body = RecvBytes(user, socket, bodyLen);
  print format("{0} received message {1}", user, body);
  return body;
}
|
||||
|
||||
// Sends `message` through `socket`, prefixed with a 4-element header that
// encodes the message length.
fun SendMessage(user: machine, socket: RDMASocket, message: tBytes) {
  var framed: tBytes;

  framed = Append(ConvertToInt4Bytes(sizeof(message)), message);
  send socket, eSendBytes, framed;
  print format("{0} sent message {1}", user, message);
}
|
||||
|
||||
type tSystemConfig = (
|
||||
bufSize: int,
|
||||
bufNum: int,
|
||||
numRecvBeforeAck: int,
|
||||
numIters: int,
|
||||
numSenders: int
|
||||
);
|
||||
|
||||
type tNetworkSystem = (
|
||||
sock: RDMASocket,
|
||||
peer: RDMASocket,
|
||||
net: Network
|
||||
);
|
||||
|
||||
// Creates two sockets (ids 1 and 2) with the buffer settings from `config`,
// a Network joining them, and blocks until the network reports that it is
// connected. Returns the three machines.
fun CreateRDMASocketPair(user: machine, config: tSystemConfig): tNetworkSystem {
  var created: tNetworkSystem;

  created.sock = new RDMASocket((sockId = 1, bufSize = config.bufSize, bufNum = config.bufNum, numRecvBeforeAck = config.numRecvBeforeAck));
  created.peer = new RDMASocket((sockId = 2, bufSize = config.bufSize, bufNum = config.bufNum, numRecvBeforeAck = config.numRecvBeforeAck));
  created.net = new Network((sock = created.sock, peer = created.peer));
  print format("network system created {0}", created);

  // wait for the network to confirm that both sockets are connected
  send created.net, eWaitConnected, user;
  receive {
    case eWaitConnectedResp: { }
  }
  print format("network system connected {0}", created);

  return created;
}
|
||||
|
||||
/* Ping-pong server and client */
|
||||
|
||||
// Echo server: sends every received message back until an empty message
// arrives.
machine PingPongServer {
  var socket: RDMASocket;

  start state Init {
    entry (args: (socket: RDMASocket)) {
      print format("server init {0}", this);
      socket = args.socket;
      print format("server started {0}", this);
      goto ProcessPing;
    }
  }

  state ProcessPing {
    entry {
      var ping: tBytes;

      ping = RecvMessage(this, socket);
      if (sizeof(ping) > 0) {
        SendMessage(this, socket, ping);
        goto ProcessPing;
      } else {
        // client disconnected
        goto Stopped;
      }
    }
  }

  state Stopped {
    entry {
      print format("{0} stopped", this);
    }
  }
}
|
||||
|
||||
// Ping-pong client: for config.numIters rounds it sends a message of random
// length filled with the round number (mod 256), waits for the reply and
// checks it element by element. Finally it sends an empty message.
machine PingPongClient {
  var config: tSystemConfig;
  var socket: RDMASocket;
  var server: PingPongServer;

  var currIter: int;
  var message: tBytes;
  var response: tBytes;

  start state Init {
    entry (args: (config: tSystemConfig, socket: RDMASocket, server: PingPongServer)) {
      print format("client init {0}", this);

      currIter = 0;
      server = args.server;
      socket = args.socket;
      config = args.config;

      print format("client init done {0}", this);
      goto SendPing;
    }
  }

  state SendPing {
    entry {
      var pingLen: int;

      currIter = currIter + 1;
      // message length is chosen nondeterministically, at least 1
      pingLen = choose(config.bufSize * config.bufNum * 2) + 1;
      message = InitBytes(pingLen, currIter % 256);
      print format("#{0} message {1}", currIter, message);

      SendMessage(this, socket, message);
      goto WaitPong;
    }
  }

  state WaitPong {
    entry {
      var pos: int;

      response = RecvMessage(this, socket);
      assert sizeof(message) == sizeof(response);

      // the reply must match the ping, and both must carry the round tag
      pos = 0;
      while (pos < sizeof(response)) {
        assert response[pos] == message[pos] && message[pos] == currIter % 256;
        pos = pos + 1;
      }

      if (currIter >= config.numIters) {
        goto Stopped;
      } else {
        goto SendPing;
      }
    }
  }

  state Stopped {
    entry {
      SendMessage(this, socket, default(tBytes)); // disconnect
      print format("{0} stopped", this);
    }
  }
}
|
||||
|
||||
// Test driver: announces the configuration, builds the socket pair and starts
// a ping-pong server on `peer` and a client on `sock`.
machine PingPongTest {
  start state Init {
    entry {
      var cfg: tSystemConfig;
      var sockets: tNetworkSystem;
      var echo: PingPongServer;
      var pinger: PingPongClient;

      print format("test init {0}", this);

      // two senders: the client sends pings and the server sends them back
      cfg = (bufSize = 16, bufNum = 10, numRecvBeforeAck = 4, numIters = 10, numSenders = 2);
      announce eSystemConfig, (config = cfg,);

      sockets = CreateRDMASocketPair(this, cfg);
      echo = new PingPongServer((socket = sockets.peer,));
      pinger = new PingPongClient((config = cfg, socket = sockets.sock, server = echo));

      print format("test init done {0}", this);
    }
  }
}
|
||||
|
||||
/* One-way communication */
|
||||
|
||||
// Receiver of the one-way test: keeps receiving messages until an empty one
// arrives.
machine OneWayReceiver {
  var socket: RDMASocket;

  start state Init {
    entry (args: (socket: RDMASocket)) {
      print format("receiver init {0}", this);
      socket = args.socket;
      print format("receiver started {0}", this);
      goto Receiving;
    }
  }

  state Receiving {
    entry {
      var incoming: tBytes;

      incoming = RecvMessage(this, socket);

      if (sizeof(incoming) != 0) {
        goto Receiving;
      } else {
        // client disconnected
        goto Stopped;
      }
    }
  }

  state Stopped {
    entry {
      print format("{0} stopped", this);
    }
  }
}
|
||||
|
||||
// Sender of the one-way test: sends config.numIters messages of random
// length, each filled with its round number (mod 256), then an empty message.
machine OneWaySender {
  var config: tSystemConfig;
  var socket: RDMASocket;
  var receiver: OneWayReceiver;

  var currIter: int;
  var message: tBytes;
  var response: tBytes;

  start state Init {
    entry (args: (config: tSystemConfig, socket: RDMASocket, receiver: OneWayReceiver)) {
      print format("sender init {0}", this);

      currIter = 0;
      receiver = args.receiver;
      socket = args.socket;
      config = args.config;

      print format("sender init done {0}", this);
      goto Sending;
    }
  }

  state Sending {
    entry {
      var payloadLen: int;

      currIter = currIter + 1;
      // message length is chosen nondeterministically, at least 1
      payloadLen = choose(config.bufSize * config.bufNum * 2) + 1;
      message = InitBytes(payloadLen, currIter % 256);
      print format("#{0} message {1}", currIter, message);

      SendMessage(this, socket, message);

      if (currIter >= config.numIters) {
        goto Stopped;
      } else {
        goto Sending;
      }
    }
  }

  state Stopped {
    entry {
      SendMessage(this, socket, default(tBytes)); // disconnect
      print format("{0} stopped", this);
    }
  }
}
|
||||
|
||||
// Test driver: announces the configuration, builds the socket pair and starts
// a receiver on `peer` and a sender on `sock`.
machine OneWayCommunication {
  start state Init {
    entry {
      var cfg: tSystemConfig;
      var sockets: tNetworkSystem;
      var sink: OneWayReceiver;
      var source: OneWaySender;

      // only one side sends in this scenario
      cfg = (bufSize = 16, bufNum = 10, numRecvBeforeAck = 4, numIters = 10, numSenders = 1);
      announce eSystemConfig, (config = cfg,);

      sockets = CreateRDMASocketPair(this, cfg);
      sink = new OneWayReceiver((socket = sockets.peer,));
      source = new OneWaySender((config = cfg, socket = sockets.sock, receiver = sink));
    }
  }
}
|
||||
|
||||
/* Two-way communication */
|
||||
|
||||
event eSetPeer: (peer: TwoWaySenderReceiver);
|
||||
event eNextSendAndRecvIter;
|
||||
|
||||
// Endpoint of the two-way test: sends config.numIters messages while
// receiving length-prefixed messages from the other side at the same time.
machine TwoWaySenderReceiver {
  var config: tSystemConfig;
  var socket: RDMASocket;
  var peer: TwoWaySenderReceiver;

  var currIter: int;
  // negative while a header is expected, otherwise the expected body length
  var recvLen: int;
  var message: tBytes;
  var response: tBytes;

  // Handles one receive response, alternating between header and body, and
  // requests the next piece. After a header announcing length 0 no further
  // receive is requested.
  fun ProcessRecv (bytes: tBytes) {
    if (recvLen >= 0) {
      // a message body was pending: check it and ask for the next header
      assert sizeof(bytes) == recvLen;
      recvLen = -1;
      send socket, eRecvBytes, (from = this, length = 4);
    } else {
      // a header was pending: decode the body length and ask for the body
      assert sizeof(bytes) == 4;
      recvLen = Convert4BytesToInt(bytes);
      if (recvLen > 0) {
        send socket, eRecvBytes, (from = this, length = recvLen);
      }
    }
  }

  start state Init {
    entry (args: (config: tSystemConfig, socket: RDMASocket)) {
      print format("two-way sender/receiver init {0}", this);

      recvLen = -1;
      currIter = 0;
      socket = args.socket;
      config = args.config;

      print format("two-way sender/receiver init done {0}", this);
      // start receiving right away by requesting the first header
      send socket, eRecvBytes, (from = this, length = 4);
      goto WaitPeer;
    }
  }

  state WaitPeer {
    on eRecvBytesResp do ProcessRecv;

    on eSetPeer goto SendAndRecv with (args: (peer: TwoWaySenderReceiver)) {
      peer = args.peer;
    }
  }

  state SendAndRecv {
    entry {
      var payloadLen: int;

      currIter = currIter + 1;
      // message length is chosen nondeterministically, at least 1
      payloadLen = choose(config.bufSize * config.bufNum * 2) + 1;
      message = InitBytes(payloadLen, currIter % 256);
      print format("#{0} message {1}", currIter, message);

      SendMessage(this, socket, message);

      if (currIter != config.numIters) {
        // schedule the next round through an event so receives can interleave
        send this, eNextSendAndRecvIter;
      } else {
        goto StopSend;
      }
    }

    on eRecvBytesResp do ProcessRecv;

    on eNextSendAndRecvIter goto SendAndRecv;
  }

  state StopSend {
    entry {
      SendMessage(this, socket, default(tBytes)); // disconnect
      print format("{0} stopped sending", this);
    }

    on eRecvBytesResp do ProcessRecv;
  }
}
|
||||
|
||||
// Test driver: announces the configuration, builds the socket pair, starts
// one sender/receiver per socket and introduces them to each other.
machine TwoWayCommunication {
  start state Init {
    entry {
      var cfg: tSystemConfig;
      var sockets: tNetworkSystem;
      var onPeer: TwoWaySenderReceiver;
      var onSock: TwoWaySenderReceiver;

      // both endpoints send in this scenario
      cfg = (bufSize = 16, bufNum = 10, numRecvBeforeAck = 4, numIters = 10, numSenders = 2);
      announce eSystemConfig, (config = cfg,);

      sockets = CreateRDMASocketPair(this, cfg);
      onPeer = new TwoWaySenderReceiver((config = cfg, socket = sockets.peer));
      onSock = new TwoWaySenderReceiver((config = cfg, socket = sockets.sock));

      send onPeer, eSetPeer, (peer = onSock,);
      send onSock, eSetPeer, (peer = onPeer,);
    }
  }
}
|
||||
11
specs/RDMASocket/PTst/TestScript.p
Normal file
11
specs/RDMASocket/PTst/TestScript.p
Normal file
@@ -0,0 +1,11 @@
|
||||
test tcPingPong [main = PingPongTest]:
|
||||
assert RecvComplete, NoDuplicatePostedBuffers, AllIterationsProcessed in
|
||||
(union RDMANetwork, { PingPongServer, PingPongClient, PingPongTest });
|
||||
|
||||
test tcOneWay [main = OneWayCommunication]:
|
||||
assert RecvComplete, NoDuplicatePostedBuffers, AllIterationsProcessed in
|
||||
(union RDMANetwork, { OneWayReceiver, OneWaySender, OneWayCommunication });
|
||||
|
||||
test tcTwoWay [main = TwoWayCommunication]:
|
||||
assert RecvComplete, NoDuplicatePostedBuffers, AllIterationsProcessed in
|
||||
(union RDMANetwork, { TwoWaySenderReceiver, TwoWayCommunication });
|
||||
15
specs/RDMASocket/RDMASocket.csproj
Normal file
15
specs/RDMASocket/RDMASocket.csproj
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp3.1</TargetFramework>
|
||||
<ApplicationIcon />
|
||||
<OutputType>Exe</OutputType>
|
||||
<StartupObject />
|
||||
<LangVersion>latest</LangVersion>
|
||||
<OutputPath>POutput/</OutputPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Coyote" Version="1.0.5"/>
|
||||
<PackageReference Include="PCSharpRuntime" Version="*"/>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
10
specs/RDMASocket/RDMASocket.pproj
Normal file
10
specs/RDMASocket/RDMASocket.pproj
Normal file
@@ -0,0 +1,10 @@
|
||||
<!-- P project file for rdma socket -->
|
||||
<Project>
|
||||
<ProjectName>RDMASocket</ProjectName>
|
||||
<InputFiles>
|
||||
<PFile>./PSrc/</PFile>
|
||||
<PFile>./PSpec/</PFile>
|
||||
<PFile>./PTst/</PFile>
|
||||
</InputFiles>
|
||||
<OutputDir>./PGenerated/</OutputDir>
|
||||
</Project>
|
||||
71
specs/README.md
Normal file
71
specs/README.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# P specifications
|
||||
|
||||
## Build prerequisites
|
||||
|
||||
Follow the [official guide](https://p-org.github.io/P/getstarted/install/) to install the [P](https://github.com/p-org/P) framework.
|
||||
|
||||
Or, if `dotnet` is already installed, run the following command to restore the `p` tool declared in the tool manifest (`.config/dotnet-tools.json`).
|
||||
```
|
||||
dotnet tool restore
|
||||
```
|
||||
|
||||
## Run tests
|
||||
|
||||
A helper script [`RunTests.ps1`](RunTests.ps1), implemented in [PowerShell](https://learn.microsoft.com/en-us/powershell/scripting/install/installing-powershell), is used to run tests and summarize the results.
|
||||
|
||||
[`DataStorage`](DataStorage) specifies the CRAQ implementation in 3FS.
|
||||
|
||||
```powershell
|
||||
PS > cd DataStorage
|
||||
PS > ..\RunTests.ps1
|
||||
|
||||
...
|
||||
|
||||
-----------------------
|
||||
Summary of test results
|
||||
-----------------------
|
||||
[02/26/2025 10:57:58] Elapsed time: 372.4s
|
||||
|
||||
test status seed schedules seconds min avg max
|
||||
---- ------ ---- --------- ------- --- --- ---
|
||||
tcOneClientWriteNoFailure[0] pass 1402445568 10 15.8 -1 -1 -1
|
||||
tcTwoClientsWriteNoFailure[0] pass 189933208 10 23.6 -1 -1 -1
|
||||
tcThreeClientsWriteNoFailure[0] pass 3060254145 10 40.7 -1 -1 -1
|
||||
tcOneClientWriteUnreliableDetector[0] pass 2016460916 10 17.7 -1 -1 -1
|
||||
tcTwoClientsWriteUnreliableDetector[0] pass 18777396 10 24.7 -1 -1 -1
|
||||
tcOneClientWriteWithFailure[0] pass 2559323541 10 15.7 -1 -1 -1
|
||||
tcTwoClientsWriteWithFailure[0] pass 1199246267 10 29.9 -1 -1 -1
|
||||
tcOneClientWriteWithFailures[0] pass 672618818 10 15.4 -1 -1 -1
|
||||
tcTwoClientsWriteWithFailures[0] pass 1908913074 10 32.3 -1 -1 -1
|
||||
tcOneClientWriteShortChainWithFailure[0] pass 3031701162 10 6.3 -1 -1 -1
|
||||
tcTwoClientsWriteShortChainWithFailures[0] pass 2907349611 10 16.6 -1 -1 -1
|
||||
tcTwoClientsWriteLongChainWithFailures[0] pass 260515276 10 67.0 -1 -1 -1
|
||||
|
||||
[02/26/2025 10:57:58] All tests passed
|
||||
```
|
||||
|
||||
[`RDMASocket`](RDMASocket) verifies the RDMA socket implementation in 3FS.
|
||||
|
||||
```powershell
|
||||
PS > cd RDMASocket
|
||||
PS > ..\RunTests.ps1
|
||||
|
||||
...
|
||||
|
||||
|
||||
-----------------------
|
||||
Summary of test results
|
||||
-----------------------
|
||||
[02/26/2025 11:19:22] Elapsed time: 40.6s
|
||||
|
||||
test status seed schedules seconds min avg max
|
||||
---- ------ ---- --------- ------- --- --- ---
|
||||
tcPingPong[0] pass 3776118231 10 9.8 -1 -1 -1
|
||||
tcOneWay[0] pass 200216558 10 3.6 -1 -1 -1
|
||||
tcTwoWay[0] pass 1923093627 10 7.1 -1 -1 -1
|
||||
|
||||
[02/26/2025 11:19:22] All tests passed
|
||||
```
|
||||
|
||||
[`Timer`](Timer) includes modified portions of the following open-source project:
|
||||
- The [original implementation](https://github.com/p-org/P/tree/master/Tutorial/Common/Timer) of `Timer` is part of [P tutorials](https://p-org.github.io/P/tutsoutline/) licensed under MIT License.
|
||||
237
specs/RunTests.ps1
Normal file
237
specs/RunTests.ps1
Normal file
@@ -0,0 +1,237 @@
|
||||
# Command-line parameters of the test runner. Each one is documented by how the
# script uses it further down.
param (
|
||||
# Filter given to Get-ChildItem to locate the .pproj file; the first match is used.
[String] $ProjectFilter = "*.pproj",
|
||||
# Forwarded to the checker as --max-steps.
[Alias('ms')]
|
||||
[Int] $MaxSteps = 200000,
|
||||
# Forwarded to the checker as --schedules.
[Alias('i')]
|
||||
[Int] $NumIters = 10,
|
||||
# Number of checker processes started for every test case.
[Alias('p')]
|
||||
[Int] $Parallel = 1,
|
||||
# Forwarded as --seed only when it is >= 0; the default -1 omits the option.
[Alias('s')]
|
||||
[Int64] $Seed = -1,
|
||||
# Scheduling strategy; forwarded to the checker as --sch-<value>.
[ValidateSet('random', 'pos', 'feedback', 'feedbackpos')]
|
||||
[Alias('sch')]
|
||||
[String] $Scheduling = "pos",
|
||||
# Adds --verbose to the checker arguments and copies each task log to
# "<test>.<taskId>.txt" in the current directory.
[Alias('v')]
|
||||
[Switch] $Verbose,
|
||||
# Test cases to run. When empty, the list is taken from `p check --list-tests`.
[Alias('m')]
|
||||
[String[]] $TestMethods = @(),
|
||||
# Regular expression applied with -match; test cases that do not match are skipped.
[Alias('t')]
|
||||
[String] $TestFilter = ".*",
|
||||
# When set, the remaining test cases still run after one of them fails.
[Alias('c')]
|
||||
[Switch] $ContinueOnFailure,
|
||||
# When set, the `p compile` step is skipped.
[Alias('k')]
|
||||
[Switch] $SkipBuildProject,
|
||||
# Delay (milliseconds) slept before each checker process is started.
[Alias('w')]
|
||||
[Int] $StartTaskDelayMilliSecs = 100,
|
||||
# When > 0, checker processes of a test case are force-stopped once the elapsed
# time reaches this many seconds (checked after each 10-second polling sleep).
[Alias('o')]
|
||||
[Int] $TimeoutSecs = 0
|
||||
);
|
||||
|
||||
|
||||
try {
|
||||
$projectPath = Get-ChildItem -Filter $ProjectFilter | Select-Object -First 1
|
||||
$projectFolder = Split-Path -Parent $projectPath.FullName
|
||||
[xml]$projectObj = Get-Content -Path $projectPath.FullName
|
||||
$projectName = $projectObj.Project.ProjectName
|
||||
} catch {
|
||||
Write-Error "Cannot load project file: $ProjectFilter"
|
||||
exit 1
|
||||
}
|
||||
|
||||
if ($SkipBuildProject) {
|
||||
Write-Host -ForegroundColor DarkYellow "Skip building project"
|
||||
$LASTEXITCODE = 0
|
||||
} else {
|
||||
try {
|
||||
Push-Location $projectFolder
|
||||
Write-Host -ForegroundColor DarkYellow "Building project: $projectName"
|
||||
dotnet tool run p compile --pproj $projectPath.FullName
|
||||
} finally {
|
||||
Pop-Location
|
||||
}
|
||||
}
|
||||
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
exit $LASTEXITCODE
|
||||
}
|
||||
|
||||
if ($TestMethods.Count -eq 0) {
|
||||
$TestMethods = dotnet tool run p check --list-tests | Select-String -SimpleMatch tc
|
||||
Write-Host -ForegroundColor Blue "Test methods: {$TestMethods}"
|
||||
}
|
||||
|
||||
$exitCode = 0
|
||||
$testResults = @();
|
||||
$failedTasks = @();
|
||||
$startTime = Get-Date
|
||||
$outputRoot = Join-Path "PCheckerOutput" $startTime.ToString("yyyy-MM-dd_HH-mm-ss")
|
||||
|
||||
foreach ($testMethod in $TestMethods) {
|
||||
if (!($testMethod -match $TestFilter)) {
|
||||
Write-Host -ForegroundColor Blue "Skipped test: $testMethod"
|
||||
continue;
|
||||
}
|
||||
|
||||
$testParams = "--fail-on-maxsteps --max-steps $MaxSteps --schedules $NumIters --sch-$Scheduling --testcase $testMethod"
|
||||
if ($Seed -ge 0) { $testParams += " --seed $Seed" }
|
||||
if ($Verbose) { $testParams += " --verbose" }
|
||||
|
||||
Write-Host -ForegroundColor DarkYellow "Running test: $testMethod"
|
||||
|
||||
# start the test tasks
|
||||
$testStart = Get-Date
|
||||
$testTasks = @{}
|
||||
$testOutputs = @{}
|
||||
|
||||
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
|
||||
Start-Sleep -Milliseconds $StartTaskDelayMilliSecs
|
||||
$outputPath = Join-Path $outputRoot "$testMethod" "t$taskId"
|
||||
New-Item -ItemType Directory -Force $outputPath | Out-Null
|
||||
$testOutput = New-Item (Join-Path $outputPath "test.log")
|
||||
Write-Host "Test task output: $($testOutput.FullName)"
|
||||
$testTask = Start-Process -NoNewWindow -Passthru -RedirectStandardOutput $testOutput -FilePath "dotnet" -ArgumentList "tool run p check $testParams --outdir $outputPath"
|
||||
$testTasks[$taskId] = $testTask
|
||||
$testOutputs[$taskId] = $testOutput
|
||||
}
|
||||
|
||||
while ($true) {
|
||||
$runningTasks = @()
|
||||
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
|
||||
if (!$testTasks[$taskId].HasExited) { $runningTasks += $taskId }
|
||||
}
|
||||
|
||||
if ($runningTasks.Count -eq 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
Write-Host "[$(Get-Date)] Found $($runningTasks.Count) running test processes: $($runningTasks)"
|
||||
Start-Sleep -Seconds 10
|
||||
|
||||
if ($TimeoutSecs -gt 0) {
|
||||
$elapsedSecs = (New-TimeSpan -Start $testStart -End (Get-Date)).TotalSeconds
|
||||
if ($elapsedSecs -ge $TimeoutSecs) {
|
||||
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Elapsed time ($($elapsedSecs.ToString("#.#"))s) exceeds timeout ($($timeoutSecs)s), stopping tests..."
|
||||
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
|
||||
Stop-Process -Force $testTasks[$taskId]
|
||||
$testTasks[$taskId].WaitForExit()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$elapsedSecs = (New-TimeSpan -Start $testStart -End (Get-Date)).TotalSeconds
|
||||
Write-Host "[$(Get-Date)] Test processes stopped after running for $($elapsedSecs.ToString("#.#"))s"
|
||||
|
||||
# print outputs of test processes
|
||||
for ($taskId = 0; $taskId -lt $Parallel; $taskId++) {
|
||||
if ($Verbose) {
|
||||
Copy-Item -Verbose -Force $testOutputs[$taskId] ($testMethod + ".$taskId.txt")
|
||||
}
|
||||
|
||||
$numSchedPoints = Get-Content $testOutputs[$taskId] | Select-String -Pattern "(\d+) \(min\), (\d+) \(avg\), (\d+) \(max\)" |
|
||||
Foreach-Object {
|
||||
$min, $avg, $max = $_.Matches[0].Groups[1..3].Value
|
||||
[PSCustomObject] @{
|
||||
min = $min
|
||||
avg = $avg
|
||||
max = $max
|
||||
}
|
||||
} | Select-Object -Last 1
|
||||
|
||||
$taskName = "$testMethod[$taskId]"
|
||||
|
||||
$foundBug = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Checker found a bug" |
|
||||
Foreach-Object { ($_.Matches[0].Groups[0].Value) } | Select-Object -First 1
|
||||
|
||||
$testStatus = if ($null -eq $foundBug) { "pass" } else { "fail" }
|
||||
|
||||
$testSeed = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Checker is using '[A-Za-z]+' strategy \(seed:(\d+)\)" |
|
||||
Foreach-Object { [Int64]($_.Matches[0].Groups[1].Value) } | Select-Object -First 1
|
||||
|
||||
$testSchedules = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Explored ([\d.]+) schedules" |
|
||||
Foreach-Object { [Int64]($_.Matches[0].Groups[1].Value) } | Select-Object -First 1
|
||||
|
||||
$testSeconds = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Elapsed ([\d.]+) sec" |
|
||||
Foreach-Object { [Double]($_.Matches[0].Groups[1].Value) } | Select-Object -First 1
|
||||
|
||||
# Append one summary row for this checker process.
#
# $testSeconds is $null when the log contains no "Elapsed ... sec" line, e.g.
# when the process was stopped by the timeout or exited early. Calling
# .ToString() on $null raises an error and the whole row would be missing from
# the summary, so fall back to -1, the same sentinel used below for the
# min/avg/max columns when no scheduling-point statistics were found.
$testResults += [PSCustomObject] @{
    test      = $taskName
    status    = $testStatus
    seed      = $testSeed
    schedules = $testSchedules
    seconds   = if ($null -ne $testSeconds) { $testSeconds.ToString("0.0") } else { -1 }
    min       = if ($null -ne $numSchedPoints) { $numSchedPoints.min } else { -1 }
    avg       = if ($null -ne $numSchedPoints) { $numSchedPoints.avg } else { -1 }
    max       = if ($null -ne $numSchedPoints) { $numSchedPoints.max } else { -1 }
};
|
||||
|
||||
if ($null -ne $foundBug) {
|
||||
Write-Host -ForegroundColor DarkYellow "Test process output #${taskId}/${Parallel}:"
|
||||
Get-Content $testOutputs[$taskId]
|
||||
|
||||
$checkerOutputPath = Get-Content $testOutputs[$taskId] | Select-String -Pattern "Writing (.*)" |
|
||||
Foreach-Object { ($_.Matches[0].Groups[1].Value) } | Select-String -SimpleMatch -Pattern ".txt" | Select-Object -First 1
|
||||
|
||||
# filter key messages from output
|
||||
$checkerOutputFile = Get-Item $checkerOutputPath
|
||||
$debugLogFile = (Join-Path $checkerOutputFile.DirectoryName $checkerOutputFile.BaseName) + ".$testMethod" + ".csv"
|
||||
$logPattern = @(
|
||||
"sent event 'eWriteWork with payload", "sent event 'eWriteWorkDone with payload",
|
||||
"sent event 'eCommitWork with payload", "sent event 'eCommitWorkDone with payload",
|
||||
"sent event 'eWriteReq with payload", "sent event 'eWriteResp with payload",
|
||||
"sent event 'eUpdateMsg with payload", "sent event 'eCommitMsg with payload",
|
||||
"sent event 'eGetTargetSyncInfoResult with payload",
|
||||
"sent event 'eSyncStartResp with payload",
|
||||
"sent event 'eSyncDoneResp with payload",
|
||||
"sent event 'eHaltReq with payload",
|
||||
"sent event 'eShutDown with payload",
|
||||
"sent event 'eRestart with payload",
|
||||
# "dequeued event 'eUpdateTargetStateMsg with payload",
|
||||
"dequeued event 'eNewRoutingInfo with payload",
|
||||
"set its targets offline",
|
||||
"replication chain updated",
|
||||
"start write process",
|
||||
"updatesOfChunkReplica",
|
||||
# "aliveStorageServices",
|
||||
"<ErrorLog>"
|
||||
)
|
||||
Select-String -Path $checkerOutputFile -SimpleMatch -CaseSensitive -Pattern $logPattern |
|
||||
Select-Object -Property 'Line' -First 100000 |
|
||||
Out-File -Width 10000 -Encoding utf8 $debugLogFile
|
||||
Write-Host -ForegroundColor DarkYellow "Debug log: $debugLogFile"
|
||||
# save reprod script to file
|
||||
$outputPath = Join-Path $outputRoot "$testMethod" "t$taskId"
|
||||
$reprodCmdstr = "dotnet tool run p check $testParams -s $testSeed --outdir $outputPath"
|
||||
$reprodScriptFile = (Join-Path $checkerOutputFile.DirectoryName $checkerOutputFile.BaseName) + ".$testMethod" + ".ps1"
|
||||
Set-Content -Path $reprodScriptFile -Value $reprodCmdstr
|
||||
Write-Host -ForegroundColor DarkYellow "Reprod script: $reprodScriptFile"
|
||||
Write-Host -ForegroundColor DarkYellow "Reprod command: $reprodCmdstr"
|
||||
|
||||
# set exit code to indicate the failure
|
||||
$exitCode = -1
|
||||
$failedTasks += $taskName
|
||||
}
|
||||
}
|
||||
|
||||
if (($exitCode -ne 0) -and (-not $ContinueOnFailure)) {
|
||||
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Test $testMethod failed, stopping..."
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$elapsedTime = New-TimeSpan -Start $startTime -End (Get-Date)
|
||||
Write-Host -ForegroundColor DarkYellow "-----------------------"
|
||||
Write-Host -ForegroundColor DarkYellow "Summary of test results"
|
||||
Write-Host -ForegroundColor DarkYellow "-----------------------"
|
||||
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Elapsed time: $($elapsedTime.TotalSeconds.ToString(`"#.#`"))s"
|
||||
|
||||
Format-Table -AutoSize -InputObject $testResults
|
||||
$testResults | Export-Csv -NoTypeInformation -Path (Join-Path $outputRoot "test_results.csv")
|
||||
|
||||
if ($exitCode -eq 0) {
|
||||
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] All tests passed"
|
||||
} else {
|
||||
Write-Host -ForegroundColor DarkYellow "[$(Get-Date)] Failed test tasks: $failedTasks"
|
||||
}
|
||||
|
||||
exit $exitCode
|
||||
20
specs/Timer/LICENSE.txt
Normal file
20
specs/Timer/LICENSE.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2015 P Developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
64
specs/Timer/PSrc/Timer.p
Normal file
64
specs/Timer/PSrc/Timer.p
Normal file
@@ -0,0 +1,64 @@
|
||||
/*****************************************************************************************
|
||||
The timer state machine models the non-deterministic behavior of an OS timer
|
||||
******************************************************************************************/
|
||||
machine Timer
|
||||
{
|
||||
// user of the timer
|
||||
var client: machine;
|
||||
// Init: stores the client machine passed at creation, then goes idle.
start state Init {
|
||||
entry (_client : machine){
|
||||
client = _client;
|
||||
goto WaitForTimerRequests;
|
||||
}
|
||||
}
|
||||
|
||||
// Idle state: eStartTimer arms the timer; eCancelTimer and any leftover
// eDelayedTimeOut are ignored here.
state WaitForTimerRequests {
|
||||
on eStartTimer goto TimerStarted;
|
||||
ignore eCancelTimer, eDelayedTimeOut;
|
||||
}
|
||||
|
||||
// Armed state: every entry makes a nondeterministic choice between firing
// now and postponing the decision.
state TimerStarted {
|
||||
// eStartTimer is deferred while the timer is armed.
defer eStartTimer;
|
||||
entry {
|
||||
// `$` is a nondeterministic boolean choice.
if($)
|
||||
{
|
||||
// fire: notify the client and return to the idle state
send client, eTimeOut;
|
||||
goto WaitForTimerRequests;
|
||||
}
|
||||
else
|
||||
{
|
||||
// postpone: the self-sent event re-enters this state (see the handler
// below), which repeats the choice
send this, eDelayedTimeOut;
|
||||
}
|
||||
}
|
||||
on eDelayedTimeOut goto TimerStarted;
|
||||
// cancellation returns to the idle state without sending eTimeOut
on eCancelTimer goto WaitForTimerRequests;
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************
|
||||
Events used to interact with the timer machine
|
||||
************************************************/
|
||||
// sent to the timer by StartTimer() to arm it
event eStartTimer;
|
||||
// sent to the timer by CancelTimer() to cancel it
event eCancelTimer;
|
||||
// sent by the timer to its client when the timer fires
event eTimeOut;
|
||||
// sent by the timer to itself when it decides not to fire yet
event eDelayedTimeOut;
|
||||
/************************************************
|
||||
Functions or APIs to interact with the OS Timer
|
||||
*************************************************/
|
||||
// create timer: instantiates a new Timer machine whose timeouts are delivered
// to `client`, and returns the reference to it
|
||||
fun CreateTimer(client: machine) : Timer
|
||||
{
|
||||
return new Timer(client);
|
||||
}
|
||||
|
||||
// start timer: sends eStartTimer to the given timer machine
|
||||
fun StartTimer(timer: Timer)
|
||||
{
|
||||
send timer, eStartTimer;
|
||||
}
|
||||
|
||||
// cancel timer: sends eCancelTimer to the given timer machine
|
||||
fun CancelTimer(timer: Timer)
|
||||
{
|
||||
send timer, eCancelTimer;
|
||||
}
|
||||
2
specs/Timer/PSrc/TimerModules.p
Normal file
2
specs/Timer/PSrc/TimerModules.p
Normal file
@@ -0,0 +1,2 @@
|
||||
/* Create the timer module which consists of only the timer machine */
|
||||
module Timer = { Timer };
|
||||
6
specs/Timer/README.md
Normal file
6
specs/Timer/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# Timer
|
||||
|
||||
This project includes modified portions of the following open-source project:
|
||||
|
||||
- [**P**](https://github.com/p-org/P) Licensed under the [MIT License](LICENSE.txt).
|
||||
- Source: the original implementation of [`Timer`](https://github.com/p-org/P/tree/master/Tutorial/Common/Timer).
|
||||
8
specs/Timer/Timer.pproj
Normal file
8
specs/Timer/Timer.pproj
Normal file
@@ -0,0 +1,8 @@
|
||||
<!-- P Project file for the Timer Module-->
|
||||
<Project>
|
||||
<ProjectName>Timer</ProjectName>
|
||||
<InputFiles>
|
||||
<PFile>./PSrc/</PFile>
|
||||
</InputFiles>
|
||||
<OutputDir>./PGenerated/</OutputDir>
|
||||
</Project>
|
||||
Reference in New Issue
Block a user