# Field Transfer Threads Performance

## Import 

In [None]:
from Muscat.Containers.MeshCreationTools import CreateCube
from Muscat.FE.Fields.FEField import FEField
from Muscat.Containers.ConstantRectilinearMeshTools import CreateConstantRectilinearMesh
from Muscat.LinAlg.Transform import Transform
from Muscat.Helpers.TextFormatHelper import TFormat
from Muscat.Containers.NativeTransfer import NativeTransfer
import numpy as np
import time
import pyvista
# Select the 'panel' backend for pyvista output in the notebook by writing the
# theme's private `_jupyter_backend` attribute directly.
pyvista.global_theme._jupyter_backend = 'panel' # remove this line to get interactive 3D plots


## Mesh and Target Points Generation

In [None]:
# Source side: CreateCube mesh with N entries per direction, origin at
# (-1, -1, -1) and a uniform spacing of 2 / (N - 1); the FE field used as the
# transfer source is built on this mesh.
N = 50
inputmesh = CreateCube(dimensions=[N] * 3, origin=[-1.0] * 3, spacing=[2 / (N - 1)] * 3)
inputFEField = FEField(name="3DTo3DNative", mesh=inputmesh)

# Target side: constant rectilinear mesh with the same origin and spacing
# formula but a different N, so its nodes do not coincide with the source ones.
N = 60
outmesh = CreateConstantRectilinearMesh(dimensions=[N] * 3, origin=[-1.0] * 3, spacing=[2 / (N - 1)] * 3)

# Print a label followed by each mesh's textual summary on the next line.
print("Input mesh:", inputmesh, sep="\n")
print("Output mesh:", outmesh, sep="\n")


## Deform output mesh 

Transform the nodes of the output mesh to make the transfer problem more realistic.

In [None]:
# Build a Transform from two explicitly given vectors.
op = Transform()
# Both flags are switched off before the vectors are set -- presumably so the
# vectors are used as given instead of being renormalised / re-orthogonalised.
# NOTE(review): confirm against the Transform documentation.
op.keepNormalized = False
op.keepOrthogonal = False
op.SetFirst([1.4, 0.56, 0])
op.SetSecond([-1.135, 1.42486, 1.6102])

# Overwrite the output mesh nodes with their transformed positions, stored as
# a C-contiguous array.
outmesh.nodes = np.ascontiguousarray(op.ApplyTransform(outmesh.nodes))


## Generate Data

In [None]:
# Nodal values of an analytic function of the first two node coordinates of the
# input mesh: (x - 0.5)^2 - 0.5*y + 0.25*x*y.
x, y = inputmesh.nodes[:, 0], inputmesh.nodes[:, 1]
data = (x - 0.5) ** 2 - y * 0.5 + x * y * 0.25


## Start transfer timing...

In [None]:
# Report the problem size: element count of the source mesh and node count of
# the target mesh, one message per line.
print(
    f"Number of elements in the origin mesh = {inputmesh.GetNumberOfElements()}",
    f"Number of points in the target cloud point = {outmesh.GetNumberOfNodes()}",
    sep="\n",
)


## Some utilities 

In [None]:
def PrintRow(datarow):
    """Print one row of a text table.

    Each cell is passed through ``TFormat.Center`` with a width of 20 and a
    space as fill character; cells are separated by ``|`` and the row starts
    and ends with ``|``.

    Parameters
    ----------
    datarow : iterable
        Cell values. Strings are printed as they are; every other value is
        formatted with ``"%.4f"`` and must therefore be a real number.
    """
    cells = []
    for d in datarow:
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses such as numpy.str_, which would otherwise reach the
        # numeric "%.4f" branch and raise TypeError.
        text = str(d) if isinstance(d, str) else "%.4f" % d
        cells.append(TFormat.Center(text, fill=" ", width=20))
    print("|" + "".join(cell + "|" for cell in cells))


## Initialization

In [None]:
# Create the transfer object once; it is reused by the following cells.
nt = NativeTransfer()
nt.SetVerbose(False)

# Search configurations to compare. Each entry holds the flags
# (point search, element search, fast element search, edge search).
searchStrategies = ((True, False, False, False),
                    (False, True, False, False),
                    (False, True, True, False),
                    (False, False, False, True),
                    (True, True, False, True))

# Measure how long SetSourceFEField takes for each configuration.
for ss in searchStrategies:
    usePoint, useElement, useElementFast, useEdge = ss
    nt.SetUsePointSearch(usePoint)
    nt.SetUseElementSearch(useElement)
    nt.SetUseElementSearchFast(useElementFast)
    nt.SetUseEdgeSearch(useEdge)
    # perf_counter is monotonic and has the highest available resolution, which
    # makes it better suited to measuring short durations than time.time().
    setFieldTime = time.perf_counter()
    nt.SetSourceFEField(inputFEField)
    st = time.perf_counter() - setFieldTime
    print(f"Set Up time (SetSourceFEField) for {ss} : {st} [s]")


## Threads Scalability

In [None]:
# Configuration used for the scalability benchmark: point search only
# (the same flags as the first entry of searchStrategies).
nt.SetUsePointSearch(True)
nt.SetUseElementSearch(False)
nt.SetUseElementSearchFast(False)
nt.SetUseEdgeSearch(False)

# Time this set-up as well. Otherwise `st` keeps the value measured for the
# last strategy of the initialization loop, and the "Set Up time" printed with
# the benchmark table would describe a different search configuration.
setFieldTime = time.perf_counter()
nt.SetSourceFEField(inputFEField)
st = time.perf_counter() - setFieldTime


In [None]:
from Muscat.Helpers.CPU import GetNumberOfAvailableCores

print(f"GetNumberOfAvailableCores: {GetNumberOfAvailableCores()}")

# Thread counts to benchmark: k**2 + 1 for k = 0, 1, 2, ... (1, 2, 5, 10, ...),
# with ceil(sqrt(cores)) entries in total. The last entry is then replaced by
# the full number of available cores so the largest run uses all of them.
nbCores = np.arange(int(np.ceil(np.sqrt(GetNumberOfAvailableCores()))))
nbCores = nbCores * nbCores + 1
nbCores[-1] = GetNumberOfAvailableCores()


In [None]:
print(f"Set Up time (SetSourceFEField) {st} [s]")

# Table header: method name, one timing column per thread count, and one
# speed-up column per thread count other than the first.
output = ["method"]
output.extend(f"cpp [s] {th} thread " for th in nbCores)
output.extend(f"speedup {th}/1 " for th in nbCores[1:])

# Horizontal rule sized to the table: every column is 20 characters wide
# (see PrintRow), followed by one "|", plus the leading "|". Deriving it from
# the header keeps it correct for any number of thread counts.
print("_" * (len(output) * 20 + len(output) + 1))
PrintRow(output)

for method in ["Interp/Nearest", "Nearest/Nearest", "Interp/Clamp", "Interp/Extrap"]:
    nt.SetTransferMethod(method)
    nt.SetTargetPoints(outmesh.nodes)
    times = []
    for nbThreads in nbCores:
        nt.SetMaxConcurrency(nbThreads)
        # Only Compute() is timed. perf_counter is monotonic and
        # high-resolution, which suits short benchmark intervals.
        cppStartTime = time.perf_counter()
        nt.Compute()
        cppStopTime = time.perf_counter()
        # Retrieved outside the timed region; note that this rebinds the
        # notebook-level name `op`.
        op = nt.GetOperator()
        times.append(cppStopTime - cppStartTime)

    output = [method]
    output.extend(times)
    # Speed-up relative to the first thread count; a zero duration is replaced
    # by 1 to avoid a division by zero.
    output.extend([times[0] / (t if t > 0 else 1) for t in times[1:]])

    PrintRow(output)
