Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F97183522
Dataloader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Jan 3, 06:07
Size
9 KB
Mime Type
text/x-python
Expires
Sun, Jan 5, 06:07 (2 d)
Engine
blob
Format
Raw Data
Handle
23348453
Attached To
R13225 LPBF Acoustic Dynamics of in-situ alloying of Titanium-Fe
Dataloader.py
View Options
# -*- coding: utf-8 -*-
"""
@author: srpv
contact: vigneashwara.solairajapandiyan@empa.ch, vigneashpandiyan@gmail.com
The code in this script is used for the publication of the following work:
"Acoustic emission signature of martensitic transformation in Laser Powder Bed Fusion of Ti6Al4V-Fe, supported by operando X-ray diffraction"
@any reuse of this code should be authorized by the first owner, code author
"""
# libraries to import
from
torch.utils.data
import
DataLoader
,
Dataset
import
numpy
as
np
import
random
import
os
import
pandas
as
pd
from
torchvision
import
transforms
from
sklearn.model_selection
import
train_test_split
# implementing train-test-split
import
matplotlib.pyplot
as
plt
class contraster_dataset(Dataset):
    """
    Dataset class for contrastive loss.

    Column 0 of ``df`` is read as the class label and all remaining columns
    as the flattened sample (signal window).

    Args:
        df (pandas.DataFrame): Labels in column 0, sample values in columns 1+.
        train (bool): When True, ``__getitem__`` yields anchor/partner pairs;
            when False it yields the plain sample and its label.
        transform (callable, optional): Stored on the instance. It is not
            applied by this class.

    Returns (from ``__getitem__``):
        train=True: ``(anchor_img, partner_img, anchor_label, label)`` where
            - anchor_img (numpy.ndarray): The requested sample.
            - partner_img (numpy.ndarray): Another sample, of the same class
              when ``anchor_label`` is 1 and of a different class when it is 0.
            - anchor_label (int): 1 for a same-class pair, 0 otherwise.
            - label: The class label of the anchor.
        train=False: ``(anchor_img, label)``.

    Raises:
        IndexError: In training mode, when no other sample qualifies as the
            requested same-class / different-class partner.
    """

    def __init__(self, df, train, transform=None):
        self.is_train = train
        self.transform = transform

        # Samples are kept as float32. The previous uint8 cast truncated
        # fractional values and wrapped negatives, which destroys z-scored
        # signals; integer image data in 0..255 is still represented exactly.
        self.images = df.iloc[:, 1:].values.astype(np.float32)
        self.labels = df.iloc[:, 0].values
        # Retained for backward compatibility; partner selection below works
        # on row positions so it no longer depends on the index labels.
        self.index = df.index.values

    def __len__(self):
        return len(self.images)

    def _pick_partner(self, item, same_class):
        """Return the row position of a random partner for sample ``item``."""
        anchor_label = self.labels[item]
        positions = np.arange(len(self.labels))
        if same_class:
            wanted = self.labels == anchor_label
        else:
            wanted = self.labels != anchor_label
        # Never pair the anchor with itself.
        candidates = positions[wanted & (positions != item)]
        if candidates.size == 0:
            raise IndexError(
                'No {} partner available for sample {}'.format(
                    'same-class' if same_class else 'different-class', item))
        # randrange + indexing instead of random.choice: Python 3.11's
        # random.choice evaluates the truth value of the sequence, which
        # raises ValueError for NumPy arrays with more than one element.
        return candidates[random.randrange(candidates.size)]

    def __getitem__(self, item):
        anchor_img = self.images[item]
        label = self.labels[item]

        if not self.is_train:
            return anchor_img, label

        # Coin flip: 1 -> same-class partner, 0 -> different-class partner.
        # Drawn only in training mode so evaluation does not advance the
        # global random state.
        anchor_label = random.randint(0, 1)
        partner_img = self.images[self._pick_partner(item, bool(anchor_label))]
        return anchor_img, partner_img, anchor_label, label
def dataprocessing(df):
    """
    Standardize every row of the input dataframe.

    Each row is shifted by its own mean and divided by its own population
    standard deviation (ddof=0). The computation is vectorized instead of
    calling a Python lambda once per row.

    Args:
        df (pandas.DataFrame): The input dataframe, one sample per row.
    Returns:
        pandas.DataFrame: A dataframe with the same shape, index and columns
        as ``df`` holding the row-standardized values. Rows whose standard
        deviation is exactly zero come back as all zeros rather than NaN.
    """
    print(df.shape)

    row_mean = df.mean(axis=1)
    row_std = df.std(axis=1, ddof=0)
    # A constant row has zero spread; dividing by it would give 0/0 = NaN.
    # Use a unit divisor there so the centred row is returned as zeros.
    row_std = row_std.mask(row_std == 0, 1.0)

    return df.sub(row_mean, axis=0).div(row_std, axis=0)
def data_extract(datapath, Exptype):
    """
    Extracts data from the given datapath based on the experiment type.

    Loads ``<Exptype>_Class_label.npy`` and ``<Exptype>_Rawspace.npy`` from
    ``datapath``, standardizes the raw data with ``dataprocessing``, performs
    a fixed 80/20 train/test split (random_state=66) and returns each part as
    a dataframe with the label in the first column followed by the data
    columns.

    Parameters:
        datapath (str): The path to the data directory.
        Exptype (str): The experiment type, used as the file-name prefix.
    Returns:
        train_df (DataFrame): The training data as a pandas DataFrame.
        test_df (DataFrame): The testing data as a pandas DataFrame.
    """
    classfile = os.path.join(datapath, Exptype + '_Class_label.npy')
    rawfile = os.path.join(datapath, Exptype + '_Rawspace.npy')

    classspace = np.load(classfile).astype(np.int64)
    rawspace = np.load(rawfile).astype(np.float64)

    # The column-wise concatenation below needs 2-D labels; accept a flat
    # label vector as well by turning it into a single column.
    if classspace.ndim == 1:
        classspace = classspace.reshape(-1, 1)

    rawspace = dataprocessing(pd.DataFrame(rawspace)).to_numpy()

    X_train, X_test, y_train, y_test = train_test_split(
        rawspace, classspace, test_size=0.20, random_state=66)

    # Label column first, then the standardized data columns.
    train_df = pd.DataFrame(np.concatenate((y_train, X_train), axis=1))
    test_df = pd.DataFrame(np.concatenate((y_test, X_test), axis=1))

    return train_df, test_df
def data_plot(datapath, Exptype, folder_created):
    """
    Plots the data for visualization.

    Loads ``<Exptype>_Class_label.npy`` and ``<Exptype>_Rawspace.npy`` from
    ``datapath``, standardizes the raw data with ``dataprocessing``, averages
    the rows of every class and draws one panel per class with
    ``plot_time_series``. The figure is saved as
    ``<Exptype>_data_Visualize.png`` inside ``folder_created`` and shown.

    Args:
        datapath (str): The path to the data files.
        Exptype (str): The experiment type, used as the file-name prefix.
        folder_created (str): The path to the folder where the graph will be saved.
    Returns:
        None
    """
    classfile = os.path.join(datapath, Exptype + '_Class_label.npy')
    rawfile = os.path.join(datapath, Exptype + '_Rawspace.npy')

    classspace = np.load(classfile).astype(np.int64)
    rawspace = np.load(rawfile).astype(np.float64)

    rawspace = dataprocessing(pd.DataFrame(rawspace)).to_numpy()
    rawspace = pd.DataFrame(rawspace)
    classspace = pd.DataFrame(classspace)

    # Data columns followed by the label column, which is renamed 'target'.
    data = pd.concat([rawspace, classspace], axis=1)
    new_columns = list(data.columns)
    new_columns[-1] = 'target'
    data.columns = new_columns
    # No shuffling here: only per-class means are used, so row order does not
    # matter and the output stays deterministic.

    class_names = ['Ti64', 'Ti64_3Fe', 'Ti64_6Fe']
    colour = ['green', 'red', 'blue', 'cyan', 'orange', 'purple']
    graphname = Exptype + '_data' + '_Visualize' + '.png'

    classes = np.sort(data['target'].unique())

    # Keep the original three-panel layout, but grow it when the label file
    # contains more classes than that instead of failing with IndexError.
    n_panels = max(3, len(classes))
    fig, axs = plt.subplots(nrows=n_panels, ncols=1, sharey=False,
                            figsize=(8, 7), dpi=800)

    for i, cls in enumerate(classes):
        ax = axs.flat[i]
        df = data[data['target'] == cls].drop(
            labels='target', axis=1).mean(axis=0).to_numpy()
        # NOTE(review): names are assigned by sorted position, not by label
        # value -- confirm this matches the label encoding of the data files.
        if i < len(class_names):
            title = class_names[i]
        else:
            title = 'Class ' + str(cls)
        plot_time_series(df, title, ax, colour[i % len(colour)], i)

    fig.tight_layout()
    plt.savefig(os.path.join(folder_created, graphname),
                bbox_inches='tight', pad_inches=0.1, dpi=800)
    plt.show()
    # Close (not just clear) the figure so the 800-dpi canvas is released.
    plt.close(fig)
def plot_time_series(data, class_name, ax, colour, i, n_steps=10):
    """
    Draw a smoothed trace with a +/- 3 rolling-standard-deviation band.

    Args:
        data (list or numpy array): The time series data to be plotted.
        class_name (str): Used as the title of the axes.
        ax (matplotlib.axes.Axes): The axes object to draw on.
        colour (str): The colour of the smoothed line.
        i (int): The index of the plot; accepted but not used in the body.
        n_steps (int, optional): Window length of the rolling mean and
            rolling standard deviation. Defaults to 10.
    Returns:
        None
    """
    series_frame = pd.DataFrame(data)
    windowed = series_frame.rolling(n_steps)

    smooth_path = windowed.mean()
    path_deviation = 3 * windowed.std()

    # Band limits are taken from the first (only) column of the frames.
    lower_bound = (smooth_path - path_deviation)[0]
    upper_bound = (smooth_path + path_deviation)[0]

    ax.plot(smooth_path, color=colour, linewidth=3)
    ax.fill_between(path_deviation.index, lower_bound, upper_bound, alpha=0.45)

    ax.set_title(class_name)
    ax.set_ylim([-0.2, 0.2])
    ax.set_ylabel('Amplitude (V)')
    ax.set_xlabel('Window size (μs)')
def torch_loader(batch_size, train_df, test_df):
    """
    Build the training and test data loaders for the contrastive loss model.

    Both dataframes are wrapped in ``contraster_dataset`` (the training one in
    pair mode, the test one in plain mode) and each dataset is given its own
    ``ToTensor`` transform pipeline. Only the training loader shuffles.

    Parameters:
        batch_size (int): The batch size for the data loaders.
        train_df (pandas.DataFrame): The training data as a pandas DataFrame.
        test_df (pandas.DataFrame): The test data as a pandas DataFrame.
    Returns:
        train_loader (torch.utils.data.DataLoader): The data loader for the training data.
        test_loader (torch.utils.data.DataLoader): The data loader for the test data.
    """
    def _tensor_pipeline():
        # A fresh Compose per dataset, as in the original construction.
        return transforms.Compose([transforms.ToTensor()])

    # Settings shared by both loaders; data is loaded in the main process.
    shared_options = dict(batch_size=batch_size, num_workers=0)

    train_ds = contraster_dataset(train_df, train=True,
                                  transform=_tensor_pipeline())
    train_loader = DataLoader(train_ds, shuffle=True, **shared_options)

    test_ds = contraster_dataset(test_df, train=False,
                                 transform=_tensor_pipeline())
    test_loader = DataLoader(test_ds, shuffle=False, **shared_options)

    return train_loader, test_loader
Event Timeline
Log In to Comment