diff --git a/subject2-pre/.keep b/subject2-pre/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subject2-pre/Figs/.keep b/subject2-pre/Figs/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subject2-pre/Figs/cpu.png b/subject2-pre/Figs/cpu.png
new file mode 100644
index 0000000000000000000000000000000000000000..b8dcb9f6576e33103eca4eb7e75b05d387d174de
Binary files /dev/null and b/subject2-pre/Figs/cpu.png differ
diff --git a/subject2-pre/Figs/framework.png b/subject2-pre/Figs/framework.png
new file mode 100644
index 0000000000000000000000000000000000000000..e4195d861c5474608eec34808519b4da09e6bbf5
Binary files /dev/null and b/subject2-pre/Figs/framework.png differ
diff --git a/subject2-pre/Figs/mem.png b/subject2-pre/Figs/mem.png
new file mode 100644
index 0000000000000000000000000000000000000000..3d4272efc1f3b2ea8db5c0d11579cd037f1429b5
Binary files /dev/null and b/subject2-pre/Figs/mem.png differ
diff --git a/subject2-pre/Figs/ourgan.png b/subject2-pre/Figs/ourgan.png
new file mode 100644
index 0000000000000000000000000000000000000000..d1507144287c349fbbfa28fac3c65e8c5b76663a
Binary files /dev/null and b/subject2-pre/Figs/ourgan.png differ
diff --git a/subject2-pre/README.md b/subject2-pre/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e6c61b96726b95689d224a51dd1a12d1c6d575de
--- /dev/null
+++ b/subject2-pre/README.md
@@ -0,0 +1,88 @@
+# JC-workload-predictor
+
+
+
+This repository provides the implementation of the paper "Joint Privacy-Preserving Cloud Workload Prediction Based on Federated Learning", which is published in XXX.
+In this paper, we propose a joint privacy-preserving cloud workload prediction framework based on Federated Learning (FL), which enables the collaborative training of a workload prediction model across cloud providers with heterogeneous workloads and model preferences without exposing private workload data.
+We first design a Generative Adversarial Network (GAN) based virtual workload dataset generation method that incorporates workloads' temporal and resource-usage-plan-related features via temporal-aware feature calibration.
+Then, we design an FL approach based on a hybrid local model composed of a homogeneous public model and a heterogeneous personal model.
+Finally, we design a Knowledge Distillation (KD) based approach to post-train the personal model with both private workload knowledge and shared workload knowledge learned from the trained global public model.
+Extensive experiments on various real-world workloads demonstrate that, compared with the state-of-the-art, our framework improves workload prediction accuracy by 45.5% on average across all cloud providers.
+
+
+
+| ![Memory utilization prediction results](Figs/mem.png) | ![CPU utilization prediction results](Figs/cpu.png) |
+| :---: | :---: |
+| MSEs of memory utilization prediction results (lower is better). | MSEs of CPU utilization prediction results (lower is better). |
+
+
+
+Our framework consists of the following three key components:
+
+
+
+
+
+
+## 1. Virtual Workload Dataset Generation
+
+
+
+
+
+
+
+The folder [code_generate_data](https://gitee.com/jointcloud_computing/joint-cloud-computing/tree/subject2/subject2-pre/all-process/generate_data) contains the code for generating virtual workload data with GAN, VHL, and our TVHL.
+Each script takes the path of a real workload dataset as input.
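+
+As a rough sketch of the temporal-aware feature calibration used by TVHL, the discriminator's loss is augmented with a joint-distribution term that weights the feature distance between real and generated samples by how far apart they lie in time of day. The snippet below is only a simplified illustration of that term (the name `joint_distribution_loss` and the `weight` argument are ours), not the exact training script:
+
+```python
+import numpy as np
+from scipy.spatial.distance import cdist
+
+def joint_distribution_loss(real_batch, fake_batch, weight=0.1):
+    """Feature-distribution distance weighted by the time-of-day mismatch."""
+    real_time, fake_time = real_batch[:, 0], fake_batch[:, 0]
+    # pairwise Euclidean distance over the non-time features
+    dist = weight * cdist(real_batch[:, 1:], fake_batch[:, 1:], metric='euclidean')
+    # per-sample time mismatch, broadcast over the distance matrix
+    return np.mean(dist * np.abs(real_time - fake_time))
+
+# toy usage with two batches of scaled 7-feature samples (time + 6 resource features)
+rng = np.random.default_rng(0)
+print(joint_distribution_loss(rng.random((32, 7)), rng.random((32, 7))))
+```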
+
+
+
+
+## 2. Federated Learning with Virtual Workload Datasets
+
+
+The local models are trained with FedAvg on different datasets.
+
+The folder [code_fedavg](https://gitee.com/jointcloud_computing/joint-cloud-computing/tree/subject2/subject2-pre/all-process/fedavg) trains on the real workload datasets.
+
+The folder [code_ourfl-nokd](https://gitee.com/jointcloud_computing/joint-cloud-computing/tree/subject2/subject2-pre/all-process/ourfl-nokd) trains on the generated virtual workload datasets.
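+
+At each global round, the clients' locally updated parameters are aggregated by element-wise averaging. A minimal sketch of that aggregation step, assuming equal client weights (the helper name `fedavg_state_dicts` is ours):
+
+```python
+import torch
+
+def fedavg_state_dicts(client_state_dicts):
+    """Element-wise average of a list of client state_dicts (equal weights)."""
+    avg = {k: torch.zeros_like(v) for k, v in client_state_dicts[0].items()}
+    for sd in client_state_dicts:
+        for k, v in sd.items():
+            avg[k] += v
+    return {k: v / len(client_state_dicts) for k, v in avg.items()}
+
+# e.g. global_state = fedavg_state_dicts([m.state_dict() for m in client_models])
+```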
+
+
+
+## 3. Post-Training of Personal Models
+
+
+The code in the folder [code_ourfl_kd](https://gitee.com/jointcloud_computing/joint-cloud-computing/tree/subject2/subject2-pre/all-process/ourfl_kd) post-trains the personal model once the local public model has been trained.
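+
+Conceptually, post-training distils the trained global public model's predictions into the personal model while still fitting the private workload data. The following is only a schematic of such a KD objective, assuming an MSE-based distillation term and a balancing factor `alpha` (both are illustrative choices, not necessarily those of `code_ourfl_kd`):
+
+```python
+import torch
+import torch.nn.functional as F
+
+def kd_post_train_step(personal_model, public_model, x, y, optimizer, alpha=0.5):
+    """One post-training step: fit private targets and distil the public model."""
+    personal_model.train()
+    with torch.no_grad():
+        teacher_pred = public_model(x)   # shared workload knowledge
+    student_pred = personal_model(x)     # personal model prediction
+    loss = (1 - alpha) * F.mse_loss(student_pred, y) + alpha * F.mse_loss(student_pred, teacher_pred)
+    optimizer.zero_grad()
+    loss.backward()
+    optimizer.step()
+    return loss.item()
+```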
+
+
+
+## Prerequisites
+
+Running the code requires the following libraries:
+
+- Python >= 3.7
+- PyTorch >= 2.0.1
+- torchvision >= 0.15.2
+
+The data-generation scripts additionally rely on TensorFlow, scikit-learn, SciPy, pandas, and NumPy; our full environment is listed in [environment.yaml](https://gitee.com/jointcloud_computing/joint-cloud-computing/edit/subject2/subject2-pre/environment.yaml).
+
+## Citing
+
+
+
+If you use this repository, please cite:
+```bibtex
+@article{,
+ title={},
+ author={Yang, Bin and Li, Yan},
+ journal={},
+ volume={},
+ year={},
+ publisher={}
+}
+```
+
+
diff --git a/subject2-pre/all-process/.keep b/subject2-pre/all-process/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subject2-pre/all-process/fedavg/.keep b/subject2-pre/all-process/fedavg/.keep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/subject2-pre/all-process/fedavg/fed_gan.py b/subject2-pre/all-process/fedavg/fed_gan.py
new file mode 100644
index 0000000000000000000000000000000000000000..abfa9cb3789aa4ec951a5b7cf2ba41b6b2e4cf59
--- /dev/null
+++ b/subject2-pre/all-process/fedavg/fed_gan.py
@@ -0,0 +1,228 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizers
+
+
+# Configuration of hyper-parameters
+class Config():
+ timestep = 4
+ batch_size = 32
+ feature_size = 1
+ hidden_size = 256
+ output_size = 1
+ num_layers = 2
+ epochs = 4
+ learning_rate = 0.0005
+ model_name = 'gru'
+ save_path = './{}.pth'.format(model_name)
+
+config = Config()
+
+# GRU prediction model
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ output, h_0 = self.gru(x, h_0)
+
+ batch_size, timestep, hidden_size = output.shape
+
+ output = output.reshape(-1, hidden_size)
+
+ output = self.fc(output)
+
+ output = output.reshape(batch_size, timestep, -1)
+
+ return output[:, -1, :]
+
+
+# Build sliding-window training samples
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.8 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+
+data1 = pd.read_csv(r'F:\ourfl\genedate\GAN\gan_alibaba.csv')
+data2 = pd.read_csv(r'F:\ourfl\genedate\GAN\gan_google.csv')
+data3 = pd.read_csv(r'F:\ourfl\genedate\GAN\gan_mircosoft.csv')
+data4 = pd.read_csv(r'F:\ourfl\genedate\GAN\gan_sugan1.csv')
+
+# Extract the memory-utilization column
+df1 = data1[['mem_use']]
+df2 = data2[['mem_use']]
+df3 = data3[['mem_use']]
+df4 = data4[['mem_use']]
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Time-step length and feature count
+timestep = 4 # window length; adjust as needed
+feature_size = 1 # only the mem_use column is used as a feature
+
+# Split each dataset into train/test sets
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # predict from the last time step
+ return output
+
+
+
+# Global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # GRU followed by a linear output layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+
+config = Config()
+global_model = GlobalGRU(config)
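+# one client model per workload dataset loaded above (four providers)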
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimizer and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average client parameters (FedAvg aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 20
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
+# Global federated-learning rounds
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # ClientGRU returns only the prediction
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # FedAvg: average the client parameters
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Copy the averaged parameters into the global model
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"client_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/fedavg/fed_tvhl.py b/subject2-pre/all-process/fedavg/fed_tvhl.py
new file mode 100644
index 0000000000000000000000000000000000000000..de7ca27ca2004fce462f2abe2173582152188634
--- /dev/null
+++ b/subject2-pre/all-process/fedavg/fed_tvhl.py
@@ -0,0 +1,225 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizers
+
+
+# Configuration of hyper-parameters
+class Config():
+ timestep = 4
+ batch_size = 32
+ feature_size = 1
+ hidden_size = 256
+ output_size = 1
+ num_layers = 2
+ epochs = 10
+ learning_rate = 0.0001
+ model_name = 'gru'
+ save_path = './{}.pth'.format(model_name)
+
+config = Config()
+
+# GRU prediction model
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ output, h_0 = self.gru(x, h_0)
+
+ batch_size, timestep, hidden_size = output.shape
+
+ output = output.reshape(-1, hidden_size)
+
+ output = self.fc(output)
+
+ output = output.reshape(batch_size, timestep, -1)
+
+ return output[:, -1, :]
+
+
+# Build sliding-window training samples
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+data1 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_alibaba.csv')
+data2 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_google.csv')
+data3 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_mircosoft.csv')
+data4 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_sugan1.csv')
+# Extract the memory-utilization column
+df1 = data1[['mem_use']]
+df2 = data2[['mem_use']]
+df3 = data3[['mem_use']]
+df4 = data4[['mem_use']]
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Time-step length and feature count
+timestep = 4 # window length; adjust as needed
+feature_size = 1 # only the mem_use column is used as a feature
+
+# Split each dataset into train/test sets
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # predict from the last time step
+ return output
+
+
+
+# Global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # GRU followed by a linear output layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+
+config = Config()
+global_model = GlobalGRU(config)
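+# one client model per workload dataset loaded above (four providers)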
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimizer and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average client parameters (FedAvg aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 10
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
+# Global federated-learning rounds
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # ClientGRU returns only the prediction
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # FedAvg: average the client parameters
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Copy the averaged parameters into the global model
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"tvhl_client_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/fedavg/fed_vhl.py b/subject2-pre/all-process/fedavg/fed_vhl.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b153f6215a5fae59686e6cbbbc837a387dc63e4
--- /dev/null
+++ b/subject2-pre/all-process/fedavg/fed_vhl.py
@@ -0,0 +1,227 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizers
+
+
+# Configuration of hyper-parameters
+class Config():
+ timestep = 1
+ batch_size = 32
+ feature_size = 1
+ hidden_size = 256
+ output_size = 1
+ num_layers = 2
+ epochs = 10
+ learning_rate = 0.0001
+ model_name = 'gru'
+ save_path = './{}.pth'.format(model_name)
+
+config = Config()
+
+# GRU prediction model
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ output, h_0 = self.gru(x, h_0)
+
+ batch_size, timestep, hidden_size = output.shape
+
+ output = output.reshape(-1, hidden_size)
+
+ output = self.fc(output)
+
+ output = output.reshape(batch_size, timestep, -1)
+
+ return output[:, -1, :]
+
+
+# Build sliding-window training samples
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+
+data1 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_alibaba.csv')
+data2 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_google.csv')
+data3 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_mircosoft.csv')
+data4 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_sugan1.csv')
+
+# Extract the memory-utilization column
+df1 = data1[['mem_use']]
+df2 = data2[['mem_use']]
+df3 = data3[['mem_use']]
+df4 = data4[['mem_use']]
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Time-step length and feature count
+timestep = 1 # window length; adjust as needed
+feature_size = 1 # only the mem_use column is used as a feature
+
+# Split each dataset into train/test sets
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # predict from the last time step
+ return output
+
+
+
+# Global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # GRU followed by a linear output layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+
+config = Config()
+global_model = GlobalGRU(config)
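+# one client model per workload dataset loaded above (four providers)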
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimizer and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average client parameters (FedAvg aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 10
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
+# Global federated-learning rounds
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # ClientGRU returns only the prediction
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # FedAvg: average the client parameters
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Copy the averaged parameters into the global model
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"vhl_client_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/fedavg/fedavg.py b/subject2-pre/all-process/fedavg/fedavg.py
new file mode 100644
index 0000000000000000000000000000000000000000..e43a00bd32ce813deda78bbfaa6939493e486448
--- /dev/null
+++ b/subject2-pre/all-process/fedavg/fedavg.py
@@ -0,0 +1,224 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizers
+
+
+# Configuration of hyper-parameters
+class Config():
+ timestep = 1
+ batch_size = 32
+ feature_size = 1
+ hidden_size = 256
+ output_size = 1
+ num_layers = 2
+ epochs = 20
+ learning_rate = 0.0001
+ model_name = 'gru'
+ save_path = './{}.pth'.format(model_name)
+
+config = Config()
+
+# GRU prediction model
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ output, h_0 = self.gru(x, h_0)
+
+ batch_size, timestep, hidden_size = output.shape
+
+ output = output.reshape(-1, hidden_size)
+
+ output = self.fc(output)
+
+ output = output.reshape(batch_size, timestep, -1)
+
+ return output[:, -1, :]
+
+# Build sliding-window training samples
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+data1 = pd.read_csv(r"F:\ourfl\data\processed_alibaba.csv")
+
+data2 = pd.read_csv(r"F:\ourfl\data\pre_google.csv")
+data3 = pd.read_csv(r"F:\ourfl\data\pre_microsoft.csv")
+data4 = pd.read_csv(r"F:\ourfl\data\processed_hefei.csv")
+# Extract the memory-utilization column
+df1 = data1[['mem_use']]
+df2 = data2[['mem_use']]
+df3 = data3[['mem_use']]
+df4 = data4[['mem_use']]
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Time-step length and feature count
+timestep = 1 # window length; adjust as needed
+feature_size = 1 # only the mem_use column is used as a feature
+
+# Split each dataset into train/test sets
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # predict from the last time step
+ return output
+
+
+
+# Global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # GRU followed by a linear output layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+config = Config()
+global_model = GlobalGRU(config)
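+# one client model per workload dataset loaded above (four providers)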
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimizer and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average client parameters (FedAvg aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 30
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
+# Global federated-learning rounds
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # ClientGRU returns only the prediction
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # FedAvg: average the client parameters
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Copy the averaged parameters into the global model
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"client_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/generate_data/GAN/gan_alibaba.py b/subject2-pre/all-process/generate_data/GAN/gan_alibaba.py
new file mode 100644
index 0000000000000000000000000000000000000000..f11342c7fe5e34dc8468ec4593a90fc04ff7e2a2
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/GAN/gan_alibaba.py
@@ -0,0 +1,119 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+
+
+data = pd.read_csv(r'F:\ourfl\data\processed_alibaba.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+data['time'] = data['time'] % 24
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
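+# Compile the discriminator, then build the stacked GAN (generator + frozen discriminator) used to train the generator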
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=32):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Print the losses periodically
+ if epoch % 100 == 0:
+ print(f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: Gaussian noise drives the generator, the scaled real workload data serve as real samples
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual workload data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to a 0-23 hour of day
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour into a dict keyed by hour
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# For each hour, fill empty groups and pad groups that are too short
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps hour 23 back to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Start time_series_data with a copy of the first row of empty_df, then interleave hours to rebuild a time series
+time_series_data = empty_df.iloc[:1].copy()
+
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\GAN\gan_alibaba.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/GAN/gan_google.py b/subject2-pre/all-process/generate_data/GAN/gan_google.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbf52078f1f101496f93f63e4420bc726915c0d0
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/GAN/gan_google.py
@@ -0,0 +1,118 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+
+
+data = pd.read_csv(r'F:\ourfl\data\pre_google.csv')
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+data['time'] = data['time'] % 24
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
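+# Compile the discriminator, then build the stacked GAN (generator + frozen discriminator) used to train the generator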
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=32):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Print the losses periodically
+ if epoch % 100 == 0:
+ print(f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: Gaussian noise drives the generator, the scaled real workload data serve as real samples
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual workload data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to a 0-23 hour of day
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour into a dict keyed by hour
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# For each hour, fill empty groups and pad groups that are too short
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps hour 23 back to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Start time_series_data with a copy of the first row of empty_df, then interleave hours to rebuild a time series
+time_series_data = empty_df.iloc[:1].copy()
+
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\GAN\gan_google.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/GAN/gan_hpc.py b/subject2-pre/all-process/generate_data/GAN/gan_hpc.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cf3b33f7fc5a9dd5311d7ea186aec9a8b9dbbfd
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/GAN/gan_hpc.py
@@ -0,0 +1,119 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+
+
+data = pd.read_csv(r'F:\ourfl\data\processed_hefei.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+data['time'] = data['time'] % 24
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
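+# Compile the discriminator, then build the stacked GAN (generator + frozen discriminator) used to train the generator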
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=32):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Print the losses periodically
+ if epoch % 100 == 0:
+ print(f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: Gaussian noise drives the generator, the scaled real workload data serve as real samples
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual workload data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to a 0-23 hour of day
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour into a dict keyed by hour
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# For each hour, fill empty groups and pad groups that are too short
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps hour 23 back to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Start time_series_data with a copy of the first row of empty_df, then interleave hours to rebuild a time series
+time_series_data = empty_df.iloc[:1].copy()
+
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\GAN\gan_sugan1.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/GAN/gan_mircosoft.py b/subject2-pre/all-process/generate_data/GAN/gan_mircosoft.py
new file mode 100644
index 0000000000000000000000000000000000000000..a771474f68b34e4b131060eb3f131747d1cb2c60
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/GAN/gan_mircosoft.py
@@ -0,0 +1,119 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+
+
+data = pd.read_csv(r'F:\ourfl\data\pre_microsoft.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+data['time'] = data['time'] % 24
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
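+# Compile the discriminator, then build the stacked GAN (generator + frozen discriminator) used to train the generator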
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=32):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Print the losses periodically
+ if epoch % 100 == 0:
+ print(f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: Gaussian noise drives the generator, the scaled real workload data serve as real samples
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual workload data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to a 0-23 hour of day
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour into a dict keyed by hour
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# For each hour, fill empty groups and pad groups that are too short
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps hour 23 back to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Start time_series_data with a copy of the first row of empty_df, then interleave hours to rebuild a time series
+time_series_data = empty_df.iloc[:1].copy()
+
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\GAN\gan_mircosoft.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/TVHL/tvhl_alibaba.py b/subject2-pre/all-process/generate_data/TVHL/tvhl_alibaba.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5687054ff2badbf49b97c1b10b681c827e3ca4c
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/TVHL/tvhl_alibaba.py
@@ -0,0 +1,137 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+
+data = pd.read_csv(r'F:\ourfl\data\processed_alibaba.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
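+# Compile the discriminator, then build the stacked GAN (generator + frozen discriminator) used to train the generator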
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Extract the 'relative_time' column
+ real_relative_time = real_samples[:, 0]
+ fake_relative_time = fake_samples[:, 0]
+
+ # Remove the 'relative_time' column from the samples
+ real_samples = real_samples[:, 1:]
+ fake_samples = fake_samples[:, 1:]
+
+ # Calculate the Euclidean distance between samples without the 'relative_time' feature
+ distance =0.1* cdist(real_samples, fake_samples, metric='euclidean')
+
+ # Compute the joint distribution loss with 'relative_time' as a condition
+ conditional_distance = np.abs(real_relative_time - fake_relative_time)
+ joint_distribution_loss = np.mean(distance * conditional_distance)
+
+ return joint_distribution_loss
+
+# GAN training loop with temporal-aware calibration
+def train_gan(noise_data, real_data, epochs, batch_size):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a batch of noise vectors
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+ # Joint-distribution discrepancy term (temporal-aware calibration)
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the calibration term to the reported discriminator loss
+ discriminator_loss = discriminator_loss[0] + 0.15 * joint_distribution_loss
+ # Print the losses periodically
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+# Gaussian noise shared across all providers
+noise = np.random.normal(0, 1, size=(10000, 7))
+# Train the GAN: noise drives the generator, the scaled real workload data serve as real samples
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+# Generate the virtual workload data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to a 0-23 hour of day
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour into a dict keyed by hour
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# For each hour, fill empty groups and pad groups that are too short
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps hour 23 back to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Start time_series_data with a copy of the first row of empty_df, then interleave hours to rebuild a time series
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\TVHL\tvhl_alibaba.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/TVHL/tvhl_google.py b/subject2-pre/all-process/generate_data/TVHL/tvhl_google.py
new file mode 100644
index 0000000000000000000000000000000000000000..c12d4e8608321cfbb4938048e6ce602038e72984
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/TVHL/tvhl_google.py
@@ -0,0 +1,136 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+
+data = pd.read_csv(r'F:\ourfl\data\pre_google.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
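+# Compile the discriminator, then build the stacked GAN (generator + frozen discriminator) used to train the generator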
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Extract the 'relative_time' column
+ real_relative_time = real_samples[:, 0]
+ fake_relative_time = fake_samples[:, 0]
+
+ # Remove the 'relative_time' column from the samples
+ real_samples = real_samples[:, 1:]
+ fake_samples = fake_samples[:, 1:]
+
+ # Calculate the Euclidean distance between samples without the 'relative_time' feature
+ distance =0.1* cdist(real_samples, fake_samples, metric='euclidean')
+
+ # Compute the joint distribution loss with 'relative_time' as a condition
+ conditional_distance = np.abs(real_relative_time - fake_relative_time)
+ joint_distribution_loss = np.mean(distance * conditional_distance)
+
+ return joint_distribution_loss
+
+# GAN training loop with temporal-aware calibration
+def train_gan(noise_data, real_data, epochs, batch_size):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a noise batch
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.15 * joint_distribution_loss
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+# Gaussian noise shared across the whole run
+noise = np.random.normal(0, 1, size=(10000, 7))
+# Train the GAN: the noise is the generator input, the scaled real data is the target distribution
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+# Generate the virtual data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+generated_data.to_csv(r'F:\ourfl\genedate\TVHL\tvhl_google.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/TVHL/tvhl_mircosoft.py b/subject2-pre/all-process/generate_data/TVHL/tvhl_mircosoft.py
new file mode 100644
index 0000000000000000000000000000000000000000..12d96bb5522d303b7c1d839d5b14f915c5bab2ca
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/TVHL/tvhl_mircosoft.py
@@ -0,0 +1,137 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+
+data = pd.read_csv(r'F:\ourfl\data\pre_microsoft.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Extract the 'relative_time' column
+ real_relative_time = real_samples[:, 0]
+ fake_relative_time = fake_samples[:, 0]
+
+ # Remove the 'relative_time' column from the samples
+ real_samples = real_samples[:, 1:]
+ fake_samples = fake_samples[:, 1:]
+
+ # Calculate the Euclidean distance between samples without the 'relative_time' feature
+ distance = 0.1 * cdist(real_samples, fake_samples, metric='euclidean')
+
+ # Compute the joint distribution loss with 'relative_time' as a condition
+ conditional_distance = np.abs(real_relative_time - fake_relative_time)
+ joint_distribution_loss = np.mean(distance * conditional_distance)
+
+ return joint_distribution_loss
+
+# Train the GAN
+def train_gan(noise_data, real_data, epochs, batch_size):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a noise batch
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.15 * joint_distribution_loss
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+# Gaussian noise shared across the whole run
+noise = np.random.normal(0, 1, size=(10000, 7))
+# Train the GAN: the noise is the generator input, the scaled real data is the target distribution
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+# Generate the virtual data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\TVHL\tvhl_mircosoft.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/TVHL/tvhl_sugan.py b/subject2-pre/all-process/generate_data/TVHL/tvhl_sugan.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fda7eeb1921865e0f2b850b1feb234b71e080e3
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/TVHL/tvhl_sugan.py
@@ -0,0 +1,137 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+
+data = pd.read_csv(r'F:\ourfl\data\processed_hefei.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Extract the 'relative_time' column
+ real_relative_time = real_samples[:, 0]
+ fake_relative_time = fake_samples[:, 0]
+
+ # Remove the 'relative_time' column from the samples
+ real_samples = real_samples[:, 1:]
+ fake_samples = fake_samples[:, 1:]
+
+ # Calculate the Euclidean distance between samples without the 'relative_time' feature
+ distance = 0.1 * cdist(real_samples, fake_samples, metric='euclidean')
+
+ # Compute the joint distribution loss with 'relative_time' as a condition
+ conditional_distance = np.abs(real_relative_time - fake_relative_time)
+ joint_distribution_loss = np.mean(distance * conditional_distance)
+
+ return joint_distribution_loss
+
+# Train the GAN
+def train_gan(noise_data, real_data, epochs, batch_size):
+ for epoch in range(epochs):
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a noise batch
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.15 * joint_distribution_loss
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+# Gaussian noise shared across the whole run
+noise = np.random.normal(0, 1, size=(10000, 7))
+# Train the GAN: the noise is the generator input, the scaled real data is the target distribution
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+# Generate the virtual data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\TVHL\tvhl_sugan.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/VHL/vhl_alibaba.py b/subject2-pre/all-process/generate_data/VHL/vhl_alibaba.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2ebbd2134fabe889662347eb384e4c9bca6f665
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/VHL/vhl_alibaba.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+
+data = pd.read_csv(r'F:\ourfl\data\processed_alibaba.csv')
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
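+# Unconditioned variant of the discrepancy term: the mean Euclidean distance between
+# real and generated samples, without the time-based weighting used in the TVHL scripts.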
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Euclidean distance between real and generated samples
+ distance = cdist(real_samples, fake_samples, metric='euclidean')
+ # Mean pairwise distance as the joint-distribution discrepancy
+ joint_distribution_loss = distance.mean()
+ return joint_distribution_loss
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=16):
+ for epoch in range(epochs):
+
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a noise batch
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+
+
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.1 * joint_distribution_loss
+
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: the noise is the generator input, the scaled real data is the target distribution
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\VHL\vhl_alibaba.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/VHL/vhl_google.py b/subject2-pre/all-process/generate_data/VHL/vhl_google.py
new file mode 100644
index 0000000000000000000000000000000000000000..70f2259bce30b8ae66306405bfb4f604b453b408
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/VHL/vhl_google.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+
+data = pd.read_csv(r'F:\ourfl\data\pre_google.csv')
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Euclidean distance between real and generated samples
+ distance = cdist(real_samples, fake_samples, metric='euclidean')
+ # Mean pairwise distance as the joint-distribution discrepancy
+ joint_distribution_loss = distance.mean()
+ return joint_distribution_loss
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=16):
+ for epoch in range(epochs):
+
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a noise batch
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+
+
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.1 * joint_distribution_loss
+
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: the noise is the generator input, the scaled real data is the target distribution
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+
+generated_data.to_csv(r'F:\ourfl\genedate\VHL\vhl_google.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/VHL/vhl_hpc.py b/subject2-pre/all-process/generate_data/VHL/vhl_hpc.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f057a4037d2d2d91a5385af2cbe84d2d36904bc
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/VHL/vhl_hpc.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+
+data = pd.read_csv(r'F:\ourfl\data\processed_hpc.csv')
+
+
+data = data[['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']]
+
+
+data['time'] = data['time'] % 24
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data = data[features].values
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Euclidean distance between real and generated samples
+ distance = cdist(real_samples, fake_samples, metric='euclidean')
+ # Mean pairwise distance as the joint-distribution discrepancy
+ joint_distribution_loss = distance.mean()
+ return joint_distribution_loss
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=16):
+ for epoch in range(epochs):
+
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)] # sample a noise batch
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+
+
+ generated_samples = generator.predict(noise)
+ X = np.concatenate((real_samples, generated_samples))
+
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.1 * joint_distribution_loss
+
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+# Train the GAN: the noise is the generator input, the scaled real data is the target distribution
+train_gan(noise, data_scaled, epochs=3000, batch_size=32)
+
+# Generate the virtual data
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(1, 31):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data,empty_df.iloc[j*24+i:j*24+i+1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\VHL\vhl_hpc.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/generate_data/VHL/vhl_mircosoft.py b/subject2-pre/all-process/generate_data/VHL/vhl_mircosoft.py
new file mode 100644
index 0000000000000000000000000000000000000000..922209158a24ff730806b60f36a67f88b7cff4d1
--- /dev/null
+++ b/subject2-pre/all-process/generate_data/VHL/vhl_mircosoft.py
@@ -0,0 +1,152 @@
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from tensorflow.keras.losses import MeanSquaredError
+from tensorflow.keras.metrics import RootMeanSquaredError
+from scipy.spatial.distance import cdist
+
+data1 = pd.read_csv(r'F:\ourfl\data\processed_alibaba.csv')
+data2 = pd.read_csv(r'F:\ourfl\data\pre_microsoft.csv')
+
+
+
+# If data2 lacks a feature that data1 has, fill it with that feature's mean from data1
+for column in data1.columns:
+ if column not in data2.columns:
+ data2[column] = data1[column].mean()
+
+# data2 = data2['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan','mem_plan']
+data1 = data1[features].values
+data2 = data2[features].values
+
+
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1_scaled = scaler.fit_transform(data1)
+data2_scaled = scaler.fit_transform(data2)
+
+
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+
+generator = build_generator()
+
+
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+
+discriminator = build_discriminator()
+
+discriminator.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='mean_squared_error', optimizer='adam')
+
+
+def compute_joint_distribution_loss(real_samples, fake_samples):
+ # Euclidean distance between real and generated samples
+ distance = cdist(real_samples, fake_samples, metric='euclidean')
+ # Mean pairwise distance as the joint-distribution discrepancy
+ joint_distribution_loss = distance.mean()
+ return joint_distribution_loss
+
+
+def train_gan(noise_data, real_data, epochs=3000, batch_size=32):
+ for epoch in range(epochs):
+
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+
+
+ real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+
+
+ generated_samples = generator.predict(noise)
+
+
+ X = np.concatenate((real_samples, generated_samples))
+
+ # Labels: 1 for real samples, 0 for generated samples
+ y = np.ones(2 * batch_size)
+ y[batch_size:] = 0
+
+ # Train the discriminator
+ discriminator.trainable = True
+ discriminator_loss = discriminator.train_on_batch(X, y)
+
+ # Train the generator
+ noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+ y_gen = np.ones(batch_size)
+ discriminator.trainable = False
+ generator_loss = gan.train_on_batch(noise, y_gen)
+
+ # Compute the joint-distribution discrepancy term
+ joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+ # Add the term to the reported discriminator loss (train_on_batch returns [loss, accuracy])
+ discriminator_loss = discriminator_loss[0] + 0.1 * joint_distribution_loss
+
+ # Periodically print the losses
+ if epoch % 100 == 0:
+ print(
+ f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+
+
+num_samples = 20000
+num_rounds = 30
+num_hours_per_round = 24
+
+# Train the GAN as in the other VHL scripts: data1 (alibaba) provides the input batches, data2 (microsoft) is the real data
+train_gan(data1_scaled, data2_scaled, epochs=3000, batch_size=32)
+
+noise = np.random.normal(0, 1, size=(num_samples, 7))
+generated_data = generator.predict(noise)
+generated_data = pd.DataFrame(generated_data, columns=features)
+
+# Map the time feature back to the 24-hour scale
+generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+data_by_hour = {}
+# Group generated_data by hour and store each group under its hour key
+for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+
+empty_df = pd.DataFrame()
+# Iterate over hours 0-23 and check whether each hour's group is empty
+for i in range(0, 24):
+ if data_by_hour[i].empty:
+ next_i = (i + 1) % 24 # index of the next hour; the modulo wraps the last hour back around to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+
+for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+# Copy the first row of empty_df as the start of time_series_data
+time_series_data = empty_df.iloc[:1].copy()
+for i in range(0, 30):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data, empty_df.iloc[i * 24 + j:i * 24 + j + 1].copy()])
+generated_data = time_series_data
+
+generated_data.to_csv(r'F:\ourfl\genedate\VHL\vhl_mircosoft.csv', index=False)
+
+
diff --git a/subject2-pre/all-process/ourfl-nokd/ourfl_gan.py b/subject2-pre/all-process/ourfl-nokd/ourfl_gan.py
new file mode 100644
index 0000000000000000000000000000000000000000..849332afdd759e6903657537b116bb4cf70c7506
--- /dev/null
+++ b/subject2-pre/all-process/ourfl-nokd/ourfl_gan.py
@@ -0,0 +1,237 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizer module (Adam)
+
+
+class Config():
+ timestep = 1 # time step, i.e. the size of the sliding window
+ batch_size = 32 # batch size
+ feature_size = 4 # number of features per time step
+ hidden_size = 256 # hidden layer size
+ output_size = 1 # single-output task: predict the next step's memory utilization
+ num_layers = 2 # number of GRU layers
+ epochs = 20 # number of local training epochs
+ best_loss = float('inf') # best loss so far, initialised to positive infinity
+ learning_rate = 0.0005 # learning rate
+ model_name = 'gru' # model name
+ save_path = './{}.pth'.format(model_name) # path for saving the best model
+
+config = Config()
+
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size # hidden layer size
+ self.num_layers = num_layers # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ # Initialise the hidden state
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ # GRU forward pass
+ output, h_0 = self.gru(x, h_0)
+
+ # Shape of the GRU output
+ batch_size, timestep, hidden_size = output.shape
+
+ # Flatten output to (batch_size * timestep, hidden_size)
+ output = output.reshape(-1, hidden_size)
+
+ # Fully connected layer
+ output = self.fc(output)
+
+ # Reshape back for output
+ output = output.reshape(batch_size, timestep, -1)
+
+ # Return the last time step
+ return output[:, -1, :]
+
+
+# Build supervised training samples from the series
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+
+data1 = pd.read_csv(r"F:\ourfl\data\processed_alibaba.csv")
+data2 = pd.read_csv(r"F:\ourfl\data\pre_google.csv")
+data3 = pd.read_csv(r"F:\ourfl\data\pre_microsoft.csv")
+data4 = pd.read_csv(r"F:\ourfl\data\processed_hefei.csv")
+
+# Select the memory-utilization and resource-plan features
+df1 = data1[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df2 = data2[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df3 = data3[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df4 = data4[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Define the time step and feature count
+timestep = 1 # adjust the time step as needed
+feature_size = 4 # four features: mem_use, cpu_plan, gpu_plan, mem_plan
+
+# Split each dataset into train and test
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # use the last time step
+ return output
+
+
+
+# Define the global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # Forward pass of the global model: GRU followed by a linear layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+# Build the global model and the four client models from the Config
+config = Config()
+global_model = GlobalGRU(config)
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimiser and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average the client model parameters (FedAvg-style aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 30
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
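+# Each round: every client model is trained locally for config.epochs on its own
+# tensors, and the collected parameters are averaged into the global model
+# (FedAvg-style aggregation).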
+# Global federated-learning loop
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # forward pass; ClientGRU returns a single output
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client model parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # Average the parameters across clients
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Overwrite the global model parameters with the averages
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"flclient_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/ourfl-nokd/ourfl_tvhl.py b/subject2-pre/all-process/ourfl-nokd/ourfl_tvhl.py
new file mode 100644
index 0000000000000000000000000000000000000000..7781db241dc2a9842a7c569efd25a088e949f506
--- /dev/null
+++ b/subject2-pre/all-process/ourfl-nokd/ourfl_tvhl.py
@@ -0,0 +1,238 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizer module (Adam)
+
+
+class Config():
+ data_path = './data/wind_dataset.csv'
+ timestep = 4 # time step, i.e. the size of the sliding window
+ batch_size = 32 # batch size
+ feature_size = 4 # number of features per time step
+ hidden_size = 256 # hidden layer size
+ output_size = 1 # single-output task: predict the next step's memory utilization
+ num_layers = 2 # number of GRU layers
+ epochs = 20 # number of local training epochs
+ best_loss = float('inf') # best loss so far, initialised to positive infinity
+ learning_rate = 0.0001 # learning rate
+ model_name = 'gru' # model name
+ save_path = './{}.pth'.format(model_name) # path for saving the best model
+
+config = Config()
+
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size # hidden layer size
+ self.num_layers = num_layers # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ # Initialise the hidden state
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ # GRU forward pass
+ output, h_0 = self.gru(x, h_0)
+
+ # Shape of the GRU output
+ batch_size, timestep, hidden_size = output.shape
+
+ # Flatten output to (batch_size * timestep, hidden_size)
+ output = output.reshape(-1, hidden_size)
+
+ # Fully connected layer
+ output = self.fc(output)
+
+ # Reshape back for output
+ output = output.reshape(batch_size, timestep, -1)
+
+ # Return the last time step
+ return output[:, -1, :]
+
+
+# Build supervised training samples from the series
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+
+data1 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_alibaba.csv')
+data2 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_google.csv')
+data3 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_mircosoft.csv')
+data4 = pd.read_csv(r'F:\ourfl\genedate\TVHL\tvhl_sugan1.csv')
+
+# Select the memory-utilization and resource-plan features
+df1 = data1[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df2 = data2[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df3 = data3[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df4 = data4[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Define the time step and feature count
+timestep = 4 # adjust the time step as needed
+feature_size = 4 # four features: mem_use, cpu_plan, gpu_plan, mem_plan
+
+# Split each dataset into train and test
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # use the last time step
+ return output
+
+
+
+# Define the global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # Forward pass of the global model: GRU followed by a linear layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+# Build the global model and the four client models from the Config
+config = Config()
+global_model = GlobalGRU(config)
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimiser and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average the client model parameters (FedAvg-style aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 30
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
+# Global federated-learning loop
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # forward pass; ClientGRU returns a single output
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client model parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # Average the parameters across clients
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Overwrite the global model parameters with the averages
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"fltvhlclient_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/ourfl-nokd/ourfl_vhl.py b/subject2-pre/all-process/ourfl-nokd/ourfl_vhl.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b36f5fcbf0084ea9835d635ee8b3dce04e69f8a
--- /dev/null
+++ b/subject2-pre/all-process/ourfl-nokd/ourfl_vhl.py
@@ -0,0 +1,238 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import MinMaxScaler
+import torch.optim as optim # optimizer module (Adam)
+
+
+class Config():
+ data_path = './data/wind_dataset.csv'
+ timestep = 1 # time step, i.e. the size of the sliding window
+ batch_size = 32 # batch size
+ feature_size = 4 # number of features per time step
+ hidden_size = 256 # hidden layer size
+ output_size = 1 # single-output task: predict the next step's memory utilization
+ num_layers = 2 # number of GRU layers
+ epochs = 10 # number of local training epochs
+ best_loss = float('inf') # best loss so far, initialised to positive infinity
+ learning_rate = 0.0003 # learning rate
+ model_name = 'gru' # model name
+ save_path = './{}.pth'.format(model_name) # path for saving the best model
+
+config = Config()
+
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size # hidden layer size
+ self.num_layers = num_layers # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ # Initialise the hidden state
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ # GRU forward pass
+ output, h_0 = self.gru(x, h_0)
+
+ # Shape of the GRU output
+ batch_size, timestep, hidden_size = output.shape
+
+ # Flatten output to (batch_size * timestep, hidden_size)
+ output = output.reshape(-1, hidden_size)
+
+ # Fully connected layer
+ output = self.fc(output)
+
+ # Reshape back for output
+ output = output.reshape(batch_size, timestep, -1)
+
+ # Return the last time step
+ return output[:, -1, :]
+
+
+# Build supervised training samples from the series
+def split_data(data, timestep, feature_size):
+ dataX = []
+ dataY = []
+
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0])
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+
+data1 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_alibaba.csv')
+data2 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_google.csv')
+data3 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_mircosoft.csv')
+data4 = pd.read_csv(r'F:\ourfl\genedate\VHL\vhl_sugan1.csv')
+
+# Select the memory-utilization and resource-plan features
+df1 = data1[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df2 = data2[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df3 = data3[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+df4 = data4[['mem_use','cpu_plan','gpu_plan','mem_plan']]
+
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data1 = scaler.fit_transform(df1.values)
+data2 = scaler.fit_transform(df2.values)
+data3 = scaler.fit_transform(df3.values)
+data4 = scaler.fit_transform(df4.values)
+
+# Define the time step and feature count
+timestep = 1 # adjust the time step as needed
+feature_size = 4 # four features: mem_use, cpu_plan, gpu_plan, mem_plan
+
+# Split each dataset into train and test
+x_train1, y_train1, x_test1, y_test1 = split_data(data1, timestep, feature_size)
+x_train2, y_train2, x_test2, y_test2 = split_data(data2, timestep, feature_size)
+x_train3, y_train3, x_test3, y_test3 = split_data(data3, timestep, feature_size)
+x_train4, y_train4, x_test4, y_test4 = split_data(data4, timestep, feature_size)
+
+
+class ClientGRU(nn.Module):
+ def __init__(self, config):
+ super(ClientGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers, batch_first=True)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output[:, -1, :]) # use the last time step
+ return output
+
+
+
+# Define the global GRU model
+class GlobalGRU(nn.Module):
+ def __init__(self, config):
+ super(GlobalGRU, self).__init__()
+ self.gru = nn.GRU(config.feature_size, config.hidden_size, config.num_layers)
+ self.fc = nn.Linear(config.hidden_size, config.output_size)
+
+ def forward(self, x, hidden=None):
+ # Forward pass of the global model: GRU followed by a linear layer
+ output, hidden = self.gru(x, hidden)
+ output = self.fc(output)
+ return output, hidden
+
+
+
+
+# Build the global model and the four client models from the Config
+config = Config()
+global_model = GlobalGRU(config)
+client_models = [ClientGRU(config) for _ in range(4)]
+
+
+
+# Client-side training function
+def client_train(client_model, data, global_model_parameters, config):
+ # Convert the data to PyTorch tensors
+ x_train, y_train, _, _ = data
+ x_train = torch.tensor(x_train, dtype=torch.float32)
+ y_train = torch.tensor(y_train, dtype=torch.float32)
+ # Optimiser and loss function
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+ # Initial hidden state (adjust if needed)
+ hidden = None
+ # Load the global model parameters into the client model
+ client_model.load_state_dict(global_model_parameters)
+ # Local training
+ for epoch in range(config.epochs):
+ optimizer.zero_grad()
+ # Forward pass (ClientGRU returns only the prediction)
+ output = client_model(x_train, hidden)
+ loss = criterion(output, y_train)
+ # Backward pass and parameter update
+ loss.backward()
+ optimizer.step()
+ # Return the updated client model parameters
+ return client_model.state_dict()
+
+
+
+# Average the client model parameters (FedAvg-style aggregation)
+def global_average_parameters(client_parameters_list):
+ num_parameters = len(list(client_parameters_list[0].values()))
+ sum_parameters = [torch.zeros_like(param) for param in list(client_parameters_list[0].values())]
+
+ for client_parameters in client_parameters_list:
+ for i, param in enumerate(client_parameters.values()):
+ sum_parameters[i] += param
+
+ averaged_parameters = [param_sum / len(client_parameters_list) for param_sum in sum_parameters]
+ return averaged_parameters
+
+
+# Number of global federated rounds
+global_epoch = 50
+# Convert the training data to float32 PyTorch tensors
+x_train_list = [torch.tensor(x_train1, dtype=torch.float32),
+ torch.tensor(x_train2, dtype=torch.float32),
+ torch.tensor(x_train3, dtype=torch.float32),
+ torch.tensor(x_train4, dtype=torch.float32)]
+y_train_list = [torch.tensor(y_train1, dtype=torch.float32),
+ torch.tensor(y_train2, dtype=torch.float32),
+ torch.tensor(y_train3, dtype=torch.float32),
+ torch.tensor(y_train4, dtype=torch.float32)]
+
+# Global federated-learning loop
+for global_epoch in range(global_epoch):
+ updated_client_parameters_list = []
+
+ # Train each client locally and collect its updated parameters
+ for client_model, x_train, y_train in zip(client_models, x_train_list, y_train_list):
+ optimizer = optim.Adam(client_model.parameters(), lr=config.learning_rate)
+ criterion = nn.MSELoss()
+
+ # Local training of the client model
+ for local_epoch in range(config.epochs):
+ optimizer.zero_grad()
+ output = client_model(x_train) # forward pass; ClientGRU returns a single output
+ loss = criterion(output, y_train)
+ loss.backward()
+ optimizer.step()
+
+ # Collect the updated client model parameters
+ updated_client_parameters_list.append(client_model.state_dict())
+
+ # Average the parameters across clients
+ averaged_parameters = global_average_parameters(updated_client_parameters_list)
+
+ # Overwrite the global model parameters with the averages
+ with torch.no_grad():
+ for global_param, averaged_param in zip(global_model.parameters(), averaged_parameters):
+ global_param.copy_(averaged_param)
+
+
+# Save each client model
+for i, client_model in enumerate(client_models):
+ model_path = f"flvhlclient_model_{i+1}.pth"
+ torch.save(client_model.state_dict(), model_path)
+
diff --git a/subject2-pre/all-process/ourfl_kd/a1.py b/subject2-pre/all-process/ourfl_kd/a1.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5ddab0c7210cde31c9b474e424c3505b4fb31dc
--- /dev/null
+++ b/subject2-pre/all-process/ourfl_kd/a1.py
@@ -0,0 +1,300 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from torch.utils.data import TensorDataset
+from tqdm import tqdm
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+
+
+data1 = pd.read_csv(r'F:\ourfl\data\processed_alibaba.csv')
+df = data1[['cpu_use']]
+# Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data = scaler.fit_transform(df.values)
+
+
+
+class Config():
+
+ timestep = 2 # time step, i.e. the size of the sliding window
+ batch_size = 32 # batch size
+ feature_size = 1 # single feature (cpu_use)
+ hidden_size = 256 # hidden layer size
+ output_size = 1 # single-output task: predict the next step's CPU utilization
+ num_layers = 2 # number of GRU layers
+ epochs = 32 # number of training epochs
+ best_loss = float('inf') # best loss so far, initialised to positive infinity
+ learning_rate = 0.0001 # learning rate
+ model_name = 'gru' # model name
+ save_path = './{}.pth'.format(model_name) # path for saving the best model
+
+config = Config()
+
+# Configuration of the larger transfer (student) model
+class newConfig():
+
+ timestep = 2 # time step, i.e. the size of the sliding window
+ batch_size = 32 # batch size
+ feature_size = 1 # single feature (cpu_use)
+ hidden_size = 512 # hidden layer size (larger than the pre-trained model)
+ output_size = 1 # single-output task: predict the next step's CPU utilization
+ num_layers = 2 # number of GRU layers
+ epochs = 32 # number of training epochs
+ best_loss = float('inf') # best loss so far, initialised to positive infinity
+ learning_rate = 0.0001 # learning rate
+ model_name = 'gru' # model name
+ save_path = './{}.pth'.format(model_name) # path for saving the best model
+
+newconfig = newConfig()
+
+# Build the full set of (window, next value) samples without splitting
+def split_data1(data, timestep, feature_size):
+ dataX = [] # inputs
+ dataY = [] # targets
+
+ # Each window of `timestep` values is an input; the following value is the target
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0]) # the target is column 0 (cpu_use)
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ return dataX, dataY
+
+# Build training data and split it into train and test sets
+def split_data(data, timestep, feature_size):
+ dataX = [] # inputs
+ dataY = [] # targets
+
+ # Each window of `timestep` values is an input; the following value is the target
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep][0]) # the target is column 0 (cpu_use)
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ # Size of the training set (60% of the samples)
+ train_size = int(np.round(0.6 * dataX.shape[0]))
+
+ # Split into training and test sets
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size # hidden layer size
+ self.num_layers = num_layers # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+ # Initialise the hidden state
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+
+ # GRU forward pass
+ output, h_0 = self.gru(x, h_0)
+
+ # Shape of the GRU output
+ batch_size, timestep, hidden_size = output.shape
+
+ # Flatten output to (batch_size * timestep, hidden_size)
+ output = output.reshape(-1, hidden_size)
+
+ # Fully connected layer
+ output = self.fc(output)
+
+ # Reshape back for output
+ output = output.reshape(batch_size, timestep, -1)
+
+ # Return the last time step
+ return output[:, -1, :]
+
+
+# Build the training data
+x_train, y_train, x_test, y_test = split_data(data, config.timestep, config.feature_size)
+
+x_test1, y_test1 = split_data1(data, config.timestep, config.feature_size)
+
+# Convert the arrays to tensors
+x_train_tensor = torch.from_numpy(x_train).to(torch.float32)
+y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
+x_test_tensor = torch.from_numpy(x_test).to(torch.float32)
+y_test_tensor = torch.from_numpy(y_test).to(torch.float32)
+
+# 5.形成训练数据集
+train_data = TensorDataset(x_train_tensor, y_train_tensor)
+test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+# 6.将数据加载成迭代器
+train_loader = torch.utils.data.DataLoader(train_data,
+ config.batch_size,
+ shuffle=False)
+
+test_loader = torch.utils.data.DataLoader(test_data,
+ config.batch_size,
+ shuffle=False)
+
+# The pre-trained (public) model uses the GRU class with the settings in `config`
+pre_trained_model = GRU(config.feature_size, config.hidden_size, config.num_layers, config.output_size)
+
+# Load the parameters of the pre-trained public model
+# model_path = r"F:\ourfl\fedavg\tvhl_client_model_1.pth"
+model_path = r"F:\ourfl\fedavg\client_model_1.pth"
+
+pre_trained_model.load_state_dict(torch.load(model_path))
+
+transfer_model = GRU(newconfig.feature_size, newconfig.hidden_size, newconfig.num_layers, newconfig.output_size)  # the new (personal) GRU model
+loss_function = nn.MSELoss()  # loss function
+optimizer_transfer = torch.optim.AdamW(transfer_model.parameters(), lr=newconfig.learning_rate)  # optimizer
+
+# Number of candidate random scaling factors to sample
+num_random_matrices = 30
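+# Note: pre_trained_l2_norm below is a 0-dim tensor, so each sampled "random matrix" is really a
+# single random scalar in [0, 1); the search keeps the scaling with the smallest parameter L2 norm.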
+
+# Get the parameters of the pre-trained model and of the new model
+pre_trained_params = pre_trained_model.state_dict()
+transfer_params = transfer_model.state_dict()  # state dict of the new (personal) model
+
+# L2 norm of the pre-trained model parameters
+pre_trained_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in pre_trained_params.values()]))
+
+# L2 norm of the new model parameters
+transfer_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+# Track the minimum L2 norm and the corresponding scaling factor
+min_l2_norm = float('inf')
+min_l2_norm_random_matrix = None
+
+# Sample random scaling factors and evaluate the resulting L2 norm
+for i in range(num_random_matrices):
+    # Draw a random scaling factor
+    random_matrix = torch.rand(pre_trained_l2_norm.size())
+
+    # Apply the scaling factor to the pre-trained parameters
+    for name, param in pre_trained_params.items():
+        if "gru" in name:
+            name = name.replace("gru.", "")  # strip the module-name prefix
+        if name in transfer_params:
+            transfer_params[name].copy_(param * random_matrix)
+
+    # L2 norm of the new model parameters after this mapping
+    transfer_l2_norm_after_mapping = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+    # Keep the scaling factor that gives the smallest L2 norm
+    if transfer_l2_norm_after_mapping < min_l2_norm:
+        min_l2_norm = transfer_l2_norm_after_mapping
+        min_l2_norm_random_matrix = random_matrix
+
+# Apply the best scaling factor to the pre-trained parameters
+for name, param in pre_trained_params.items():
+    if "gru" in name:
+        name = name.replace("gru.", "")  # strip the module-name prefix
+ if name in transfer_params:
+ transfer_params[name].copy_(param * min_l2_norm_random_matrix)
+
+
+num_transfer_epochs = 30  # number of post-training epochs; adjust as needed
+
+
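+# Freeze the pre-trained (teacher) model so that only the new personal model is updated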
+for param in pre_trained_model.parameters():
+ param.requires_grad = False
+
+
+for epoch in range(num_transfer_epochs):
+ transfer_model.train()
+ running_loss = 0
+    train_bar = tqdm(train_loader)  # iterate over the training loader defined above
+ for data in train_bar:
+ x_train, y_train = data
+ optimizer_transfer.zero_grad()
+ y_train_pred = transfer_model(x_train)
+ y_pretrained_pred = pre_trained_model(x_train)
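+        # Post-training objective: fit the private labels and also stay close to the frozen
+        # teacher's predictions (both terms weighted equally in this script)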
+ loss = loss_function(y_train_pred, y_train) + loss_function(y_train_pred, y_pretrained_pred)
+ loss.backward()
+ optimizer_transfer.step()
+
+ running_loss += loss.item()
+ train_bar.set_description("Transfer Learning epoch[{}/{}] loss:{:.3f}".format(epoch + 1, num_transfer_epochs, loss.item()))
+
+
+
+transfer_model_save_path = './transfer_model.pth'
+torch.save(transfer_model.state_dict(), transfer_model_save_path)
+
+print('Finished Transfer Learning')
+
+transfer_model.eval()
+
+# Combine the training and test data to use the entire dataset
+x_combined = np.concatenate([x_train, x_test], axis=0)
+y_combined = np.concatenate([y_train, y_test], axis=0)
+
+# Convert the combined data to tensors
+x_combined_tensor = torch.from_numpy(x_combined).to(torch.float32)
+y_combined_tensor = torch.from_numpy(y_combined).to(torch.float32)
+
+# Create a new DataLoader for the combined data
+combined_data = TensorDataset(x_combined_tensor, y_combined_tensor)
+combined_loader = torch.utils.data.DataLoader(combined_data, batch_size=config.batch_size, shuffle=False)
+
+with torch.no_grad():
+ y_true = [] # Ground truth labels
+ y_pred = [] # Predicted labels
+ for data in test_loader: # Using the test data loader
+ x_test1, y_test1 = data
+ predictions = transfer_model(x_test1)
+ y_true.extend(y_test1.numpy().flatten())
+ y_pred.extend(predictions.numpy().flatten())
+
+# Convert the lists to numpy arrays for easier calculations
+y_true = np.array(y_true)
+y_pred = np.array(y_pred)
+
+# Calculate evaluation metrics
+mse = mean_squared_error(y_true, y_pred)
+rmse = np.sqrt(mse)
+mae = mean_absolute_error(y_true, y_pred)
+
+# Calculate MAPE and SMAPE with checks for division by zero
+mape_denominator = np.maximum(np.abs(y_true), 1e-8) # Avoid division by zero
+mape = np.mean(np.abs((y_true - y_pred) / mape_denominator)) * 100
+
+smape_denominator = np.maximum(np.abs(y_true) + np.abs(y_pred), 1e-8) # Avoid division by zero
+smape = 2.0 * np.mean(np.abs(y_pred - y_true) / smape_denominator) * 100
+
+
+print("Mean Squared Error (MSE):", mse)
+print("Root Mean Squared Error (RMSE):", rmse)
+print("Mean Absolute Error (MAE):", mae)
+print("Mean Absolute Percentage Error (MAPE):", mape)
+print("Symmetric Mean Absolute Percentage Error (SMAPE):", smape)
+
+# Save predictions and metrics to CSV
+result_df = pd.DataFrame({'Truth': y_true, 'Predicted': y_pred})
+result_df.to_csv('transfer_learning_predictions.csv', index=False)
+
+metrics_df = pd.DataFrame({
+ 'MSE': [mse],
+ 'RMSE': [rmse],
+ 'MAE': [mae],
+ 'MAPE': [mape],
+ 'SMAPE': [smape]
+})
+metrics_df.to_csv('transfer_learning_metrics.csv', index=False)
+
+print("Transfer learning predictions and metrics saved successfully!")
diff --git a/subject2-pre/all-process/ourfl_kd/g1.py b/subject2-pre/all-process/ourfl_kd/g1.py
new file mode 100644
index 0000000000000000000000000000000000000000..46aef3e49ff0f6e8d5f933ddc8c7fefc2b3b08a3
--- /dev/null
+++ b/subject2-pre/all-process/ourfl_kd/g1.py
@@ -0,0 +1,301 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from torch.utils.data import TensorDataset
+from tqdm import tqdm
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+
+
+data1 = pd.read_csv(r'F:\ourfl\data\pre_google.csv')
+df = data1[['cpu_use']]
+# 2. Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data = scaler.fit_transform(df.values)
+
+u = 0.5  # weight of the distillation (teacher) term in the post-training loss
+
+num_transfer_epochs = 30  # number of post-training epochs; adjust as needed
+
+
+class Config():
+
+    timestep = 2  # time step: number of past points in the input window
+    batch_size = 32  # batch size
+    feature_size = 1  # number of features per time step (only cpu_use here)
+    hidden_size = 256  # hidden layer size
+    output_size = 1  # single-output task: predict the next cpu_use value
+    num_layers = 2  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to +inf
+    learning_rate = 0.0005  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path to save the best model
+
+config = Config()
+
+# Configuration of the new (heterogeneous personal) model
+class newConfig():
+
+    timestep = 2  # time step: number of past points in the input window
+    batch_size = 32  # batch size
+    feature_size = 1  # number of features per time step (only cpu_use here)
+    hidden_size = 512  # hidden layer size
+    output_size = 1  # single-output task: predict the next cpu_use value
+    num_layers = 4  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to +inf
+    learning_rate = 0.0001  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path to save the best model
+
+newconfig = newConfig()
+
+# Build supervised samples over the whole series (no train/test split)
+def split_data1(data, timestep, feature_size):
+    dataX = []  # inputs
+    dataY = []  # targets
+
+    # Each window of `timestep` points is an input X; the following point is the target Y
+    for index in range(len(data) - timestep):
+        dataX.append(data[index: index + timestep])
+        dataY.append(data[index + timestep][0])  # target is the first (and only) column: cpu_use
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ return dataX, dataY
+
+# Build supervised samples and split them into training and test sets
+def split_data(data, timestep, feature_size):
+    dataX = []  # inputs
+    dataY = []  # targets
+
+    # Each window of `timestep` points is an input X; the following point is the target Y
+    for index in range(len(data) - timestep):
+        dataX.append(data[index: index + timestep])
+        dataY.append(data[index + timestep][0])  # target is the first (and only) column: cpu_use
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+    # Use the first 60% of the samples for training
+    train_size = int(np.round(0.6 * dataX.shape[0]))
+
+    # Split into training and test sets
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+# Define the GRU network
+class GRU(nn.Module):
+    def __init__(self, feature_size, hidden_size, num_layers, output_size):
+        super(GRU, self).__init__()
+        self.hidden_size = hidden_size  # hidden layer size
+        self.num_layers = num_layers  # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+        # Initialize the hidden state
+        if hidden is None:
+            h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+        else:
+            h_0 = hidden
+
+        # GRU forward pass
+        output, h_0 = self.gru(x, h_0)
+
+        # Shape of the GRU output
+        batch_size, timestep, hidden_size = output.shape
+
+        # Flatten output to (batch_size * timestep, hidden_size)
+        output = output.reshape(-1, hidden_size)
+
+        # Fully connected layer
+        output = self.fc(output)
+
+        # Reshape back to (batch_size, timestep, output_size)
+        output = output.reshape(batch_size, timestep, -1)
+
+        # Return the prediction at the last time step
+ return output[:, -1, :]
+
+
+# Build the training and test data
+x_train, y_train, x_test, y_test = split_data(data, config.timestep, config.feature_size)
+
+x_test1, y_test1 = split_data1(data, config.timestep, config.feature_size)
+
+# 4. Convert the arrays to tensors
+x_train_tensor = torch.from_numpy(x_train).to(torch.float32)
+y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
+x_test_tensor = torch.from_numpy(x_test).to(torch.float32)
+y_test_tensor = torch.from_numpy(y_test).to(torch.float32)
+
+# 5. Wrap the tensors into datasets
+train_data = TensorDataset(x_train_tensor, y_train_tensor)
+test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+# 6. Wrap the datasets into data loaders
+train_loader = torch.utils.data.DataLoader(train_data,
+ config.batch_size,
+ shuffle=False)
+
+test_loader = torch.utils.data.DataLoader(test_data,
+ config.batch_size,
+ shuffle=False)
+
+# The pre-trained (public) model uses the GRU class with the settings in `config`
+pre_trained_model = GRU(config.feature_size, config.hidden_size, config.num_layers, config.output_size)
+
+# Load the parameters of the pre-trained public model
+# model_path = r"F:\ourfl\fedavg\tvhl_client_model_1.pth"
+model_path = r"F:\ourfl\fedavg\client_model_2.pth"
+
+pre_trained_model.load_state_dict(torch.load(model_path))
+
+transfer_model = GRU(newconfig.feature_size, newconfig.hidden_size, newconfig.num_layers, newconfig.output_size)  # the new (personal) GRU model
+loss_function = nn.MSELoss()  # loss function
+optimizer_transfer = torch.optim.AdamW(transfer_model.parameters(), lr=newconfig.learning_rate)  # optimizer
+
+# Number of candidate random scaling factors to sample
+num_random_matrices = 100
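+# Note: pre_trained_l2_norm below is a 0-dim tensor, so each sampled "random matrix" is really a
+# single random scalar in [0, 1); the search keeps the scaling with the smallest parameter L2 norm.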
+
+# Get the parameters of the pre-trained model and of the new model
+pre_trained_params = pre_trained_model.state_dict()
+transfer_params = transfer_model.state_dict()  # state dict of the new (personal) model
+
+# L2 norm of the pre-trained model parameters
+pre_trained_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in pre_trained_params.values()]))
+
+# L2 norm of the new model parameters
+transfer_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+# Track the minimum L2 norm and the corresponding scaling factor
+min_l2_norm = float('inf')
+min_l2_norm_random_matrix = None
+
+# Sample random scaling factors and evaluate the resulting L2 norm
+for i in range(num_random_matrices):
+    # Draw a random scaling factor
+    random_matrix = torch.rand(pre_trained_l2_norm.size())
+
+    # Apply the scaling factor to the pre-trained parameters
+    for name, param in pre_trained_params.items():
+        if "gru" in name:
+            name = name.replace("gru.", "")  # strip the module-name prefix
+        if name in transfer_params:
+            transfer_params[name].copy_(param * random_matrix)
+
+    # L2 norm of the new model parameters after this mapping
+    transfer_l2_norm_after_mapping = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+    # Keep the scaling factor that gives the smallest L2 norm
+    if transfer_l2_norm_after_mapping < min_l2_norm:
+        min_l2_norm = transfer_l2_norm_after_mapping
+        min_l2_norm_random_matrix = random_matrix
+
+# Apply the best scaling factor to the pre-trained parameters
+for name, param in pre_trained_params.items():
+    if "gru" in name:
+        name = name.replace("gru.", "")  # strip the module-name prefix
+ if name in transfer_params:
+ transfer_params[name].copy_(param * min_l2_norm_random_matrix)
+
+
+# Freeze the pre-trained (teacher) model parameters
+for param in pre_trained_model.parameters():
+ param.requires_grad = False
+
+
+for epoch in range(num_transfer_epochs):
+ transfer_model.train()
+ running_loss = 0
+    train_bar = tqdm(train_loader)  # iterate over the training loader defined above
+ for data in train_bar:
+ x_train, y_train = data
+ optimizer_transfer.zero_grad()
+ y_train_pred = transfer_model(x_train)
+ y_pretrained_pred = pre_trained_model(x_train)
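+        # KD-style post-training objective: (1 - u) * error on the private labels
+        # + u * distance to the frozen teacher's predictions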
+ loss = (1-u)*loss_function(y_train_pred, y_train) + u * loss_function(y_train_pred, y_pretrained_pred)
+ loss.backward()
+ optimizer_transfer.step()
+
+ running_loss += loss.item()
+ train_bar.set_description("Transfer Learning epoch[{}/{}] loss:{:.3f}".format(epoch + 1, num_transfer_epochs, loss.item()))
+
+
+
+transfer_model_save_path = './transfer_model2.pth'
+torch.save(transfer_model.state_dict(), transfer_model_save_path)
+
+print('Finished Transfer Learning')
+
+transfer_model.eval()
+
+# Combine the training and test data to use the entire dataset
+x_combined = np.concatenate([x_train, x_test], axis=0)
+y_combined = np.concatenate([y_train, y_test], axis=0)
+
+# Convert the combined data to tensors
+x_combined_tensor = torch.from_numpy(x_combined).to(torch.float32)
+y_combined_tensor = torch.from_numpy(y_combined).to(torch.float32)
+
+# Create a new DataLoader for the combined data
+combined_data = TensorDataset(x_combined_tensor, y_combined_tensor)
+combined_loader = torch.utils.data.DataLoader(combined_data, batch_size=config.batch_size, shuffle=False)
+
+with torch.no_grad():
+ y_true = [] # Ground truth labels
+ y_pred = [] # Predicted labels
+ for data in test_loader: # Using the test data loader
+ x_test1, y_test1 = data
+ predictions = transfer_model(x_test1)
+ y_true.extend(y_test1.numpy().flatten())
+ y_pred.extend(predictions.numpy().flatten())
+
+# Convert the lists to numpy arrays for easier calculations
+y_true = np.array(y_true)
+y_pred = np.array(y_pred)
+
+# Calculate evaluation metrics
+mse = mean_squared_error(y_true, y_pred)
+rmse = np.sqrt(mse)
+mae = mean_absolute_error(y_true, y_pred)
+
+# Calculate MAPE and SMAPE with checks for division by zero
+mape_denominator = np.maximum(np.abs(y_true), 1e-8) # Avoid division by zero
+mape = np.mean(np.abs((y_true - y_pred) / mape_denominator)) * 100
+
+smape_denominator = np.maximum(np.abs(y_true) + np.abs(y_pred), 1e-8) # Avoid division by zero
+smape = 2.0 * np.mean(np.abs(y_pred - y_true) / smape_denominator) * 100
+
+
+print("Mean Squared Error (MSE):", mse)
+print("Root Mean Squared Error (RMSE):", rmse)
+print("Mean Absolute Error (MAE):", mae)
+print("Mean Absolute Percentage Error (MAPE):", mape)
+print("Symmetric Mean Absolute Percentage Error (SMAPE):", smape)
+
+# Save predictions and metrics to CSV
+result_df = pd.DataFrame({'Truth': y_true, 'Predicted': y_pred})
+result_df.to_csv('transfer_learning_predictions2.csv', index=False)
+
+metrics_df = pd.DataFrame({
+ 'MSE': [mse],
+ 'RMSE': [rmse],
+ 'MAE': [mae],
+ 'MAPE': [mape],
+ 'SMAPE': [smape]
+})
+metrics_df.to_csv('transfer_learning_metrics2.csv', index=False)
+
+print("Transfer learning predictions and metrics saved successfully!")
diff --git a/subject2-pre/all-process/ourfl_kd/m1.py b/subject2-pre/all-process/ourfl_kd/m1.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e4f66ce7b05e71ad2a097c0ae43d57044324b2d
--- /dev/null
+++ b/subject2-pre/all-process/ourfl_kd/m1.py
@@ -0,0 +1,299 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from torch.utils.data import TensorDataset
+from tqdm import tqdm
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+
+
+data1 = pd.read_csv(r'F:\ourfl\data\pre_microsoft.csv')
+df = data1[['cpu_use']]
+# 2. Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data = scaler.fit_transform(df.values)
+
+u = 0.5  # weight of the distillation (teacher) term in the post-training loss
+num_transfer_epochs = 30  # number of post-training epochs; adjust as needed
+
+class Config():
+
+    timestep = 2  # time step: number of past points in the input window
+    batch_size = 32  # batch size
+    feature_size = 1  # number of features per time step (only cpu_use here)
+    hidden_size = 256  # hidden layer size
+    output_size = 1  # single-output task: predict the next cpu_use value
+    num_layers = 2  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to +inf
+    learning_rate = 0.0005  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path to save the best model
+
+config = Config()
+
+# Configuration of the new (heterogeneous personal) model
+class newConfig():
+
+    timestep = 2  # time step: number of past points in the input window
+    batch_size = 32  # batch size
+    feature_size = 1  # number of features per time step (only cpu_use here)
+    hidden_size = 256  # hidden layer size
+    output_size = 1  # single-output task: predict the next cpu_use value
+    num_layers = 4  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to +inf
+    learning_rate = 0.0001  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path to save the best model
+
+newconfig = newConfig()
+
+# Build supervised samples over the whole series (no train/test split)
+def split_data1(data, timestep, feature_size):
+    dataX = []  # inputs
+    dataY = []  # targets
+
+    # Each window of `timestep` points is an input X; the following point is the target Y
+    for index in range(len(data) - timestep):
+        dataX.append(data[index: index + timestep])
+        dataY.append(data[index + timestep][0])  # target is the first (and only) column: cpu_use
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ return dataX, dataY
+
+# Build supervised samples and split them into training and test sets
+def split_data(data, timestep, feature_size):
+    dataX = []  # inputs
+    dataY = []  # targets
+
+    # Each window of `timestep` points is an input X; the following point is the target Y
+    for index in range(len(data) - timestep):
+        dataX.append(data[index: index + timestep])
+        dataY.append(data[index + timestep][0])  # target is the first (and only) column: cpu_use
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+    # Use the first 60% of the samples for training
+    train_size = int(np.round(0.6 * dataX.shape[0]))
+
+    # Split into training and test sets
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+# Define the GRU network
+class GRU(nn.Module):
+    def __init__(self, feature_size, hidden_size, num_layers, output_size):
+        super(GRU, self).__init__()
+        self.hidden_size = hidden_size  # hidden layer size
+        self.num_layers = num_layers  # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+        # Initialize the hidden state
+        if hidden is None:
+            h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+        else:
+            h_0 = hidden
+
+        # GRU forward pass
+        output, h_0 = self.gru(x, h_0)
+
+        # Shape of the GRU output
+        batch_size, timestep, hidden_size = output.shape
+
+        # Flatten output to (batch_size * timestep, hidden_size)
+        output = output.reshape(-1, hidden_size)
+
+        # Fully connected layer
+        output = self.fc(output)
+
+        # Reshape back to (batch_size, timestep, output_size)
+        output = output.reshape(batch_size, timestep, -1)
+
+        # Return the prediction at the last time step
+ return output[:, -1, :]
+
+
+# Build the training and test data
+x_train, y_train, x_test, y_test = split_data(data, config.timestep, config.feature_size)
+
+x_test1, y_test1 = split_data1(data, config.timestep, config.feature_size)
+
+# 4. Convert the arrays to tensors
+x_train_tensor = torch.from_numpy(x_train).to(torch.float32)
+y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
+x_test_tensor = torch.from_numpy(x_test).to(torch.float32)
+y_test_tensor = torch.from_numpy(y_test).to(torch.float32)
+
+# 5. Wrap the tensors into datasets
+train_data = TensorDataset(x_train_tensor, y_train_tensor)
+test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+# 6. Wrap the datasets into data loaders
+train_loader = torch.utils.data.DataLoader(train_data,
+ config.batch_size,
+ shuffle=False)
+
+test_loader = torch.utils.data.DataLoader(test_data,
+ config.batch_size,
+ shuffle=False)
+
+# The pre-trained (public) model uses the GRU class with the settings in `config`
+pre_trained_model = GRU(config.feature_size, config.hidden_size, config.num_layers, config.output_size)
+
+# Load the parameters of the pre-trained public model
+# model_path = r"F:\ourfl\fedavg\tvhl_client_model_1.pth"
+model_path = r"F:\ourfl\fedavg\client_model_3.pth"
+
+pre_trained_model.load_state_dict(torch.load(model_path))
+
+transfer_model = GRU(newconfig.feature_size, newconfig.hidden_size, newconfig.num_layers, newconfig.output_size)  # the new (personal) GRU model
+loss_function = nn.MSELoss()  # loss function
+optimizer_transfer = torch.optim.AdamW(transfer_model.parameters(), lr=newconfig.learning_rate)  # optimizer
+
+# Number of candidate random scaling factors to sample
+num_random_matrices = 100
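+# Note: pre_trained_l2_norm below is a 0-dim tensor, so each sampled "random matrix" is really a
+# single random scalar in [0, 1); the search keeps the scaling with the smallest parameter L2 norm.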
+
+# Get the parameters of the pre-trained model and of the new model
+pre_trained_params = pre_trained_model.state_dict()
+transfer_params = transfer_model.state_dict()  # state dict of the new (personal) model
+
+# L2 norm of the pre-trained model parameters
+pre_trained_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in pre_trained_params.values()]))
+
+# L2 norm of the new model parameters
+transfer_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+# Track the minimum L2 norm and the corresponding scaling factor
+min_l2_norm = float('inf')
+min_l2_norm_random_matrix = None
+
+# Sample random scaling factors and evaluate the resulting L2 norm
+for i in range(num_random_matrices):
+    # Draw a random scaling factor
+    random_matrix = torch.rand(pre_trained_l2_norm.size())
+
+    # Apply the scaling factor to the pre-trained parameters
+    for name, param in pre_trained_params.items():
+        if "gru" in name:
+            name = name.replace("gru.", "")  # strip the module-name prefix
+        if name in transfer_params:
+            transfer_params[name].copy_(param * random_matrix)
+
+    # L2 norm of the new model parameters after this mapping
+    transfer_l2_norm_after_mapping = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+    # Keep the scaling factor that gives the smallest L2 norm
+    if transfer_l2_norm_after_mapping < min_l2_norm:
+        min_l2_norm = transfer_l2_norm_after_mapping
+        min_l2_norm_random_matrix = random_matrix
+
+# Apply the best scaling factor to the pre-trained parameters
+for name, param in pre_trained_params.items():
+    if "gru" in name:
+        name = name.replace("gru.", "")  # strip the module-name prefix
+ if name in transfer_params:
+ transfer_params[name].copy_(param * min_l2_norm_random_matrix)
+
+
+# Freeze the pre-trained (teacher) model parameters
+for param in pre_trained_model.parameters():
+ param.requires_grad = False
+
+
+for epoch in range(num_transfer_epochs):
+ transfer_model.train()
+ running_loss = 0
+    train_bar = tqdm(train_loader)  # iterate over the training loader defined above
+ for data in train_bar:
+ x_train, y_train = data
+ optimizer_transfer.zero_grad()
+ y_train_pred = transfer_model(x_train)
+ y_pretrained_pred = pre_trained_model(x_train)
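+        # KD-style post-training objective: (1 - u) * error on the private labels
+        # + u * distance to the frozen teacher's predictions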
+ loss = (1-u)*loss_function(y_train_pred, y_train) + u * loss_function(y_train_pred, y_pretrained_pred)
+ loss.backward()
+ optimizer_transfer.step()
+
+ running_loss += loss.item()
+ train_bar.set_description("Transfer Learning epoch[{}/{}] loss:{:.3f}".format(epoch + 1, num_transfer_epochs, loss.item()))
+
+
+
+transfer_model_save_path = './transfer_model3.pth'
+torch.save(transfer_model.state_dict(), transfer_model_save_path)
+
+print('Finished Transfer Learning')
+
+transfer_model.eval()
+
+# Combine the training and test data to use the entire dataset
+x_combined = np.concatenate([x_train, x_test], axis=0)
+y_combined = np.concatenate([y_train, y_test], axis=0)
+
+# Convert the combined data to tensors
+x_combined_tensor = torch.from_numpy(x_combined).to(torch.float32)
+y_combined_tensor = torch.from_numpy(y_combined).to(torch.float32)
+
+# Create a new DataLoader for the combined data
+combined_data = TensorDataset(x_combined_tensor, y_combined_tensor)
+combined_loader = torch.utils.data.DataLoader(combined_data, batch_size=config.batch_size, shuffle=False)
+
+with torch.no_grad():
+ y_true = [] # Ground truth labels
+ y_pred = [] # Predicted labels
+ for data in test_loader: # Using the test data loader
+ x_test1, y_test1 = data
+ predictions = transfer_model(x_test1)
+ y_true.extend(y_test1.numpy().flatten())
+ y_pred.extend(predictions.numpy().flatten())
+
+# Convert the lists to numpy arrays for easier calculations
+y_true = np.array(y_true)
+y_pred = np.array(y_pred)
+
+# Calculate evaluation metrics
+mse = mean_squared_error(y_true, y_pred)
+rmse = np.sqrt(mse)
+mae = mean_absolute_error(y_true, y_pred)
+
+# Calculate MAPE and SMAPE with checks for division by zero
+mape_denominator = np.maximum(np.abs(y_true), 1e-8) # Avoid division by zero
+mape = np.mean(np.abs((y_true - y_pred) / mape_denominator)) * 100
+
+smape_denominator = np.maximum(np.abs(y_true) + np.abs(y_pred), 1e-8) # Avoid division by zero
+smape = 2.0 * np.mean(np.abs(y_pred - y_true) / smape_denominator) * 100
+
+
+print("Mean Squared Error (MSE):", mse)
+print("Root Mean Squared Error (RMSE):", rmse)
+print("Mean Absolute Error (MAE):", mae)
+print("Mean Absolute Percentage Error (MAPE):", mape)
+print("Symmetric Mean Absolute Percentage Error (SMAPE):", smape)
+
+# Save predictions and metrics to CSV
+result_df = pd.DataFrame({'Truth': y_true, 'Predicted': y_pred})
+result_df.to_csv('transfer_learning_predictions3.csv', index=False)
+
+metrics_df = pd.DataFrame({
+ 'MSE': [mse],
+ 'RMSE': [rmse],
+ 'MAE': [mae],
+ 'MAPE': [mape],
+ 'SMAPE': [smape]
+})
+metrics_df.to_csv('transfer_learning_metrics3.csv', index=False)
+
+print("Transfer learning predictions and metrics saved successfully!")
diff --git a/subject2-pre/all-process/ourfl_kd/s1.py b/subject2-pre/all-process/ourfl_kd/s1.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f4050ac43539640faddf8c4ee986162f9ca51eb
--- /dev/null
+++ b/subject2-pre/all-process/ourfl_kd/s1.py
@@ -0,0 +1,299 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from torch.utils.data import TensorDataset
+from tqdm import tqdm
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+
+
+data1 = pd.read_csv(r'F:\ourflc\data\processed_hefei.csv')
+df = data1[['cpu_use']]
+# 2. Scale the data to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data = scaler.fit_transform(df.values)
+
+u = 0.5  # weight of the distillation (teacher) term in the post-training loss
+num_transfer_epochs = 30  # number of post-training epochs; adjust as needed
+
+class Config():
+
+    timestep = 2  # time step: number of past points in the input window
+    batch_size = 32  # batch size
+    feature_size = 1  # number of features per time step (only cpu_use here)
+    hidden_size = 256  # hidden layer size
+    output_size = 1  # single-output task: predict the next cpu_use value
+    num_layers = 2  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to +inf
+    learning_rate = 0.0001  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path to save the best model
+
+config = Config()
+
+# Configuration of the new (heterogeneous personal) model
+class newConfig():
+
+    timestep = 2  # time step: number of past points in the input window
+    batch_size = 32  # batch size
+    feature_size = 1  # number of features per time step (only cpu_use here)
+    hidden_size = 256  # hidden layer size
+    output_size = 1  # single-output task: predict the next cpu_use value
+    num_layers = 4  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to +inf
+    learning_rate = 0.0001  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path to save the best model
+
+newconfig = newConfig()
+
+# Build supervised samples over the whole series (no train/test split)
+def split_data1(data, timestep, feature_size):
+    dataX = []  # inputs
+    dataY = []  # targets
+
+    # Each window of `timestep` points is an input X; the following point is the target Y
+    for index in range(len(data) - timestep):
+        dataX.append(data[index: index + timestep])
+        dataY.append(data[index + timestep][0])  # target is the first (and only) column: cpu_use
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+ return dataX, dataY
+
+# Build supervised samples and split them into training and test sets
+def split_data(data, timestep, feature_size):
+    dataX = []  # inputs
+    dataY = []  # targets
+
+    # Each window of `timestep` points is an input X; the following point is the target Y
+    for index in range(len(data) - timestep):
+        dataX.append(data[index: index + timestep])
+        dataY.append(data[index + timestep][0])  # target is the first (and only) column: cpu_use
+
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+
+    # Use the first 60% of the samples for training
+    train_size = int(np.round(0.6 * dataX.shape[0]))
+
+    # Split into training and test sets
+ x_train = dataX[: train_size, :]
+ y_train = dataY[: train_size].reshape(-1, 1)
+
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:].reshape(-1, 1)
+
+ return [x_train, y_train, x_test, y_test]
+
+# Define the GRU network
+class GRU(nn.Module):
+    def __init__(self, feature_size, hidden_size, num_layers, output_size):
+        super(GRU, self).__init__()
+        self.hidden_size = hidden_size  # hidden layer size
+        self.num_layers = num_layers  # number of GRU layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+
+        # Initialize the hidden state
+        if hidden is None:
+            h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+        else:
+            h_0 = hidden
+
+        # GRU forward pass
+        output, h_0 = self.gru(x, h_0)
+
+        # Shape of the GRU output
+        batch_size, timestep, hidden_size = output.shape
+
+        # Flatten output to (batch_size * timestep, hidden_size)
+        output = output.reshape(-1, hidden_size)
+
+        # Fully connected layer
+        output = self.fc(output)
+
+        # Reshape back to (batch_size, timestep, output_size)
+        output = output.reshape(batch_size, timestep, -1)
+
+        # Return the prediction at the last time step
+ return output[:, -1, :]
+
+
+# Build the training and test data
+x_train, y_train, x_test, y_test = split_data(data, config.timestep, config.feature_size)
+
+x_test1, y_test1 = split_data1(data, config.timestep, config.feature_size)
+
+# 4. Convert the arrays to tensors
+x_train_tensor = torch.from_numpy(x_train).to(torch.float32)
+y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
+x_test_tensor = torch.from_numpy(x_test).to(torch.float32)
+y_test_tensor = torch.from_numpy(y_test).to(torch.float32)
+
+# 5. Wrap the tensors into datasets
+train_data = TensorDataset(x_train_tensor, y_train_tensor)
+test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+# 6. Wrap the datasets into data loaders
+train_loader = torch.utils.data.DataLoader(train_data,
+ config.batch_size,
+ shuffle=False)
+
+test_loader = torch.utils.data.DataLoader(test_data,
+ config.batch_size,
+ shuffle=False)
+
+# The pre-trained (public) model uses the GRU class with the settings in `config`
+pre_trained_model = GRU(config.feature_size, config.hidden_size, config.num_layers, config.output_size)
+
+# Load the parameters of the pre-trained public model
+# model_path = r"F:\ourfl\fedavg\tvhl_client_model_1.pth"
+model_path = r"F:\ourfl\fedavg\client_model_4.pth"
+
+pre_trained_model.load_state_dict(torch.load(model_path))
+
+transfer_model = GRU(newconfig.feature_size, newconfig.hidden_size, newconfig.num_layers, newconfig.output_size)  # the new (personal) GRU model
+loss_function = nn.MSELoss()  # loss function
+optimizer_transfer = torch.optim.AdamW(transfer_model.parameters(), lr=newconfig.learning_rate)  # optimizer
+
+# Number of candidate random scaling factors to sample
+num_random_matrices = 100
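+# Note: pre_trained_l2_norm below is a 0-dim tensor, so each sampled "random matrix" is really a
+# single random scalar in [0, 1); the search keeps the scaling with the smallest parameter L2 norm.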
+
+# Get the parameters of the pre-trained model and of the new model
+pre_trained_params = pre_trained_model.state_dict()
+transfer_params = transfer_model.state_dict()  # state dict of the new (personal) model
+
+# L2 norm of the pre-trained model parameters
+pre_trained_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in pre_trained_params.values()]))
+
+# L2 norm of the new model parameters
+transfer_l2_norm = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+# Track the minimum L2 norm and the corresponding scaling factor
+min_l2_norm = float('inf')
+min_l2_norm_random_matrix = None
+
+# Sample random scaling factors and evaluate the resulting L2 norm
+for i in range(num_random_matrices):
+    # Draw a random scaling factor
+    random_matrix = torch.rand(pre_trained_l2_norm.size())
+
+    # Apply the scaling factor to the pre-trained parameters
+    for name, param in pre_trained_params.items():
+        if "gru" in name:
+            name = name.replace("gru.", "")  # strip the module-name prefix
+        if name in transfer_params:
+            transfer_params[name].copy_(param * random_matrix)
+
+    # L2 norm of the new model parameters after this mapping
+    transfer_l2_norm_after_mapping = torch.norm(torch.stack([torch.norm(param) for param in transfer_params.values()]))
+
+    # Keep the scaling factor that gives the smallest L2 norm
+    if transfer_l2_norm_after_mapping < min_l2_norm:
+        min_l2_norm = transfer_l2_norm_after_mapping
+        min_l2_norm_random_matrix = random_matrix
+
+# Apply the best scaling factor to the pre-trained parameters
+for name, param in pre_trained_params.items():
+    if "gru" in name:
+        name = name.replace("gru.", "")  # strip the module-name prefix
+ if name in transfer_params:
+ transfer_params[name].copy_(param * min_l2_norm_random_matrix)
+
+
+# Freeze the pre-trained (teacher) model parameters
+for param in pre_trained_model.parameters():
+ param.requires_grad = False
+
+
+for epoch in range(num_transfer_epochs):
+ transfer_model.train()
+ running_loss = 0
+    train_bar = tqdm(train_loader)  # iterate over the training loader defined above
+ for data in train_bar:
+ x_train, y_train = data
+ optimizer_transfer.zero_grad()
+ y_train_pred = transfer_model(x_train)
+ y_pretrained_pred = pre_trained_model(x_train)
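+        # KD-style post-training objective: (1 - u) * error on the private labels
+        # + u * distance to the frozen teacher's predictions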
+ loss = (1-u)*loss_function(y_train_pred, y_train) + u * loss_function(y_train_pred, y_pretrained_pred)
+ loss.backward()
+ optimizer_transfer.step()
+
+ running_loss += loss.item()
+ train_bar.set_description("Transfer Learning epoch[{}/{}] loss:{:.3f}".format(epoch + 1, num_transfer_epochs, loss.item()))
+
+
+
+transfer_model_save_path = './transfer_model4.pth'
+torch.save(transfer_model.state_dict(), transfer_model_save_path)
+
+print('Finished Transfer Learning')
+
+transfer_model.eval()
+
+# Combine the training and test data to use the entire dataset
+x_combined = np.concatenate([x_train, x_test], axis=0)
+y_combined = np.concatenate([y_train, y_test], axis=0)
+
+# Convert the combined data to tensors
+x_combined_tensor = torch.from_numpy(x_combined).to(torch.float32)
+y_combined_tensor = torch.from_numpy(y_combined).to(torch.float32)
+
+# Create a new DataLoader for the combined data
+combined_data = TensorDataset(x_combined_tensor, y_combined_tensor)
+combined_loader = torch.utils.data.DataLoader(combined_data, batch_size=config.batch_size, shuffle=False)
+
+with torch.no_grad():
+ y_true = [] # Ground truth labels
+ y_pred = [] # Predicted labels
+ for data in test_loader: # Using the test data loader
+ x_test1, y_test1 = data
+ predictions = transfer_model(x_test1)
+ y_true.extend(y_test1.numpy().flatten())
+ y_pred.extend(predictions.numpy().flatten())
+
+# Convert the lists to numpy arrays for easier calculations
+y_true = np.array(y_true)
+y_pred = np.array(y_pred)
+
+# Calculate evaluation metrics
+mse = mean_squared_error(y_true, y_pred)
+rmse = np.sqrt(mse)
+mae = mean_absolute_error(y_true, y_pred)
+
+# Calculate MAPE and SMAPE with checks for division by zero
+mape_denominator = np.maximum(np.abs(y_true), 1e-8) # Avoid division by zero
+mape = np.mean(np.abs((y_true - y_pred) / mape_denominator)) * 100
+
+smape_denominator = np.maximum(np.abs(y_true) + np.abs(y_pred), 1e-8) # Avoid division by zero
+smape = 2.0 * np.mean(np.abs(y_pred - y_true) / smape_denominator) * 100
+
+
+print("Mean Squared Error (MSE):", mse)
+print("Root Mean Squared Error (RMSE):", rmse)
+print("Mean Absolute Error (MAE):", mae)
+print("Mean Absolute Percentage Error (MAPE):", mape)
+print("Symmetric Mean Absolute Percentage Error (SMAPE):", smape)
+
+# Save predictions and metrics to CSV
+result_df = pd.DataFrame({'Truth': y_true, 'Predicted': y_pred})
+result_df.to_csv('transfer_learning_predictions4.csv', index=False)
+
+metrics_df = pd.DataFrame({
+ 'MSE': [mse],
+ 'RMSE': [rmse],
+ 'MAE': [mae],
+ 'MAPE': [mape],
+ 'SMAPE': [smape]
+})
+metrics_df.to_csv('transfer_learning_metrics4.csv', index=False)
+
+print("Transfer learning predictions and metrics saved successfully!")
diff --git a/subject2-pre/client.py b/subject2-pre/client.py
new file mode 100644
index 0000000000000000000000000000000000000000..37a03f8530e1fea5c5553d90c54e39acf21e3bab
--- /dev/null
+++ b/subject2-pre/client.py
@@ -0,0 +1,185 @@
+import socket
+import pickle
+import numpy as np
+import pandas as pd
+import torch.optim as optim
+
+import torch
+import torch.nn as nn
+import argparse
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+        self.activation = nn.ReLU()  # output activation
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+ output, h_0 = self.gru(x, h_0)
+ output = self.fc(output[:, -1, :])
+        output = self.activation(output)  # apply the output activation
+ return output, h_0
+
+# Command-line settings for the learning task
+def parser_args():
+ parser = argparse.ArgumentParser(description='GRU Model Parameters')
+ parser.add_argument('--timestep', type=int, default=4, help='Time step size')
+ parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
+ parser.add_argument('--feature_size', type=int, default=6, help='Number of features')
+ parser.add_argument('--hidden_size', type=int, default=512, help='Size of the hidden layer')
+ parser.add_argument('--output_size', type=int, default=3, help='Size of the output layer')
+ parser.add_argument('--num_layers', type=int, default=2, help='Number of GRU layers')
+ parser.add_argument('--epochs', type=int, default=3, help='Number of training epochs')
+ parser.add_argument('--learning_rate', type=float, default=0.0001, help='Learning rate')
+ parser.add_argument('--model_name', type=str, default='gru', help='Name of the model')
+ parser.add_argument('--save_path', type=str, default='./{}.pth'.format('gru_a'), help='Path to save the best model')
+ parser.add_argument('--no_cuda', action='store_true', default=False, help='Disable CUDA')
+ args = parser.parse_args()
+ return args
+
+def train_model(model, x_train, y_train, config):
+    # Set up the optimizer and the loss function
+    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
+    # optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
+    criterion = nn.MSELoss()
+    # Initial hidden state (adjust if needed)
+    # hidden = None
+
+    # Local training loop
+    for epoch in range(config.epochs):
+        optimizer.zero_grad()
+
+        # Reset the hidden state at every epoch
+        hidden = None
+
+        # Forward pass
+        output, hidden = model(x_train, hidden)
+        loss = criterion(output, y_train)
+
+        # Backward pass and parameter update
+        loss.backward(retain_graph=True)
+        optimizer.step()
+
+        print(f"Epoch [{epoch + 1}/{config.epochs}], Loss: {loss.item()}")
+
+    # Return the updated client model
+ return model
+
+# Split the data into input windows and targets
+def split_data(data, timestep,feature_size, target_size):
+ dataX = []
+ dataY = []
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep, :target_size])
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+ train_size = int(np.round(dataX.shape[0]))
+ x_train = dataX[:train_size, :]
+ y_train = dataY[:train_size, :]
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:, :]
+
+ return x_train, y_train, x_test, y_test
+
+
+# Load data
+def load_data(data_path):
+ dataset = pd.read_csv(data_path)
+ return dataset
+
+args = parser_args()
+
+data_path ='tvhl_a.csv'
+dataset = load_data(data_path)
+
+selected_features = ['cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan', 'mem_plan']
+target_size = 3  # number of target columns (cpu_use, gpu_use, mem_use)
+selected_data = dataset[selected_features].values
+
+x_train,y_train, x_test, y_test = split_data(selected_data, args.timestep, args.feature_size,target_size)
+x_train_tensor = torch.Tensor(x_train)
+y_train_tensor = torch.Tensor(y_train)
+
+# Build the local model
+model = GRU(args.feature_size, args.hidden_size, args.num_layers, args.output_size).to('cpu')
+# Create the optimizer (unused here; train_model builds its own Adam optimizer)
+optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)
+
+# Create the TCP client socket
+client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+# Connect to the server
+# server_address = ('127.0.0.1', 4321)
+server_address = ('202.117.43.11', 8080)
+client.connect(server_address)
+
+ROUNDS = 20
+length = len(pickle.dumps(model.state_dict()))
+chunk_size = 1024
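+# Per-round exchange: receive a big-endian length header, then the pickled global weights;
+# after local training, send back a 4096-byte big-endian length header followed by the
+# updated weights in chunks of chunk_size bytes.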
+
+for round in range(ROUNDS):
+    print("Round %d" % (round), end='\n')
+
+ size_message = client.recv(4096)
+ length = int.from_bytes(size_message, byteorder='big')
+
+    # Receive the global model weights from the server
+    received_weights = bytearray()
+
+    while len(received_weights) < length:
+        # print('Receiving model weights from the server')
+ chunk = client.recv(4096)
+ received_weights += chunk
+
+    print('All weights received')
+ if len(received_weights) == length:
+ try:
+ received_weights = pickle.loads(received_weights)
+            print('Deserialization succeeded')
+            # Load the received weights into the model
+ model.load_state_dict(received_weights)
+ except EOFError:
+            print('Deserialization failed: EOFError')
+ else:
+        print('Received data length does not match the expected size')
+
+
+    # Train the model locally on the client
+    print("Start local training")
+ train_model(model, x_train_tensor, y_train_tensor, args)
+
+    # Get the updated model weights
+    updated_weights = model.state_dict()
+    print('Got the updated model weights')
+    # Serialize the updated weights and send them to the server
+    print('Sending the updated weights to the server')
+ updated_weights_serialized = pickle.dumps(updated_weights)
+ print(f"Serialized client model weights size: {len(updated_weights_serialized)} bytes")
+
+ # size_message = str(len(updated_weights_serialized)).encode('utf-8')
+ # client.send(size_message)
+    size_message = len(updated_weights_serialized).to_bytes(4096, byteorder='big')
+ client.send(size_message)
+
+ total_sent = 0
+ for i in range(0, len(updated_weights_serialized), chunk_size):
+        chunk = updated_weights_serialized[i:i + chunk_size]
+ sent = client.send(chunk)
+ total_sent += sent
+
+
+    print('Sending finished')
+
+# Close the client socket
+client.close()
+torch.save(model, args.save_path)
\ No newline at end of file
diff --git a/subject2-pre/environment.md b/subject2-pre/environment.md
new file mode 100644
index 0000000000000000000000000000000000000000..17d56bf6bebbf903bf510951eadf673f4ef0a8ed
--- /dev/null
+++ b/subject2-pre/environment.md
@@ -0,0 +1,478 @@
+astropy 5.1
+asttokens 2.0.5
+astunparse 1.6.3
+atomicwrites 1.4.0
+attrs 22.1.0
+automat 20.2.0
+autopep8 1.6.0
+babel 2.11.0
+backcall 0.2.0
+backports 1.1
+backports.functools_lru_cache 1.6.4
+backports.tempfile 1.0
+backports.weakref 1.0.post1
+bcrypt 3.2.0
+beautifulsoup4 4.11.1
+binaryornot 0.4.4
+black 22.6.0
+blas 1.0
+bleach 4.1.0
+blosc 1.21.3
+bokeh 2.4.3
+boltons 23.0.0
+bottleneck 1.3.5
+brotli 1.0.9
+brotli-bin 1.0.9
+brotlipy 0.7.0
+bs4 0.0.1
+bzip2 1.0.8
+ca-certificates 2023.5.7
+cachetools 5.3.0
+certifi 2023.5.7
+cffi 1.15.1
+cfitsio 3.470
+chardet 4.0.0
+charls 2.2.0
+charset-normalizer 2.0.4
+click 8.0.4
+cloudpickle 2.0.0
+clyent 1.2.1
+colorama 0.4.6
+colorcet 3.0.1
+comm 0.1.2
+conda 23.3.1
+conda-build 3.24.0
+conda-content-trust 0.1.3
+conda-pack 0.6.0
+conda-package-handling 2.0.2
+conda-package-streaming 0.7.0
+conda-repo-cli 1.0.41
+conda-token 0.4.0
+conda-verify 3.4.2
+console_shortcut 0.1.1
+constantly 15.1.0
+contourpy 1.0.5
+cookiecutter 1.7.3
+cryptography 39.0.1
+cssselect 1.1.0
+cuda-cccl 12.1.109
+cuda-cudart 11.7.99
+cuda-cudart-dev 11.7.99
+cuda-cupti 11.7.101
+cuda-libraries 11.7.1
+cuda-libraries-dev 11.7.1
+cuda-nvrtc 11.7.99
+cuda-nvrtc-dev 11.7.99
+cuda-nvtx 11.7.91
+cuda-runtime 11.7.1
+curl 7.87.0
+cycler 0.11.0
+cytoolz 0.12.0
+daal4py 2023.0.2
+dal 2023.0.1
+dask 2022.7.0
+dask-core 2022.7.0
+datashader 0.14.4
+datashape 0.5.4
+debugpy 1.5.1
+decorator 5.1.1
+defusedxml 0.7.1
+diff-match-patch 20200713
+dill 0.3.6
+distributed 2022.7.0
+dm-tree 0.1.8
+docstring-to-markdown 0.11
+docutils 0.18.1
+easydict 1.10
+entrypoints 0.4
+et_xmlfile 1.1.0
+executing 0.8.3
+filelock 3.9.0
+flake8 6.0.0
+flask 2.2.2
+flatbuffers 23.5.9
+flit-core 3.6.0
+fonttools 4.25.0
+freetype 2.12.1
+fsspec 2022.11.0
+fst-pso 1.8.1
+future 0.18.3
+fuzzytm 2.0.5
+gast 0.4.0
+gensim 4.3.0
+giflib 5.2.1
+glib 2.69.1
+glob2 0.7
+google-auth 2.18.1
+google-auth-oauthlib 1.0.0
+google-pasta 0.2.0
+greenlet 2.0.1
+grpcio 1.54.2
+gst-plugins-base 1.18.5
+gstreamer 1.18.5
+h5py 3.7.0
+hdf5 1.10.6
+heapdict 1.0.1
+holoviews 1.15.4
+huggingface_hub 0.10.1
+hvplot 0.8.2
+hyperlink 21.0.0
+icc_rt 2022.1.0
+icu 58.2
+idna 3.4
+imagecodecs 2021.8.26
+imageio 2.26.0
+imagesize 1.4.1
+imbalanced-learn 0.10.1
+importlib-metadata 4.11.3
+importlib_metadata 4.11.3
+incremental 21.3.0
+inflection 0.5.1
+iniconfig 1.1.1
+intake 0.6.7
+intel-openmp 2021.4.0
+intervaltree 3.1.0
+ipykernel 6.19.2
+ipython 8.10.0
+ipython_genutils 0.2.0
+ipywidgets 7.6.5
+isort 5.9.3
+itemadapter 0.3.0
+itemloaders 1.0.4
+itsdangerous 2.0.1
+jax 0.4.10
+jedi 0.18.1
+jellyfish 0.9.0
+jinja2 3.1.2
+jinja2-time 0.2.0
+jmespath 0.10.0
+joblib 1.1.1
+jpeg 9e
+jq 1.6
+json5 0.9.6
+jsonpatch 1.32
+jsonpointer 2.1
+jsonschema 4.17.3
+jupyter 1.0.0
+jupyter_client 7.3.4
+jupyter_console 6.6.2
+jupyter_core 5.2.0
+jupyter_server 1.23.4
+jupyterlab 3.5.3
+jupyterlab_pygments 0.1.2
+jupyterlab_server 2.19.0
+jupyterlab_widgets 1.0.0
+jxrlib 1.1
+keras 2.12.0
+keyring 23.4.0
+kiwisolver 1.4.4
+lazy-object-proxy 1.6.0
+lcms2 2.12
+lerc 3.0
+libaec 1.0.4
+libarchive 3.6.2
+libbrotlicommon 1.0.9
+libbrotlidec 1.0.9
+libbrotlienc 1.0.9
+libclang 16.0.0
+libcublas 11.10.3.66
+libcublas-dev 11.10.3.66
+libcufft 10.7.2.124
+libcufft-dev 10.7.2.124
+libcurand 10.3.2.106
+libcurand-dev 10.3.2.106
+libcurl 7.87.0
+libcusolver 11.4.0.1
+libcusolver-dev 11.4.0.1
+libcusparse 11.7.4.91
+libcusparse-dev 11.7.4.91
+libdeflate 1.17
+libffi 3.4.2
+libiconv 1.16
+liblief 0.12.3
+libnpp 11.7.4.75
+libnpp-dev 11.7.4.75
+libnvjpeg 11.8.0.2
+libnvjpeg-dev 11.8.0.2
+libogg 1.3.5
+libpng 1.6.39
+libsodium 1.0.18
+libspatialindex 1.9.3
+libssh2 1.10.0
+libtiff 4.5.0
+libuv 1.44.2
+libvorbis 1.3.7
+libwebp 1.2.4
+libwebp-base 1.2.4
+libxml2 2.9.14
+libxslt 1.1.35
+libzopfli 1.0.3
+llvmlite 0.39.1
+locket 1.0.0
+lxml 4.9.1
+lz4 3.1.3
+lz4-c 1.9.4
+lzo 2.10
+m2-msys2-runtime 2.5.0.17080.65c939c
+m2-patch 2.7.5
+m2w64-libwinpthread-git 5.0.0.4634.697f757
+markdown 3.4.1
+markupsafe 2.1.1
+matplotlib 3.7.0
+matplotlib-base 3.7.0
+matplotlib-inline 0.1.6
+matplotlib-venn 0.11.9
+mccabe 0.7.0
+menuinst 1.4.19
+miniful 0.0.6
+mistune 0.8.4
+mkl 2021.4.0
+mkl-service 2.4.0
+mkl_fft 1.3.1
+mkl_random 1.2.2
+ml-dtypes 0.1.0
+mock 4.0.3
+mpmath 1.2.1
+msgpack-python 1.0.3
+msys2-conda-epoch 20160418
+multipledispatch 0.6.0
+munkres 1.1.4
+mypy_extensions 0.4.3
+navigator-updater 0.3.0
+nbclassic 0.5.2
+nbclient 0.5.13
+nbconvert 6.5.4
+nbformat 5.4.0
+nest-asyncio 1.5.6
+networkx 2.8.4
+ninja 1.10.2
+ninja-base 1.10.2
+nltk 3.7
+notebook 6.5.2
+notebook-shim 0.2.2
+numba 0.56.4
+numexpr 2.8.4
+numpy 1.23.0
+numpydoc 1.5.0
+oauthlib 3.2.2
+openjpeg 2.4.0
+openpyxl 3.0.10
+openssl 1.1.1t
+opt-einsum 3.3.0
+packaging 22.0
+pandarallel 1.6.5
+pandas 1.5.3
+pandocfilters 1.5.0
+panel 0.14.3
+param 1.12.3
+paramiko 2.8.1
+parsel 1.6.0
+parso 0.8.3
+partd 1.2.0
+pathlib 1.0.1
+pathspec 0.10.3
+patsy 0.5.3
+pcre 8.45
+pep8 1.7.1
+pexpect 4.8.0
+pickleshare 0.7.5
+pillow 9.4.0
+pip 22.3.1
+pkginfo 1.9.6
+platformdirs 2.5.2
+plotly 5.9.0
+pluggy 1.0.0
+ply 3.11
+pooch 1.4.0
+powershell_shortcut 0.0.1
+poyo 0.5.0
+prometheus_client 0.14.1
+prompt-toolkit 3.0.36
+prompt_toolkit 3.0.36
+protego 0.1.16
+protobuf 4.23.1
+psutil 5.9.0
+ptyprocess 0.7.0
+pure_eval 0.2.2
+py 1.11.0
+py-lief 0.12.3
+pyasn1 0.4.8
+pyasn1-modules 0.2.8
+pycodestyle 2.10.0
+pycosat 0.6.4
+pycparser 2.21
+pyct 0.5.0
+pycurl 7.45.1
+pydispatcher 2.0.5
+pydocstyle 6.3.0
+pyerfa 2.0.0
+pyflakes 3.0.1
+pyfume 0.2.25
+pygments 2.11.2
+pyhamcrest 2.0.2
+pyjwt 2.4.0
+pylint 2.16.2
+pylint-venv 2.3.0
+pyls-spyder 0.4.0
+pynacl 1.5.0
+pyodbc 4.0.34
+pyopenssl 23.0.0
+pyparsing 3.0.9
+pyqt 5.15.7
+pyqt5-sip 12.11.0
+pyqtwebengine 5.15.7
+pyrsistent 0.18.0
+pysocks 1.7.1
+pytables 3.7.0
+pytest 6.2.5
+python 3.10.9
+python-dateutil 2.8.2
+python-fastjsonschema 2.16.2
+python-libarchive-c 2.9
+python-lsp-black 1.2.1
+python-lsp-jsonrpc 1.0.0
+python-lsp-server 1.7.1
+python-slugify 5.0.2
+python-snappy 0.6.1
+pytoolconfig 1.2.5
+pytorch 2.0.1
+pytorch-cuda 11.7
+pytorch-mutex 1.0
+pytz 2022.7
+pyviz_comms 2.0.2
+pywavelets 1.4.1
+pywin32 305
+pywin32-ctypes 0.2.0
+pywinpty 2.0.10
+pyyaml 6.0
+pyzmq 23.2.0
+qdarkstyle 3.0.2
+qstylizer 0.2.2
+qt-main 5.15.2
+qt-webengine 5.15.9
+qtawesome 1.2.2
+qtconsole 5.4.0
+qtpy 2.2.0
+qtwebkit 5.212
+queuelib 1.5.0
+regex 2022.7.9
+requests 2.28.1
+requests-file 1.5.1
+requests-mock 1.11.0
+requests-oauthlib 1.3.1
+requests-toolbelt 0.9.1
+rope 1.7.0
+rsa 4.9
+rtree 1.0.1
+ruamel.yaml 0.17.21
+ruamel.yaml.clib 0.2.6
+ruamel_yaml 0.17.21
+scikit-image 0.19.3
+scikit-learn 1.2.1
+scikit-learn-intelex 2023.0.2
+scipy 1.10.0
+scrapy 2.8.0
+seaborn 0.12.2
+send2trash 1.8.0
+service_identity 18.1.0
+setuptools 68.0.0
+simpful 2.11.0
+simplejson 3.19.1
+sip 6.6.2
+six 1.16.0
+smart_open 5.2.1
+snappy 1.1.9
+sniffio 1.2.0
+snowballstemmer 2.2.0
+sortedcontainers 2.4.0
+soupsieve 2.3.2.post1
+sphinx 5.0.2
+sphinxcontrib-applehelp 1.0.2
+sphinxcontrib-devhelp 1.0.2
+sphinxcontrib-htmlhelp 2.0.0
+sphinxcontrib-jsmath 1.0.1
+sphinxcontrib-qthelp 1.0.3
+sphinxcontrib-serializinghtml 1.1.5
+spyder 5.4.1
+spyder-kernels 2.4.1
+sqlalchemy 1.4.39
+sqlite 3.40.1
+stack_data 0.2.0
+statsmodels 0.13.5
+supervenn 0.4.1
+sympy 1.11.1
+tabulate 0.8.10
+tbb 2021.7.0
+tbb4py 2021.7.0
+tblib 1.7.0
+tenacity 8.0.1
+tensorboard 2.12.3
+tensorboard-data-server 0.6.1
+tensorboard-plugin-wit 1.8.1
+tensorflow 2.12.0
+tensorflow-estimator 2.11.0
+tensorflow-intel 2.12.0
+tensorflow-io-gcs-filesystem 0.31.0
+tensorflow-probability 0.19.0
+termcolor 2.3.0
+terminado 0.17.1
+text-unidecode 1.3
+textdistance 4.2.1
+threadpoolctl 2.2.0
+three-merge 0.1.1
+tifffile 2021.7.2
+tinycss2 1.2.1
+tk 8.6.12
+tldextract 3.2.0
+tokenizers 0.11.4
+toml 0.10.2
+tomli 2.0.1
+tomlkit 0.11.1
+toolz 0.12.0
+torchaudio 2.0.2
+torchvision 0.15.2
+tornado 6.1
+tqdm 4.64.1
+traitlets 5.7.1
+transformers 4.24.0
+tushare 1.2.89
+twisted 22.2.0
+twisted-iocpsupport 1.0.2
+typeguard 2.13.3
+typing-extensions 4.4.0
+typing_extensions 4.4.0
+tzdata 2023.3
+ujson 5.4.0
+unidecode 1.2.0
+upsetplot 0.8.0
+urllib3 1.26.14
+utils 1.0.1
+vc 14.2
+vs2015_runtime 14.27.29016
+w3lib 1.21.0
+watchdog 2.1.6
+wcwidth 0.2.5
+webencodings 0.5.1
+websocket-client 0.57.0
+werkzeug 2.2.2
+whatthepatch 1.0.2
+wheel 0.40.0
+widgetsnbextension 3.5.2
+win_inet_pton 1.1.0
+wincertstore 0.2
+winpty 0.4.3
+wrapt 1.14.1
+xarray 2022.11.0
+xlwings 0.29.1
+xz 5.2.10
+yaml 0.2.5
+yapf 0.31.0
+zeromq 4.3.4
+zfp 0.5.5
+zict 2.1.0
+zipp 3.11.0
+zlib 1.2.13
+zope 1.0
+zope.interface 5.4.0
+zstandard 0.19.0
+zstd 1.5.2
diff --git a/subject2-pre/environment.yaml b/subject2-pre/environment.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..17d56bf6bebbf903bf510951eadf673f4ef0a8ed
--- /dev/null
+++ b/subject2-pre/environment.yaml
@@ -0,0 +1,478 @@
+astropy 5.1
+asttokens 2.0.5
+astunparse 1.6.3
+atomicwrites 1.4.0
+attrs 22.1.0
+automat 20.2.0
+autopep8 1.6.0
+babel 2.11.0
+backcall 0.2.0
+backports 1.1
+backports.functools_lru_cache 1.6.4
+backports.tempfile 1.0
+backports.weakref 1.0.post1
+bcrypt 3.2.0
+beautifulsoup4 4.11.1
+binaryornot 0.4.4
+black 22.6.0
+blas 1.0
+bleach 4.1.0
+blosc 1.21.3
+bokeh 2.4.3
+boltons 23.0.0
+bottleneck 1.3.5
+brotli 1.0.9
+brotli-bin 1.0.9
+brotlipy 0.7.0
+bs4 0.0.1
+bzip2 1.0.8
+ca-certificates 2023.5.7
+cachetools 5.3.0
+certifi 2023.5.7
+cffi 1.15.1
+cfitsio 3.470
+chardet 4.0.0
+charls 2.2.0
+charset-normalizer 2.0.4
+click 8.0.4
+cloudpickle 2.0.0
+clyent 1.2.1
+colorama 0.4.6
+colorcet 3.0.1
+comm 0.1.2
+conda 23.3.1
+conda-build 3.24.0
+conda-content-trust 0.1.3
+conda-pack 0.6.0
+conda-package-handling 2.0.2
+conda-package-streaming 0.7.0
+conda-repo-cli 1.0.41
+conda-token 0.4.0
+conda-verify 3.4.2
+console_shortcut 0.1.1
+constantly 15.1.0
+contourpy 1.0.5
+cookiecutter 1.7.3
+cryptography 39.0.1
+cssselect 1.1.0
+cuda-cccl 12.1.109
+cuda-cudart 11.7.99
+cuda-cudart-dev 11.7.99
+cuda-cupti 11.7.101
+cuda-libraries 11.7.1
+cuda-libraries-dev 11.7.1
+cuda-nvrtc 11.7.99
+cuda-nvrtc-dev 11.7.99
+cuda-nvtx 11.7.91
+cuda-runtime 11.7.1
+curl 7.87.0
+cycler 0.11.0
+cytoolz 0.12.0
+daal4py 2023.0.2
+dal 2023.0.1
+dask 2022.7.0
+dask-core 2022.7.0
+datashader 0.14.4
+datashape 0.5.4
+debugpy 1.5.1
+decorator 5.1.1
+defusedxml 0.7.1
+diff-match-patch 20200713
+dill 0.3.6
+distributed 2022.7.0
+dm-tree 0.1.8
+docstring-to-markdown 0.11
+docutils 0.18.1
+easydict 1.10
+entrypoints 0.4
+et_xmlfile 1.1.0
+executing 0.8.3
+filelock 3.9.0
+flake8 6.0.0
+flask 2.2.2
+flatbuffers 23.5.9
+flit-core 3.6.0
+fonttools 4.25.0
+freetype 2.12.1
+fsspec 2022.11.0
+fst-pso 1.8.1
+future 0.18.3
+fuzzytm 2.0.5
+gast 0.4.0
+gensim 4.3.0
+giflib 5.2.1
+glib 2.69.1
+glob2 0.7
+google-auth 2.18.1
+google-auth-oauthlib 1.0.0
+google-pasta 0.2.0
+greenlet 2.0.1
+grpcio 1.54.2
+gst-plugins-base 1.18.5
+gstreamer 1.18.5
+h5py 3.7.0
+hdf5 1.10.6
+heapdict 1.0.1
+holoviews 1.15.4
+huggingface_hub 0.10.1
+hvplot 0.8.2
+hyperlink 21.0.0
+icc_rt 2022.1.0
+icu 58.2
+idna 3.4
+imagecodecs 2021.8.26
+imageio 2.26.0
+imagesize 1.4.1
+imbalanced-learn 0.10.1
+importlib-metadata 4.11.3
+importlib_metadata 4.11.3
+incremental 21.3.0
+inflection 0.5.1
+iniconfig 1.1.1
+intake 0.6.7
+intel-openmp 2021.4.0
+intervaltree 3.1.0
+ipykernel 6.19.2
+ipython 8.10.0
+ipython_genutils 0.2.0
+ipywidgets 7.6.5
+isort 5.9.3
+itemadapter 0.3.0
+itemloaders 1.0.4
+itsdangerous 2.0.1
+jax 0.4.10
+jedi 0.18.1
+jellyfish 0.9.0
+jinja2 3.1.2
+jinja2-time 0.2.0
+jmespath 0.10.0
+joblib 1.1.1
+jpeg 9e
+jq 1.6
+json5 0.9.6
+jsonpatch 1.32
+jsonpointer 2.1
+jsonschema 4.17.3
+jupyter 1.0.0
+jupyter_client 7.3.4
+jupyter_console 6.6.2
+jupyter_core 5.2.0
+jupyter_server 1.23.4
+jupyterlab 3.5.3
+jupyterlab_pygments 0.1.2
+jupyterlab_server 2.19.0
+jupyterlab_widgets 1.0.0
+jxrlib 1.1
+keras 2.12.0
+keyring 23.4.0
+kiwisolver 1.4.4
+lazy-object-proxy 1.6.0
+lcms2 2.12
+lerc 3.0
+libaec 1.0.4
+libarchive 3.6.2
+libbrotlicommon 1.0.9
+libbrotlidec 1.0.9
+libbrotlienc 1.0.9
+libclang 16.0.0
+libcublas 11.10.3.66
+libcublas-dev 11.10.3.66
+libcufft 10.7.2.124
+libcufft-dev 10.7.2.124
+libcurand 10.3.2.106
+libcurand-dev 10.3.2.106
+libcurl 7.87.0
+libcusolver 11.4.0.1
+libcusolver-dev 11.4.0.1
+libcusparse 11.7.4.91
+libcusparse-dev 11.7.4.91
+libdeflate 1.17
+libffi 3.4.2
+libiconv 1.16
+liblief 0.12.3
+libnpp 11.7.4.75
+libnpp-dev 11.7.4.75
+libnvjpeg 11.8.0.2
+libnvjpeg-dev 11.8.0.2
+libogg 1.3.5
+libpng 1.6.39
+libsodium 1.0.18
+libspatialindex 1.9.3
+libssh2 1.10.0
+libtiff 4.5.0
+libuv 1.44.2
+libvorbis 1.3.7
+libwebp 1.2.4
+libwebp-base 1.2.4
+libxml2 2.9.14
+libxslt 1.1.35
+libzopfli 1.0.3
+llvmlite 0.39.1
+locket 1.0.0
+lxml 4.9.1
+lz4 3.1.3
+lz4-c 1.9.4
+lzo 2.10
+m2-msys2-runtime 2.5.0.17080.65c939c
+m2-patch 2.7.5
+m2w64-libwinpthread-git 5.0.0.4634.697f757
+markdown 3.4.1
+markupsafe 2.1.1
+matplotlib 3.7.0
+matplotlib-base 3.7.0
+matplotlib-inline 0.1.6
+matplotlib-venn 0.11.9
+mccabe 0.7.0
+menuinst 1.4.19
+miniful 0.0.6
+mistune 0.8.4
+mkl 2021.4.0
+mkl-service 2.4.0
+mkl_fft 1.3.1
+mkl_random 1.2.2
+ml-dtypes 0.1.0
+mock 4.0.3
+mpmath 1.2.1
+msgpack-python 1.0.3
+msys2-conda-epoch 20160418
+multipledispatch 0.6.0
+munkres 1.1.4
+mypy_extensions 0.4.3
+navigator-updater 0.3.0
+nbclassic 0.5.2
+nbclient 0.5.13
+nbconvert 6.5.4
+nbformat 5.4.0
+nest-asyncio 1.5.6
+networkx 2.8.4
+ninja 1.10.2
+ninja-base 1.10.2
+nltk 3.7
+notebook 6.5.2
+notebook-shim 0.2.2
+numba 0.56.4
+numexpr 2.8.4
+numpy 1.23.0
+numpydoc 1.5.0
+oauthlib 3.2.2
+openjpeg 2.4.0
+openpyxl 3.0.10
+openssl 1.1.1t
+opt-einsum 3.3.0
+packaging 22.0
+pandarallel 1.6.5
+pandas 1.5.3
+pandocfilters 1.5.0
+panel 0.14.3
+param 1.12.3
+paramiko 2.8.1
+parsel 1.6.0
+parso 0.8.3
+partd 1.2.0
+pathlib 1.0.1
+pathspec 0.10.3
+patsy 0.5.3
+pcre 8.45
+pep8 1.7.1
+pexpect 4.8.0
+pickleshare 0.7.5
+pillow 9.4.0
+pip 22.3.1
+pkginfo 1.9.6
+platformdirs 2.5.2
+plotly 5.9.0
+pluggy 1.0.0
+ply 3.11
+pooch 1.4.0
+powershell_shortcut 0.0.1
+poyo 0.5.0
+prometheus_client 0.14.1
+prompt-toolkit 3.0.36
+prompt_toolkit 3.0.36
+protego 0.1.16
+protobuf 4.23.1
+psutil 5.9.0
+ptyprocess 0.7.0
+pure_eval 0.2.2
+py 1.11.0
+py-lief 0.12.3
+pyasn1 0.4.8
+pyasn1-modules 0.2.8
+pycodestyle 2.10.0
+pycosat 0.6.4
+pycparser 2.21
+pyct 0.5.0
+pycurl 7.45.1
+pydispatcher 2.0.5
+pydocstyle 6.3.0
+pyerfa 2.0.0
+pyflakes 3.0.1
+pyfume 0.2.25
+pygments 2.11.2
+pyhamcrest 2.0.2
+pyjwt 2.4.0
+pylint 2.16.2
+pylint-venv 2.3.0
+pyls-spyder 0.4.0
+pynacl 1.5.0
+pyodbc 4.0.34
+pyopenssl 23.0.0
+pyparsing 3.0.9
+pyqt 5.15.7
+pyqt5-sip 12.11.0
+pyqtwebengine 5.15.7
+pyrsistent 0.18.0
+pysocks 1.7.1
+pytables 3.7.0
+pytest 6.2.5
+python 3.10.9
+python-dateutil 2.8.2
+python-fastjsonschema 2.16.2
+python-libarchive-c 2.9
+python-lsp-black 1.2.1
+python-lsp-jsonrpc 1.0.0
+python-lsp-server 1.7.1
+python-slugify 5.0.2
+python-snappy 0.6.1
+pytoolconfig 1.2.5
+pytorch 2.0.1
+pytorch-cuda 11.7
+pytorch-mutex 1.0
+pytz 2022.7
+pyviz_comms 2.0.2
+pywavelets 1.4.1
+pywin32 305
+pywin32-ctypes 0.2.0
+pywinpty 2.0.10
+pyyaml 6.0
+pyzmq 23.2.0
+qdarkstyle 3.0.2
+qstylizer 0.2.2
+qt-main 5.15.2
+qt-webengine 5.15.9
+qtawesome 1.2.2
+qtconsole 5.4.0
+qtpy 2.2.0
+qtwebkit 5.212
+queuelib 1.5.0
+regex 2022.7.9
+requests 2.28.1
+requests-file 1.5.1
+requests-mock 1.11.0
+requests-oauthlib 1.3.1
+requests-toolbelt 0.9.1
+rope 1.7.0
+rsa 4.9
+rtree 1.0.1
+ruamel.yaml 0.17.21
+ruamel.yaml.clib 0.2.6
+ruamel_yaml 0.17.21
+scikit-image 0.19.3
+scikit-learn 1.2.1
+scikit-learn-intelex 2023.0.2
+scipy 1.10.0
+scrapy 2.8.0
+seaborn 0.12.2
+send2trash 1.8.0
+service_identity 18.1.0
+setuptools 68.0.0
+simpful 2.11.0
+simplejson 3.19.1
+sip 6.6.2
+six 1.16.0
+smart_open 5.2.1
+snappy 1.1.9
+sniffio 1.2.0
+snowballstemmer 2.2.0
+sortedcontainers 2.4.0
+soupsieve 2.3.2.post1
+sphinx 5.0.2
+sphinxcontrib-applehelp 1.0.2
+sphinxcontrib-devhelp 1.0.2
+sphinxcontrib-htmlhelp 2.0.0
+sphinxcontrib-jsmath 1.0.1
+sphinxcontrib-qthelp 1.0.3
+sphinxcontrib-serializinghtml 1.1.5
+spyder 5.4.1
+spyder-kernels 2.4.1
+sqlalchemy 1.4.39
+sqlite 3.40.1
+stack_data 0.2.0
+statsmodels 0.13.5
+supervenn 0.4.1
+sympy 1.11.1
+tabulate 0.8.10
+tbb 2021.7.0
+tbb4py 2021.7.0
+tblib 1.7.0
+tenacity 8.0.1
+tensorboard 2.12.3
+tensorboard-data-server 0.6.1
+tensorboard-plugin-wit 1.8.1
+tensorflow 2.12.0
+tensorflow-estimator 2.11.0
+tensorflow-intel 2.12.0
+tensorflow-io-gcs-filesystem 0.31.0
+tensorflow-probability 0.19.0
+termcolor 2.3.0
+terminado 0.17.1
+text-unidecode 1.3
+textdistance 4.2.1
+threadpoolctl 2.2.0
+three-merge 0.1.1
+tifffile 2021.7.2
+tinycss2 1.2.1
+tk 8.6.12
+tldextract 3.2.0
+tokenizers 0.11.4
+toml 0.10.2
+tomli 2.0.1
+tomlkit 0.11.1
+toolz 0.12.0
+torchaudio 2.0.2
+torchvision 0.15.2
+tornado 6.1
+tqdm 4.64.1
+traitlets 5.7.1
+transformers 4.24.0
+tushare 1.2.89
+twisted 22.2.0
+twisted-iocpsupport 1.0.2
+typeguard 2.13.3
+typing-extensions 4.4.0
+typing_extensions 4.4.0
+tzdata 2023.3
+ujson 5.4.0
+unidecode 1.2.0
+upsetplot 0.8.0
+urllib3 1.26.14
+utils 1.0.1
+vc 14.2
+vs2015_runtime 14.27.29016
+w3lib 1.21.0
+watchdog 2.1.6
+wcwidth 0.2.5
+webencodings 0.5.1
+websocket-client 0.57.0
+werkzeug 2.2.2
+whatthepatch 1.0.2
+wheel 0.40.0
+widgetsnbextension 3.5.2
+win_inet_pton 1.1.0
+wincertstore 0.2
+winpty 0.4.3
+wrapt 1.14.1
+xarray 2022.11.0
+xlwings 0.29.1
+xz 5.2.10
+yaml 0.2.5
+yapf 0.31.0
+zeromq 4.3.4
+zfp 0.5.5
+zict 2.1.0
+zipp 3.11.0
+zlib 1.2.13
+zope 1.0
+zope.interface 5.4.0
+zstandard 0.19.0
+zstd 1.5.2
diff --git a/subject2-pre/post_fl.py b/subject2-pre/post_fl.py
new file mode 100644
index 0000000000000000000000000000000000000000..fab6ad925b4fd88fa1bc8a8f939c7d17de80e8f8
--- /dev/null
+++ b/subject2-pre/post_fl.py
@@ -0,0 +1,230 @@
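+# post_fl.py: post-trains the personal GRU model after federated learning. The
+# trained public model is loaded and frozen, and the personal model is fitted on
+# private data with a loss that mixes the private targets with the public model's
+# predictions (see the combined objective in the training loop below).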
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+import argparse
+
+
+# Data loading
+def load_data(data_path):
+    dataset = pd.read_csv(data_path)
+    return dataset
+# Load the workload dataset
+data_path = 'processed_alibaba.csv'
+data = load_data(data_path)
+# Select the feature columns and target columns used for training and testing
+data = data[['cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan', 'mem_plan']]
+
+
+# Normalize all columns to [0, 1]
+scaler = MinMaxScaler(feature_range=(0, 1))
+data = scaler.fit_transform(data.values)
+
+# Weight of the shared-knowledge term in the post-training loss
+u = 0.1
+# Number of transfer-learning (post-training) epochs
+num_transfer_epochs = 100
+
+# Command-line settings of the learning task
+def parser_args():
+ parser = argparse.ArgumentParser(description='GRU Model Parameters')
+ parser.add_argument('--timestep', type=int, default=4, help='Time step size')
+ parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
+ parser.add_argument('--feature_size', type=int, default=6, help='Number of features')
+ parser.add_argument('--hidden_size', type=int, default=512, help='Size of the hidden layer')
+ parser.add_argument('--output_size', type=int, default=3, help='Size of the output layer')
+ parser.add_argument('--num_layers', type=int, default=2, help='Number of GRU layers')
+ parser.add_argument('--epochs', type=int, default=10, help='Number of training epochs')
+ parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning rate')
+ parser.add_argument('--model_name', type=str, default='gru', help='Name of the model')
+ parser.add_argument('--save_path', type=str, default='./{}.pth'.format('gru'), help='Path to save the best model')
+ parser.add_argument('--no_cuda', action='store_true', default=False, help='Disable CUDA')
+ args = parser.parse_args()
+ return args
+args = parser_args()
+
+
+# Configuration of the new (personal) model
+class Config():
+
+    timestep = 4  # time-step size, i.e. how many past time windows are used
+    batch_size = 32  # batch size
+    feature_size = 6  # number of features per time step (6-dimensional features here)
+    hidden_size = 512  # hidden-layer size
+    output_size = 3  # three prediction targets: cpu, gpu and memory utilization
+    num_layers = 4  # number of GRU layers
+    epochs = 32  # number of training epochs
+    best_loss = float('inf')  # best loss so far, initialized to positive infinity
+    learning_rate = 0.001  # learning rate
+    model_name = 'gru'  # model name
+    save_path = './{}.pth'.format(model_name)  # path for saving the best model
+
+config = Config()
+
+# Split the data into sliding windows and train/test sets
+def split_data(data, timestep,target_size):
+ dataX = []
+ dataY = []
+ for index in range(len(data) - timestep):
+ dataX.append(data[index: index + timestep])
+ dataY.append(data[index + timestep, :target_size])
+ dataX = np.array(dataX)
+ dataY = np.array(dataY)
+ train_size = int(np.round(0.7*dataX.shape[0]))
+ x_train = dataX[:train_size, :]
+ y_train = dataY[:train_size, :]
+ x_test = dataX[train_size:, :]
+ y_test = dataY[train_size:, :]
+
+ return x_train, y_train, x_test, y_test
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+        self.activation = nn.ReLU()  # add an output activation function
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+ output, h_0 = self.gru(x, h_0)
+ output = self.fc(output[:, -1, :])
+        output = self.activation(output)  # apply the output activation function
+ return output, h_0
+
+
+
+
+# Build the training and test data
+target_size = 3  # number of target columns (cpu_use, gpu_use, mem_use)
+x_train, y_train, x_test, y_test = split_data(data, args.timestep, target_size)
+
+# Convert the arrays into training and test tensors
+x_train_tensor = torch.Tensor(x_train)
+y_train_tensor = torch.Tensor(y_train)
+x_test_tensor = torch.Tensor(x_test)
+y_test_tensor = torch.Tensor(y_test)
+
+# The pre-trained public model uses the same GRU class; args holds its configuration
+pre_trained_model = GRU(args.feature_size, args.hidden_size, args.num_layers, args.output_size)
+# Load the pre-trained public model (the checkpoint stores the full serialized model object)
+model_path = "gru_a.pth"
+pre_trained_model = torch.load(model_path, map_location='cpu')
+
+transfer_model = GRU(config.feature_size, config.hidden_size, config.num_layers, config.output_size)  # personal model to be post-trained
+
+loss_function = nn.MSELoss()  # loss function
+optimizer_transfer = torch.optim.AdamW(transfer_model.parameters(), lr=config.learning_rate)  # optimizer
+
+
+# Freeze the public model's parameters so that it only provides soft targets
+for param in pre_trained_model.parameters():
+ param.requires_grad = False
+print(y_train_tensor)
+
+# train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
+# # Create a DataLoader
+# train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
+
+for epoch in range(num_transfer_epochs):
+
+ transfer_model.train()
+ running_loss = 0
+
+ optimizer_transfer.zero_grad()
+    # Reset the hidden state manually
+ hidden = None
+
+    # Take the input and target tensors from the data
+ inputs = x_train_tensor
+ targets = y_train_tensor
+
+ y_train_pred,hidden = transfer_model(inputs)
+ y_pretrained_pred,hidden = pre_trained_model(inputs)
+
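+    # Combined post-training objective: (1 - u) weights the MSE against the private
+    # targets, u weights the MSE against the frozen public model's predictions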
+ loss = (1 - u) * loss_function(y_train_pred, targets) + u * loss_function(y_train_pred, y_pretrained_pred)
+    # Backpropagation and parameter update
+ loss.backward()
+ optimizer_transfer.step()
+
+ running_loss += loss.item()
+
+    # Print the running loss to monitor training
+ print(f"Epoch {epoch+1}/{num_transfer_epochs}, Loss: {running_loss}")
+
+
+# Save the post-trained (personal) model
+transfer_model_save_path = './ta_model.pth'  # save path of the post-trained model
+torch.save(transfer_model.state_dict(), transfer_model_save_path)
+
+print('Finished Transfer Learning')
+
+# Evaluation loop
+transfer_model.eval()  # switch the model to evaluation mode
+
+
+with torch.no_grad():  # disable gradient computation during testing
+ test_inputs = x_test_tensor
+ test_targets = y_test_tensor
+
+ test_predictions, _ = transfer_model(test_inputs)
+ test_loss = loss_function(test_predictions, test_targets)
+
+
+    # Create a file to save the test inputs
+    with open('input_data.txt', 'w') as input_file:
+        # Print each input together with its target and prediction
+ for i in range(len(test_inputs)):
+ input_data = test_inputs[i].numpy()
+ target_data = test_targets[i].numpy()
+ predicted_data = test_predictions[i].numpy()
+
+            print(f"Sample {i + 1}:")
+            print("Input data:")
+            for row in input_data:
+                print(" ".join(map(str, row)))
+
+            print("Target data:", target_data)
+            print("Predicted data:", predicted_data)
+            print("----------")
+
+            # Write the input data to the file
+            input_file.write(f"Sample {i + 1}:\n")
+            input_file.write("Input data:\n")
+            for row in input_data:
+                input_file.write(" ".join(map(str, row)) + "\n")
+
+            input_file.write("Target data: " + repr(target_data.tolist()) + "\n")
+            input_file.write("Predicted data: " + repr(predicted_data.tolist()) + "\n")
+            input_file.write("----------\n")
+
+# print(f"Test loss: {test_loss.item()}")
+
+# # Save the test results to a CSV file
+# result_df = pd.DataFrame({
+# 'Actual_cpu': test_targets[:, 0].numpy(),
+# 'Actual_gpu': test_targets[:, 1].numpy(),
+# 'Actual_mem': test_targets[:, 2].numpy(),
+# 'Predicted_cpu': test_predictions[:, 0].numpy(),
+# 'Predicted_gpu': test_predictions[:, 1].numpy(),
+# 'Predicted_mem': test_predictions[:, 2].numpy(),
+# })
+#
+# result_df.to_csv('pa_results.csv', index=False)
\ No newline at end of file
diff --git a/subject2-pre/server.py b/subject2-pre/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcb74aa79ffde7d9ec63a46c3819f47046702d48
--- /dev/null
+++ b/subject2-pre/server.py
@@ -0,0 +1,164 @@
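+# server.py: FedAvg-style parameter server. It waits for two clients, then in each
+# round broadcasts the current GRU weights over TCP, collects the clients' locally
+# trained weights in parallel threads, and averages them into the global model.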
+import socket
+import pickle
+import torch.nn as nn
+import argparse
+from concurrent.futures import ThreadPoolExecutor
+
+
+
+# Define the GRU network
+class GRU(nn.Module):
+ def __init__(self, feature_size, hidden_size, num_layers, output_size):
+ super(GRU, self).__init__()
+ self.hidden_size = hidden_size
+ self.num_layers = num_layers
+ self.gru = nn.GRU(feature_size, hidden_size, num_layers, batch_first=True)
+ self.fc = nn.Linear(hidden_size, output_size)
+ self.activation = nn.ReLU()
+
+ def forward(self, x, hidden=None):
+ batch_size = x.shape[0]
+ if hidden is None:
+ h_0 = x.data.new(self.num_layers, batch_size, self.hidden_size).fill_(0).float()
+ else:
+ h_0 = hidden
+ output, h_0 = self.gru(x, h_0)
+ output = self.fc(output[:, -1, :])
+ output = self.activation(output)
+ return output, h_0
+
+# Command-line settings of the learning task
+def parser_args():
+ parser = argparse.ArgumentParser(description='GRU Model Parameters')
+ parser.add_argument('--timestep', type=int, default=4, help='Time step size')
+ parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
+ parser.add_argument('--feature_size', type=int, default=6, help='Number of features')
+ parser.add_argument('--hidden_size', type=int, default=512, help='Size of the hidden layer')
+ parser.add_argument('--output_size', type=int, default=3, help='Size of the output layer')
+ parser.add_argument('--num_layers', type=int, default=2, help='Number of GRU layers')
+ parser.add_argument('--epochs', type=int, default=3, help='Number of training epochs')
+ parser.add_argument('--learning_rate', type=float, default=0.0001, help='Learning rate')
+ parser.add_argument('--model_name', type=str, default='gru', help='Name of the model')
+ parser.add_argument('--save_path', type=str, default='./{}.pth'.format('gru'), help='Path to save the best model')
+ parser.add_argument('--no_cuda', action='store_true', default=False, help='Disable CUDA')
+ args = parser.parse_args()
+ return args
+
+args = parser_args()
+model = GRU(args.feature_size, args.hidden_size, args.num_layers, args.output_size)
+
+# Create the server socket
+server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+# Bind the server address and port
+# server_address = ('127.0.0.1', 4321)
+server_address = ('202.117.43.11', 8080)
+server.bind(server_address)
+# Start listening; allow up to 5 pending client connections
+server.listen(5)
+clients = []
+c_weights = {}  # weights received from each client
+c_length = {}   # payload length announced by each client
+
+
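+# Accept connections until two clients have joined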
+while True:
+    print("Waiting for client connections...")
+ client, addr = server.accept()
+    print(f"Accepted connection from {addr}")
+ clients.append(client)
+ if len(clients) == 2:
+ break
+
+ROUNDS = 20
+length = len(pickle.dumps(model.state_dict()))
+chunk_size = 1024
+
+
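+# Per-round wire protocol: a 4096-byte big-endian length header, followed by the
+# pickled state_dict sent in 1024-byte chunks; client updates are received the same way.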
+for round in range(ROUNDS):
+
+ print("ROUND %d" % (round), end='\n')
+    # Get the current global model weights
+ model_weights = model.state_dict()
+ model_weights_serialized = pickle.dumps(model_weights)
+ print(f"Serialized model weights size: {len(model_weights_serialized)} bytes")
+
+ for client in clients:
+        print('Sending payload length to client:')
+ print(client)
+        size_message = len(model_weights_serialized).to_bytes(4096, byteorder='big')  # encode the payload length as a fixed-size byte string
+ client.send(size_message)
+
+    # Broadcast the current global weights to every client
+    print('Sending the current model weights to the clients')
+ for client in clients:
+ total_sent = 0
+ for i in range(0, len(model_weights_serialized), chunk_size):
+ chunk = model_weights_serialized[i:i + chunk_size]
+ sent = client.send(chunk)
+ total_sent += sent
+
+
+
+    def receive_data_from_client(client, c_length):
+
+        print(f'Receiving payload length from client: {client}')
+        size_message = client.recv(4096)
+        length = int.from_bytes(size_message, byteorder='big')
+        c_length[client] = length
+
+        print(f'Receiving weights from client: {client}')
+        received_weights = b""
+        while len(received_weights) < c_length[client]:
+            chunk = client.recv(4096)
+            received_weights += chunk
+
+        if len(received_weights) == length:
+            try:
+                received_weights = pickle.loads(received_weights)
+                print('Deserialization succeeded')
+                c_weights[client] = received_weights
+            except EOFError:
+                print('Deserialization failed: EOFError')
+                # fall back to the previous round's weights, or the current global weights
+                c_weights[client] = c_weights.get(client, model.state_dict())
+        else:
+            print('Received payload length does not match the announced length')
+
+
+
+    # Receive from all clients concurrently using a ThreadPoolExecutor
+ with ThreadPoolExecutor(max_workers=len(clients)) as executor:
+        # Submit one receive task per client to the thread pool
+ futures = [executor.submit(receive_data_from_client, client, c_length) for client in clients]
+
+        # Wait for all tasks to finish
+ for future in futures:
+ future.result()
+
+
+    # FedAvg aggregation: compute the average of the clients' weights
+ aggregated_weights = model.state_dict()
+ # print(aggregated_weights)
+
+
+    # Zero out all values
+ for key in aggregated_weights.keys():
+ aggregated_weights[key].zero_()
+
+    # Accumulate the clients' weights
+ for weights in c_weights.values():
+ for key, value in weights.items():
+ if key not in aggregated_weights:
+ aggregated_weights[key] = value.clone()
+ else:
+ aggregated_weights[key] += value
+
+    # Average over the number of clients
+ for key in aggregated_weights.keys():
+ aggregated_weights[key] /= len(c_weights)
+
+    # Load the aggregated weights into the global model
+ model.load_state_dict(aggregated_weights)
+    print("Weight aggregation finished")
+
+# Close the server socket
+server.close()
+
diff --git a/subject2-pre/tvhl.py b/subject2-pre/tvhl.py
new file mode 100644
index 0000000000000000000000000000000000000000..421a73cc1b97c9b065546ddf8f00cb0206b93294
--- /dev/null
+++ b/subject2-pre/tvhl.py
@@ -0,0 +1,200 @@
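+# tvhl.py: generates a virtual workload dataset with a temporal-aware GAN. A joint-
+# distribution penalty couples the feature distance with the temporal distance between
+# real and generated samples, and the generated samples are rearranged into a 24-hour
+# time series that is saved to tvhl.csv.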
+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import Sequential
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras.layers import Dense, LeakyReLU
+import tensorflow as tf
+from scipy.spatial.distance import cdist
+import requests
+import csv
+from io import StringIO
+import datetime
+
+def get_data_from_api(api_url):
+    """
+    Fetch data from an API.
+    Parameters:
+    - api_url: URL of the API
+    Returns:
+    - the JSON data of the API response on success
+    - None on failure
+    """
+    try:
+        # Issue a GET request
+        response = requests.get(api_url)
+
+        # Check the response status code
+        if response.status_code == 200:
+            # Parse the JSON data and return it
+            return response.json()
+        else:
+            # Print the error message and return None
+            print(f"Error: {response.status_code} - {response.text}")
+            return None
+    except requests.RequestException as e:
+        # Print the exception and return None
+        print(f"Request Error: {e}")
+        return None
+
+def json_data_to_csv(json_data):
+    try:
+        # Extract the header (column names of the CSV file)
+        header = list(json_data['pageData'][0].keys())
+        # Create an in-memory file object for writing the CSV data
+        csv_output = StringIO()
+        # Create the CSV writer
+        csv_writer = csv.DictWriter(csv_output, fieldnames=header)
+        # Write the header row
+        csv_writer.writeheader()
+        # Write the data rows
+        csv_writer.writerows(json_data['pageData'])
+        # Get the CSV data as a string
+        csv_data = csv_output.getvalue()
+        print("Conversion succeeded: JSON data to CSV string")
+        return csv_data
+    except Exception as e:
+        print(f"Conversion failed: {e}")
+        return None
+
+# Build the generator
+def build_generator():
+ model = Sequential()
+ model.add(Dense(64, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(32))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(7, activation='sigmoid'))
+ return model
+generator = build_generator()
+
+# Build the discriminator
+def build_discriminator():
+ model = Sequential()
+ model.add(Dense(32, input_dim=7))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(16))
+ model.add(LeakyReLU(alpha=0.2))
+ model.add(Dense(1, activation='sigmoid'))
+ return model
+discriminator = build_discriminator()
+
+
+# Joint-distribution loss: pairwise feature distance weighted by the temporal distance
+def compute_joint_distribution_loss(real_samples, fake_samples):
+    real_relative_time = real_samples[:, 0]
+    fake_relative_time = fake_samples[:, 0]
+    real_samples = real_samples[:, 1:]
+    fake_samples = fake_samples[:, 1:]
+    distance = 10 * cdist(real_samples, fake_samples, metric='euclidean')
+    conditional_distance = np.abs(real_relative_time - fake_relative_time)
+    joint_distribution_loss = np.mean(distance * conditional_distance)
+    return joint_distribution_loss
+
+
+# Train the GAN
+def train_gan(noise_data, real_data, epochs, batch_size):
+    for epoch in range(epochs):
+        noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+        real_samples = real_data[np.random.randint(0, real_data.shape[0], size=batch_size)]
+        generated_samples = generator.predict(noise)
+        # print("Real Samples Shape:", real_samples.shape)
+        # print("Generated Samples Shape:", generated_samples.shape)
+        X = np.concatenate((real_samples, generated_samples))
+        y = np.ones(2 * batch_size)
+        y[batch_size:] = 0
+        discriminator.trainable = True
+        # train_on_batch returns [loss, accuracy] because the discriminator is compiled with metrics
+        discriminator_loss = discriminator.train_on_batch(X, y)[0]
+        noise = noise_data[np.random.randint(0, noise_data.shape[0], size=batch_size)]
+        y_gen = np.ones(batch_size)
+        discriminator.trainable = False
+        generator_loss = gan.train_on_batch(noise, y_gen)
+        # Compute the joint-distribution penalty between real and generated samples
+        joint_distribution_loss = compute_joint_distribution_loss(real_samples, generated_samples)
+        # Add the penalty to the reported discriminator loss
+        discriminator_loss = discriminator_loss + 1 * joint_distribution_loss
+        # Log the losses
+        if epoch % 100 == 0:
+            print(
+                f'Epoch: {epoch}/{epochs}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}')
+
+
+def gene_data(generated_data):
+    # Restore the time column to a 24-hour clock
+ generated_data['time'] = (generated_data['time'] * 24).astype(int)
+
+ data_by_hour = {}
+    # Group generated_data by hour into a dictionary keyed by hour
+ for i in range(24):
+ data_by_hour[i] = generated_data[generated_data['time'] == i]
+ empty_df = pd.DataFrame()
+
+    # Iterate over all 24 hours; pad empty or short hourly buckets
+ for i in range(0, 24):
+ if data_by_hour[i].empty:
+            next_i = (i + 1) % 24  # index of the next hour; the modulo wraps the last hour back to hour 0
+ data_by_hour[i] = data_by_hour[next_i].head(1)
+ data_length = len(data_by_hour[i])
+ if data_length < 31:
+ data_by_hour[i] = pd.concat([data_by_hour[i], data_by_hour[5].head(30)])
+ for i in range(0, 24):
+ empty_df = pd.concat([empty_df, data_by_hour[i].head(30)])
+
+    # Copy the first row of empty_df as the seed of the time series
+ time_series_data = empty_df.iloc[:1].copy()
+ for i in range(1, 60):
+ for j in range(0, 24):
+ time_series_data = pd.concat([time_series_data, empty_df.iloc[j * 24 + i:j * 24 + i + 1].copy()])
+ time_series_data = time_series_data.copy()
+ time_series_data['time'] = range(len(time_series_data))
+ return time_series_data
+
+def set_noise():
+ uniform_noise = np.random.uniform(0, 1, size=(10000, 1))
+ normal_noise = np.random.normal(0, 1, size=(10000, 6))
+ noise = np.hstack((uniform_noise, normal_noise))
+ return noise
+
+
+# Fetch historical data from the API (disabled; a local CSV is used instead)
+# api_url = "http://202.117.43.38:28081/jcce/v1.0/job/list"
+# data = get_data_from_api(api_url)
+data = pd.read_csv('pre_microsoft.csv')
+
+# Convert the fetched JSON data to CSV
+# data = json_data_to_csv(data)
+# data = pd.read_csv(StringIO(data), parse_dates=["startTime", "endTime", "arriveTime"])
+current_time = datetime.datetime.now()
+# print(current_time)
+# data["startTime"] = np.round((data["startTime"] - current_time) / np.timedelta64(1, "h") + 100)
+# Select the feature columns
+features = ['time', 'cpu_use', 'gpu_use', 'mem_use', 'cpu_plan', 'gpu_plan', 'mem_plan']
+data = data[features]
+# Merge records with the same timestamp
+# data = data.groupby("startTime").sum().reset_index()
+data['time'] = data['time'] % 24
+# Adjust the steps above according to the actual API schema
+
+
+
+# Min-max normalization
+data = data[features].values
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)
+
+# Assemble the combined GAN: noise -> generator -> (frozen) discriminator
+discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+discriminator.trainable = False
+gan_input = tf.keras.Input(shape=(7,))
+gan_output = discriminator(generator(gan_input))
+gan = tf.keras.Model(gan_input, gan_output)
+gan.compile(loss='binary_crossentropy', optimizer='adam')
+
+noise = set_noise()
+# Train the GAN (noise as generator input, scaled real workloads as real samples)
+train_gan(noise, data_scaled, epochs=1000, batch_size=128)
+# Generate virtual data and reshape it into a time series
+generated_data = pd.DataFrame(generator.predict(noise), columns=features)
+generated_data = gene_data(generated_data)
+
+# Save the virtual dataset
+generated_data.to_csv('tvhl.csv', index=False)
\ No newline at end of file