2023-05-30 by @1391022981-yEk6k, @python. Model: gpt-3.5
1391022981-yEk6k

You are a Python Coder Expert, and I have something to ask you.

1391022981-yEk6k

import argparse
import os.path
import platform

import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from DataPreprocess import DataPreprocessor
from DataSet import DataSet
from models.model import Informer

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--batch_size', type=int, default=30)
    parser.add_argument('-c', '--cuda_device', type=str, default='0')
    parser.add_argument('-d', '--dataset', type=str, default='wht')
    parser.add_argument('-E', '--encoding_dimension', type=int, default=6)
    parser.add_argument('-e', '--epoch', type=int, default=10)
    parser.add_argument('-G', '--use_cuda', action='store_true')
    parser.add_argument('-i', '--input_length', type=int, default=60)
    parser.add_argument('-l', '--learning_rate', type=float, default=0.0001)
    parser.add_argument('-n', '--normalize', type=str, default='std')
    parser.add_argument('-p', '--pred_length', type=int, default=24)
    parser.add_argument('-t', '--encoding_type', type=str, default='time_encoding')
    parser.add_argument('-u', '--induce_length', type=int, default=5)
    args = parser.parse_args()

    total_epoch = args.epoch
    input_length = args.input_length
    induce_length = args.induce_length
    pred_length = args.pred_length
    encoding_dimension = args.encoding_dimension
    encoding_type = args.encoding_type
    batch_size = args.batch_size
    use_cuda = args.use_cuda
    learning_rate = args.learning_rate
    normalize = args.normalize

    device = torch.device('cuda:' + str(args.cuda_device) if use_cuda else 'cpu')

    data_root = None
    if platform.system() == 'Windows':
        data_root = 'E:\\forecastdataset\\pkl'
    else:
        data_root = '/home/icpc/pycharmproj/forecast.dataset/pkl/'
    data_dir = None
    if args.dataset == 'wht':
        data_dir = os.path.join(data_root, 'wht.pkl')
    elif args.dataset == 'synthetic':
        data_dir = os.path.join(data_root, 'synthetic.pkl')
    else:
        print('invalid data')
        exit()

    data_preprocessor = DataPreprocessor(data_dir, input_length, pred_length, encoding_type=encoding_type,
                                         encoding_dimension=encoding_dimension)
    train_set = DataSet(data_preprocessor.load_train_set(), data_preprocessor.load_train_encoding_set(),
                        input_length, induce_length, pred_length)
    validate_set = DataSet(data_preprocessor.load_validate_set(), data_preprocessor.load_validate_encoding_set(),
                           input_length, induce_length, pred_length)
    test_set = DataSet(data_preprocessor.load_test_set(), data_preprocessor.load_test_encoding_set(),
                       input_length, induce_length, pred_length)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    validate_loader = DataLoader(validate_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    # for i, (enc_input, enc_encoding, dec_input, dec_encoding,ground_truth) in enumerate(train_loader):
    #     print(enc_input.shape,enc_encoding.shape,dec_input.shape,dec_encoding.shape,ground_truth.shape)
    #     exit()
    enc_in = data_preprocessor.load_enc_dimension()
    dec_in = data_preprocessor.load_dec_dimension()
    c_out = data_preprocessor.load_output_dimension()
    out_len = pred_length
    model = Informer(enc_in, dec_in, c_out, out_len)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()
    model = model.to(device)

    pbar_epoch = tqdm(total=total_epoch, ascii=True, dynamic_ncols=True)
    for epoch in range(total_epoch):
        model.train()
        total_iters = len(train_loader)
        pbar_iter = tqdm(total=total_iters, ascii=True, leave=False, dynamic_ncols=True)
        pbar_iter.set_description('training')
        for i, (enc_input, enc_encoding, dec_input, dec_encoding, ground_truth) in enumerate(train_loader):
            enc_input = enc_input.to(device)
            enc_encoding = enc_encoding.to(device)
            dec_input = dec_input.to(device)
            dec_encoding = dec_encoding.to(device)
            ground_truth = ground_truth.to(device)
            optimizer.zero_grad()
            pred = model(enc_input, enc_encoding, dec_input, dec_encoding)
            loss = criterion(pred, ground_truth)
            loss.backward()
            optimizer.step()
            pbar_iter.update(1)
            pbar_iter.set_postfix_str('loss: %.4f' % loss.item())
        pbar_iter.close()

        model.eval()
        total_iters = len(validate_loader)
        pbar_iter = tqdm(total=total_iters, ascii=True, leave=False, dynamic_ncols=True)
        pbar_iter.set_description('validating')
        prediction_list = []
        gt_list = []
        with torch.no_grad():
            for i, (enc_input, enc_encoding, dec_input, dec_encoding, ground_truth) in enumerate(validate_loader):
                # print(enc_input.shape,enc_encoding.shape,dec_input.shape,dec_encoding.shape)
                # exit()
                enc_input = enc_input.to(device)
                enc_encoding = enc_encoding.to(device)
                dec_input = dec_input.to(device)
                dec_encoding = dec_encoding.to(device)
                ground_truth = ground_truth.to(device)
                pred = model(enc_input, enc_encoding, dec_input, dec_encoding)
                prediction_list.append(pred)
                gt_list.append(ground_truth)
                pbar_iter.update(1)
            predictions = torch.cat(prediction_list, dim=0)
            ground_truths = torch.cat(gt_list, dim=0)
            validate_loss = criterion(predictions, ground_truths)
            pbar_epoch.set_postfix_str('loss: %.4f' % validate_loss.item())
            pbar_epoch.update()
        pbar_iter.close()
    pbar_epoch.close()

    model.eval()
    total_iters = len(test_loader)
    pbar_iter = tqdm(total=total_iters, ascii=True, dynamic_ncols=True)
    pbar_iter.set_description('testing')
    prediction_list = []
    gt_list = []
    with torch.no_grad():
        for i, (enc_input, enc_encoding, dec_input, dec_encoding, ground_truth) in enumerate(test_loader):
            enc_input = enc_input.to(device)
            enc_encoding = enc_encoding.to(device)
            dec_input = dec_input.to(device)
            dec_encoding = dec_encoding.to(device)
            ground_truth = ground_truth.to(device)
            pred = model(enc_input, enc_encoding, dec_input, dec_encoding)
            prediction_list.append(pred)
            gt_list.append(ground_truth)
            pbar_iter.update(1)
        pbar_iter.close()
        predictions = torch.cat(prediction_list, dim=0)
        ground_truths = torch.cat(gt_list, dim=0)
        test_loss = criterion(predictions, ground_truths)
        print('\033[35mloss: %.4f\033[0m' % test_loss.item())

This code implements a deep learning model (Informer) for time series forecasting. It loads the data from a pickle file, preprocesses it, and then trains the model on the training set. The validation set is used to evaluate the model during training, and the model is finally evaluated on the test set.

The model takes input and output sequences of a fixed length, and uses an encoder-decoder architecture with attention mechanisms to make the predictions. The model is optimized using the Adam optimizer and the mean squared error loss function.
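As a rough illustration of how the encoder and decoder inputs fit together, the sketch below shows the standard Informer-style decoder input: the last induce_length steps of the encoder window are reused as a "start token" and the prediction horizon is padded with zeros. This is only an assumption about what DataSet produces; the helper name build_decoder_input is hypothetical and does not appear in the script.

import torch

def build_decoder_input(enc_window, induce_length, pred_length):
    # Keep the last `induce_length` steps of the encoder window as the decoder
    # "start token" and append zeros for the horizon the decoder must fill in.
    label_part = enc_window[:, -induce_length:, :]          # (batch, induce_length, features)
    placeholder = torch.zeros(enc_window.size(0), pred_length, enc_window.size(-1),
                              device=enc_window.device)     # (batch, pred_length, features)
    return torch.cat([label_part, placeholder], dim=1)      # (batch, induce_length + pred_length, features)

# Example: a batch of 8 windows, 60 input steps, 7 features
dec_input = build_decoder_input(torch.randn(8, 60, 7), induce_length=5, pred_length=24)
print(dec_input.shape)  # torch.Size([8, 29, 7])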

The code uses argparse to parse command-line arguments for various hyperparameters such as batch size, learning rate, encoding type, and dataset. It also uses tqdm to display progress bars during training and testing.
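For instance, the flags can be combined on the command line. The stand-alone sketch below mirrors only a few of the script's arguments and shows the values they resolve to; the file name main.py in the comment is just an assumption about how the script is saved.

import argparse

# Mirrors a subset of the script's arguments, for illustration only.
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--epoch', type=int, default=10)
parser.add_argument('-G', '--use_cuda', action='store_true')
parser.add_argument('-p', '--pred_length', type=int, default=24)

# Equivalent to running:  python main.py -e 20 -G -p 48
args = parser.parse_args(['-e', '20', '-G', '-p', '48'])
print(args.epoch, args.use_cuda, args.pred_length)  # 20 True 48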

1391022981-yEk6k

How can I modify the program so that the predicted output curve is presented as a probability interval?


To present the predicted values as a probability interval, you can use the concept of a confidence interval. A simple approach is to compute a confidence interval for each prediction during training, store the intervals in a list, and then use them at test time to compute probability intervals. One possible modification is as follows:

  1. During training, compute a confidence interval for each prediction in each iteration of the validation loop and store it in a list. You can use the mean and standard deviation to compute each prediction's confidence interval, for example:
conf_int_list = []  # one (lower, upper) pair per validation batch
with torch.no_grad():
    for i, (enc_input, enc_encoding, dec_input, dec_encoding, ground_truth) in enumerate(validate_loader):
        enc_input = enc_input.to(device)
        enc_encoding = enc_encoding.to(device)
        dec_input = dec_input.to(device)
        dec_encoding = dec_encoding.to(device)
        ground_truth = ground_truth.to(device)
        pred = model(enc_input, enc_encoding, dec_input, dec_encoding)
        prediction_list.append(pred)
        gt_list.append(ground_truth)
        # 95% interval from the batch mean and standard deviation of the predictions
        conf_int = (pred.mean(dim=0) - 1.96 * pred.std(dim=0), pred.mean(dim=0) + 1.96 * pred.std(dim=0))
        conf_int_list.append(conf_int)
        pbar_iter.update(1)

A 95% confidence interval is used here, but the level can be changed as needed (see the sketch right after this step for deriving other z-values). conf_int_list now contains the confidence interval for each prediction.
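As a minimal sketch of changing the confidence level, the snippet below derives the z-score with scipy instead of hard-coding 1.96; it assumes an approximately normal spread of the predictions, and the pred tensor here is only a stand-in with an arbitrary shape.

import torch
from scipy.stats import norm

pred = torch.randn(32, 24, 7)        # stand-in for a batch of model predictions
confidence = 0.90                    # desired two-sided confidence level
z = norm.ppf(0.5 + confidence / 2)   # ~1.645 for 90%, ~1.96 for 95%
conf_int = (pred.mean(dim=0) - z * pred.std(dim=0),
            pred.mean(dim=0) + z * pred.std(dim=0))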

  2. During testing, conf_int_list can be used to compute a probability interval at each time point. For example:
prob_interval_list = []  # fraction of predictions falling inside the interval, per time step
with torch.no_grad():
    for i, (enc_input, enc_encoding, dec_input, dec_encoding, ground_truth) in enumerate(test_loader):
        enc_input = enc_input.to(device)
        enc_encoding = enc_encoding.to(device)
        dec_input = dec_input.to(device)
        dec_encoding = dec_encoding.to(device)
        ground_truth = ground_truth.to(device)
        pred = model(enc_input, enc_encoding, dec_input, dec_encoding)
        prediction_list.append(pred)
        gt_list.append(ground_truth)
        conf_int = conf_int_list[i]
        lower_bound = conf_int[0]
        upper_bound = conf_int[1]
        prob_interval = [torch.sum((pred[:, j] >= lower_bound[j]) & (pred[:, j] <= upper_bound[j])).item() / pred.shape[0] for j in range(pred.shape[1])]
        prob_interval_list.append(prob_interval)
        pbar_iter.update(1)

Here the mean and standard deviation of the predictions are used to compute the confidence interval at each time point, and the confidence interval is then used to compute the probability interval at each time point.

  3. Finally, the probability intervals can be visualized, for example with matplotlib:
import matplotlib.pyplot as plt

plt.plot(prob_interval_list)
plt.show()

This will display the predicted probability interval curves.
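If the goal is a shaded band around the forecast rather than a coverage curve, a common alternative is matplotlib's fill_between. The sketch below assumes predictions is the (num_windows, pred_length, num_series) tensor produced by torch.cat in the test loop above and plots the first series; the 1.96 factor again corresponds to a 95% interval.

import numpy as np
import matplotlib.pyplot as plt

pred_np = predictions.detach().cpu().numpy()[:, :, 0]  # first series: (num_windows, pred_length)
mean = pred_np.mean(axis=0)                            # mean prediction per forecast step
std = pred_np.std(axis=0)                              # spread across windows
steps = np.arange(mean.shape[0])

plt.plot(steps, mean, label='mean prediction')
plt.fill_between(steps, mean - 1.96 * std, mean + 1.96 * std, alpha=0.3, label='95% interval')
plt.xlabel('forecast step')
plt.ylabel('predicted value')
plt.legend()
plt.show()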

1391022981-yEk6k

Is there any recently published literature (2022-2023) on Informer?
