From a0ff61132eb022a4f0930980c8e19f8492a7a1c3 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 18:48:40 +0000 Subject: [PATCH 1/7] experimenting with DTensor construction from file - Simple text-based serialisation of tensor data - example in main.cu where we read data from file --- main.cu | 68 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/main.cu b/main.cu index d9f3421..00d3491 100644 --- a/main.cu +++ b/main.cu @@ -1,27 +1,57 @@ -#include -#include -#include -#include -#include -#include #include "include/tensor.cuh" -#include +#include +#include +#include +#include + +template +struct data_t { + size_t numRows; + size_t numCols; + size_t numMats; + std::vector data; +}; -#define real_t double +template +data_t vectorFromFile(std::string path_to_file) { + data_t dataStruct; + std::ifstream file; + file.open(path_to_file, std::ios::in); + + std::string line; + getline(file, line); dataStruct.numRows = atoi(line.c_str()); + getline(file, line); dataStruct.numCols = atoi(line.c_str()); + getline(file, line); dataStruct.numMats = atoi(line.c_str()); + + size_t numElements = dataStruct.numRows * dataStruct.numCols * dataStruct.numMats; + std::vector vecDataFromFile(numElements); + + size_t i = 0; + while (getline(file, line)) { + if constexpr (std::is_same_v) { + vecDataFromFile[i] = atoi(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stod(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stof(line.c_str()); + } + if (i == numElements - 1) break; + i++; + } + file.close(); + + dataStruct.data = vecDataFromFile; + return dataStruct; +} int main() { + auto z = vectorFromFile("../test/data/my.dtensor"); + for (size_t i = 0; i < 3; i++) std::cout << z.data[i] << ", "; + DTensor dz(z.data, z.numRows, z.numCols, z.numMats); + std::cout << "\n\n"; - std::vector aData = {10.0, 2.0, 3.0, - 2.0, 20.0, -1.0, - 3.0, -1.0, 30.0}; - DTensor A(3, 3, 2); - DTensor A0(A, 2, 0, 0); - DTensor A1(A, 2, 1, 1); - A0.upload(aData); - A1.upload(aData); - CholeskyBatchFactoriser chol(A); - chol.factorise(); - std::cout << chol.info()(0); + auto q = vectorFromFile("../test/data/my.dtensor"); + for (size_t i = 0; i < 3; i++) std::cout << q.data[i] << ", "; return 0; } From 279159780fd5c7b5acb0e445eff1aef8d913d580 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 19:08:57 +0000 Subject: [PATCH 2/7] implement DTensor::parseFromFile --- include/tensor.cuh | 51 ++++++++++++++++++++++++++++++++++++++++++++++ main.cu | 49 ++------------------------------------------ 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 5a08392..41add5a 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -11,6 +11,7 @@ #include #include #include +#include #ifndef TENSOR_CUH #define TENSOR_CUH @@ -250,6 +251,8 @@ public: */ static DTensor createRandomTensor(size_t numRows, size_t numCols, size_t numMats, T low, T hi); + static DTensor parseFromTextFile(std::string path_to_file); + /** * Constructs a DTensor object. */ @@ -564,6 +567,54 @@ DTensor DTensor::createRandomTensor(size_t numRows, size_t numCols, size_t throw std::invalid_argument("[createRandomTensor] unsupported type T"); } + +template +struct data_t { + size_t numRows; + size_t numCols; + size_t numMats; + std::vector data; +}; + +template +data_t vectorFromFile(std::string path_to_file) { + data_t dataStruct; + std::ifstream file; + file.open(path_to_file, std::ios::in); + if (!file.is_open()) { throw std::exception(); }; + + std::string line; + getline(file, line); dataStruct.numRows = atoi(line.c_str()); + getline(file, line); dataStruct.numCols = atoi(line.c_str()); + getline(file, line); dataStruct.numMats = atoi(line.c_str()); + + size_t numElements = dataStruct.numRows * dataStruct.numCols * dataStruct.numMats; + std::vector vecDataFromFile(numElements); + + size_t i = 0; + while (getline(file, line)) { + if constexpr (std::is_same_v) { + vecDataFromFile[i] = atoi(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stod(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stof(line.c_str()); + } + if (i == numElements - 1) break; + i++; + } + + dataStruct.data = vecDataFromFile; + file.close(); + return dataStruct; +} + +template DTensor DTensor::parseFromTextFile(std::string path_to_file) { + auto parsedData = vectorFromFile(path_to_file); + DTensor tensorFromData(parsedData.data, parsedData.numRows, parsedData.numCols, parsedData.numMats); + return tensorFromData; +} + template void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats) { if (m_numRows == newNumRows && m_numCols == newNumCols && m_numMats == newNumMats) return; diff --git a/main.cu b/main.cu index 00d3491..a4c28e1 100644 --- a/main.cu +++ b/main.cu @@ -4,54 +4,9 @@ #include #include -template -struct data_t { - size_t numRows; - size_t numCols; - size_t numMats; - std::vector data; -}; - -template -data_t vectorFromFile(std::string path_to_file) { - data_t dataStruct; - std::ifstream file; - file.open(path_to_file, std::ios::in); - - std::string line; - getline(file, line); dataStruct.numRows = atoi(line.c_str()); - getline(file, line); dataStruct.numCols = atoi(line.c_str()); - getline(file, line); dataStruct.numMats = atoi(line.c_str()); - - size_t numElements = dataStruct.numRows * dataStruct.numCols * dataStruct.numMats; - std::vector vecDataFromFile(numElements); - - size_t i = 0; - while (getline(file, line)) { - if constexpr (std::is_same_v) { - vecDataFromFile[i] = atoi(line.c_str()); - } else if constexpr (std::is_same_v) { - vecDataFromFile[i] = std::stod(line.c_str()); - } else if constexpr (std::is_same_v) { - vecDataFromFile[i] = std::stof(line.c_str()); - } - if (i == numElements - 1) break; - i++; - } - file.close(); - - dataStruct.data = vecDataFromFile; - return dataStruct; -} int main() { - auto z = vectorFromFile("../test/data/my.dtensor"); - for (size_t i = 0; i < 3; i++) std::cout << z.data[i] << ", "; - DTensor dz(z.data, z.numRows, z.numCols, z.numMats); - std::cout << "\n\n"; - - auto q = vectorFromFile("../test/data/my.dtensor"); - for (size_t i = 0; i < 3; i++) std::cout << q.data[i] << ", "; - + auto z = DTensor::parseFromTextFile("../test/data/my.dtensor"); + std::cout << z; return 0; } From c8601efa061712f878b9b493e2826b278299685c Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 20:36:30 +0000 Subject: [PATCH 3/7] parseFromTextFile takes a storage mode now --- include/tensor.cuh | 19 ++++++++++++++++--- main.cu | 4 +++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 41add5a..61841c0 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -251,7 +251,18 @@ public: */ static DTensor createRandomTensor(size_t numRows, size_t numCols, size_t numMats, T low, T hi); - static DTensor parseFromTextFile(std::string path_to_file); + /** + * Parse data from text file and create an instance of DTensor + * + * This static function reads data from a text file, creates a DTensor and uploads the data to the device. + * + * @param path_to_file path to file as string + * @param mode storage mode (default: StorageMode::defaultMajor) + * @return instance of DTensor + * + * @throws std::invalid_argument if the file is not found + */ + static DTensor parseFromTextFile(std::string path_to_file, StorageMode mode = StorageMode::defaultMajor); /** * Constructs a DTensor object. @@ -581,7 +592,7 @@ data_t vectorFromFile(std::string path_to_file) { data_t dataStruct; std::ifstream file; file.open(path_to_file, std::ios::in); - if (!file.is_open()) { throw std::exception(); }; + if (!file.is_open()) { throw std::invalid_argument("the file you provided does not exist"); }; std::string line; getline(file, line); dataStruct.numRows = atoi(line.c_str()); @@ -609,7 +620,9 @@ data_t vectorFromFile(std::string path_to_file) { return dataStruct; } -template DTensor DTensor::parseFromTextFile(std::string path_to_file) { +template +DTensor DTensor::parseFromTextFile(std::string path_to_file, + StorageMode mode) { auto parsedData = vectorFromFile(path_to_file); DTensor tensorFromData(parsedData.data, parsedData.numRows, parsedData.numCols, parsedData.numMats); return tensorFromData; diff --git a/main.cu b/main.cu index a4c28e1..b089ffc 100644 --- a/main.cu +++ b/main.cu @@ -6,7 +6,9 @@ int main() { - auto z = DTensor::parseFromTextFile("../test/data/my.dtensor"); + auto z = DTensor::parseFromTextFile("../test/data/my.dtensor", + StorageMode::rowMajor); std::cout << z; + std::cout << " -- "; return 0; } From f775efdf60834f62237f4c62b995ab0e9d68e60a Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 22:25:44 +0000 Subject: [PATCH 4/7] unit test for saveToFile/parseFromTestFile --- include/tensor.cuh | 20 +++++++++++++++++++- test/testTensor.cu | 25 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 61841c0..d40a4e0 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -501,6 +501,13 @@ public: */ void reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats = 1); + /** + * @todo + * + * @param pathToFile + */ + void saveToFile(std::string pathToFile); + /* ------------- OPERATORS ------------- */ DTensor &operator=(const DTensor &other); @@ -614,7 +621,6 @@ data_t vectorFromFile(std::string path_to_file) { if (i == numElements - 1) break; i++; } - dataStruct.data = vecDataFromFile; file.close(); return dataStruct; @@ -628,6 +634,18 @@ DTensor DTensor::parseFromTextFile(std::string path_to_file, return tensorFromData; } +template +void DTensor::saveToFile(std::string pathToFile) { + std::ofstream file(pathToFile); + file << numRows() << std::endl << numCols() << std::endl << numMats() << std::endl; + std::vector myData(numEl()); + download(myData); + if constexpr (std::is_floating_point::value) { + file << std::setprecision(15); + } + for(const T& el : myData) file << el << std::endl; +} + template void DTensor::reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats) { if (m_numRows == newNumRows && m_numCols == newNumCols && m_numMats == newNumMats) return; diff --git a/test/testTensor.cu b/test/testTensor.cu index 0b0d312..27bd323 100644 --- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -1,5 +1,6 @@ #include #include "../include/tensor.cuh" +#include #define PRECISION_LOW 1e-4 #define PRECISION_HIGH 1e-10 @@ -115,6 +116,30 @@ TEST_F(TensorTest, randomTensorCreation) { randomTensorCreation(); } +/* --------------------------------------- + * Save to file and parse + * --------------------------------------- */ + +TEMPLATE_WITH_TYPE_T +void parseTensorFromFile() { + size_t nR = 20, nC = 40, nM = 60; + auto r = DTensor::createRandomTensor(nR, nC, nM, -1, 1); + std::string fName = "myTest.dtensor"; + r.saveToFile(fName); + auto a = DTensor::parseFromTextFile(fName); + EXPECT_EQ(nR, a.numRows()); + EXPECT_EQ(nC, a.numCols()); + EXPECT_EQ(nM, a.numMats()); + auto diff = a - r; + T err = diff.maxAbs(); + EXPECT_LT(err, 1e-15); +} + +TEST_F(TensorTest, parseTensorFromFile) { + parseTensorFromFile(); + parseTensorFromFile(); +} + /* --------------------------------------- * Move constructor * --------------------------------------- */ From 5e75f3cde2c37d157f7783bcbf17c3103b144d65 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 22:39:12 +0000 Subject: [PATCH 5/7] update CHANGELOG and README --- CHANGELOG.md | 10 ++++++++++ README.md | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ec3fe0..10b05c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v1.5.0 - 27-11-2024 + +### Added + +- Created methods for serialising and deserialising `DTensor` objects + + diff --git a/README.md b/README.md index 3a7c260..8452793 100644 --- a/README.md +++ b/README.md @@ -246,6 +246,27 @@ The `DTensor` `B` will be overwritten with the solution. > overwrite only part of the given `B`, as `B` is a > (4,1,1)-tensor and the solution is a (3,1,1)-tensor. +### 1.8. Saving and loading tensors + +Tensor data can be stored in simple text files which have the following structure + +```text +number_of_rows +number_of_columns +number_of_matrices +data (one entry per line) +``` + +To save a tensor in a file, simply call `DTensor::saveToFile(filename)`. + +To load a tensor from a file, the static function `DTensor::parseFromTextFile(filename)` can be used. For example: + +```c++ +auto z = DTensor::parseFromTextFile("path/to/my.dtensor") +``` + +If necessar, you can provide a second argument to `parseFromTextFile` to specify the order in which the data are stored (the `StorageMode`). + ## 2. Cholesky factorisation and system solution > [!WARNING] From 11fc93ca495cf7036dcae980af730e46be5b7c04 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 23:23:37 +0000 Subject: [PATCH 6/7] save/load: support for additional data types --- README.md | 2 +- include/tensor.cuh | 24 ++++++++++++++++++------ main.cu | 4 ++-- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 8452793..6995001 100644 --- a/README.md +++ b/README.md @@ -265,7 +265,7 @@ To load a tensor from a file, the static function `DTensor::parseFromTextFile auto z = DTensor::parseFromTextFile("path/to/my.dtensor") ``` -If necessar, you can provide a second argument to `parseFromTextFile` to specify the order in which the data are stored (the `StorageMode`). +If necessary, you can provide a second argument to `parseFromTextFile` to specify the order in which the data are stored (the `StorageMode`). ## 2. Cholesky factorisation and system solution diff --git a/include/tensor.cuh b/include/tensor.cuh index d40a4e0..380b65e 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -617,9 +617,21 @@ data_t vectorFromFile(std::string path_to_file) { vecDataFromFile[i] = std::stod(line.c_str()); } else if constexpr (std::is_same_v) { vecDataFromFile[i] = std::stof(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stold(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stol(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stoll(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stoul(line.c_str()); + } else if constexpr (std::is_same_v) { + vecDataFromFile[i] = std::stoull(line.c_str()); + } else if constexpr (std::is_same_v) { + sscanf(line.c_str(), "%zu", &vecDataFromFile[i]); } - if (i == numElements - 1) break; - i++; + + if (++i == numElements) break; } dataStruct.data = vecDataFromFile; file.close(); @@ -628,7 +640,7 @@ data_t vectorFromFile(std::string path_to_file) { template DTensor DTensor::parseFromTextFile(std::string path_to_file, - StorageMode mode) { + StorageMode mode) { auto parsedData = vectorFromFile(path_to_file); DTensor tensorFromData(parsedData.data, parsedData.numRows, parsedData.numCols, parsedData.numMats); return tensorFromData; @@ -638,10 +650,10 @@ template void DTensor::saveToFile(std::string pathToFile) { std::ofstream file(pathToFile); file << numRows() << std::endl << numCols() << std::endl << numMats() << std::endl; - std::vector myData(numEl()); - download(myData); + std::vector myData(numEl()); download(myData); if constexpr (std::is_floating_point::value) { - file << std::setprecision(15); + int prec = std::numeric_limits::max_digits10 - 1; + file << std::setprecision(prec); } for(const T& el : myData) file << el << std::endl; } diff --git a/main.cu b/main.cu index b089ffc..cc911e9 100644 --- a/main.cu +++ b/main.cu @@ -6,9 +6,9 @@ int main() { - auto z = DTensor::parseFromTextFile("../test/data/my.dtensor", + auto z = DTensor::parseFromTextFile("../test/data/my.dtensor", StorageMode::rowMajor); std::cout << z; - std::cout << " -- "; + z.saveToFile("hohoho.dtensor"); return 0; } From 633a3f382057ae56106824dc005c5732dc51a038 Mon Sep 17 00:00:00 2001 From: Pantelis Sopasakis Date: Tue, 26 Nov 2024 23:30:45 +0000 Subject: [PATCH 7/7] use std::numeric_limits::epsilon() in tests --- include/tensor.cuh | 3 ++- test/testTensor.cu | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/tensor.cuh b/include/tensor.cuh index 380b65e..6da01a5 100644 --- a/include/tensor.cuh +++ b/include/tensor.cuh @@ -502,7 +502,7 @@ public: void reshape(size_t newNumRows, size_t newNumCols, size_t newNumMats = 1); /** - * @todo + * Saves the current instance of DTensor to a (text) file * * @param pathToFile */ @@ -630,6 +630,7 @@ data_t vectorFromFile(std::string path_to_file) { } else if constexpr (std::is_same_v) { sscanf(line.c_str(), "%zu", &vecDataFromFile[i]); } + // todo if (++i == numElements) break; } diff --git a/test/testTensor.cu b/test/testTensor.cu index 27bd323..2d97df7 100644 --- a/test/testTensor.cu +++ b/test/testTensor.cu @@ -132,7 +132,7 @@ void parseTensorFromFile() { EXPECT_EQ(nM, a.numMats()); auto diff = a - r; T err = diff.maxAbs(); - EXPECT_LT(err, 1e-15); + EXPECT_LT(err, 2*std::numeric_limits::epsilon()); } TEST_F(TensorTest, parseTensorFromFile) {