From 3e2ce5494fcda644c02a14b6b12f129a22ad906a Mon Sep 17 00:00:00 2001 From: carbon Date: Fri, 31 May 2024 11:58:25 +0800 Subject: [PATCH] add cnpy commit 2f56f4c23ea840f9d15c43801368cf4a089efa84 Author: sophgo-forum-service Date: Sun May 19 23:03:28 2024 +0800 [feat] cnpy opensource for cv18xx soc. - e9d84e --- .version/2024-05-31.md | 1 + cnpy/CMakeLists.txt | 65 ++++ cnpy/LICENSE | 21 ++ cnpy/README.md | 55 +++ cnpy/cnpy.cpp | 785 +++++++++++++++++++++++++++++++++++++++++ cnpy/cnpy.h | 117 ++++++ cnpy/example1.cpp | 70 ++++ cnpy/mat2npz | 18 + cnpy/npy2mat | 15 + cnpy/npz2mat | 15 + 10 files changed, 1162 insertions(+) create mode 100644 cnpy/CMakeLists.txt create mode 100644 cnpy/LICENSE create mode 100644 cnpy/README.md create mode 100644 cnpy/cnpy.cpp create mode 100644 cnpy/cnpy.h create mode 100644 cnpy/example1.cpp create mode 100644 cnpy/mat2npz create mode 100644 cnpy/npy2mat create mode 100755 cnpy/npz2mat diff --git a/.version/2024-05-31.md b/.version/2024-05-31.md index ec79e0332..5edb66443 100644 --- a/.version/2024-05-31.md +++ b/.version/2024-05-31.md @@ -20,3 +20,4 @@ | cvikernel | cvikernel | https://github.com/sophgo/cvikernel.git | sg200x-dev | 9f1f57a | | cviruntime | cviruntime | https://github.com/sophgo/cviruntime.git | sg200x-dev | 3f49386 | | cvimath | cvimath | https://github.com/sophgo/cvimath.git | sg200x-dev | ce8705f | +| cnpy | cnpy | https://github.com/sophgo/cnpy.git | sg200x-dev | 2f56f4c | diff --git a/cnpy/CMakeLists.txt b/cnpy/CMakeLists.txt new file mode 100644 index 000000000..6a893eb8c --- /dev/null +++ b/cnpy/CMakeLists.txt @@ -0,0 +1,65 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR) +if(COMMAND cmake_policy) + cmake_policy(SET CMP0003 NEW) +endif(COMMAND cmake_policy) + +if (NOT DEFINED LLVM_MAIN_SRC_DIR) + project(CNPY) + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + + message(STATUS "CMAKE_SYSROOT ${CMAKE_SYSROOT}") + include_directories(${CMAKE_SYSROOT}/include) + + include_directories(${CMAKE_INSTALL_PREFIX}/include) + link_directories(${CMAKE_INSTALL_PREFIX}/lib) + + set(ENV{PKG_CONFIG_DIR} "") + set(ENV{PKG_CONFIG_LIBDIR} "${CMAKE_SYSROOT}/usr/lib/pkgconfig:${CMAKE_SYSROOT}/usr/share/pkgconfig") + set(ENV{PKG_CONFIG_SYSROOT_DIR} ${CMAKE_SYSROOT}) + find_package(ZLIB) + + option(ENABLE_STATIC "Build static (.a) library" ON) + add_library(cnpy SHARED "cnpy.cpp") + target_link_libraries(cnpy z) + install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + + if(ENABLE_STATIC) + add_library(cnpy-static STATIC "cnpy.cpp") + set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy") + install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib) + endif(ENABLE_STATIC) + + install(FILES "cnpy.h" DESTINATION include) + install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + + add_executable(example1 example1.cpp) + target_link_libraries(example1 cnpy) + +else() + message(STATUS "CNPY LLVM Tree Build") + + add_custom_target(CNPY) + set_target_properties(CNPY PROPERTIES FOLDER Third_party) + add_dependencies(CNPY + cnpy + cnpy_example1 + ) + + set(LLVM_OPTIONAL_SOURCES + cnpy.cpp + example1.cpp + ) + + find_package(ZLIB REQUIRED) + + add_llvm_library(cnpy SHARED + cnpy.cpp) + target_link_libraries(cnpy ${ZLIB_LIBRARIES}) + + install(FILES "cnpy.h" DESTINATION include) + add_llvm_executable(cnpy_example1 + example1.cpp) + target_link_libraries(cnpy_example1 PRIVATE + cnpy) +endif() diff --git a/cnpy/LICENSE b/cnpy/LICENSE new file mode 100644 index 000000000..e60eadbcc --- /dev/null +++ b/cnpy/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) Carl Rogers, 2011 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/cnpy/README.md b/cnpy/README.md new file mode 100644 index 000000000..37c4a4340 --- /dev/null +++ b/cnpy/README.md @@ -0,0 +1,55 @@ +# Purpose: + +NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file. + +`cnpy` lets you read and write to these formats in C++. + +The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python. + +Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size. +The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary. + +Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice. + +# Installation: + +Default installation directory is /usr/local. +To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4. + +1. get [cmake](www.cmake.org) +2. create a build directory, say $HOME/build +3. cd $HOME/build +4. cmake /path/to/cnpy +5. make +6. make install + +# Using: + +To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as + +```bash +g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11 +``` + +# Description: + +There are two functions for writing data: `npy_save` and `npz_save`. + +There are 3 functions for reading: +- `npy_load` will load a .npy file. +- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structues. +- `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file. + +The data structure for loaded data is below. +Data is accessed via the `data()`-method, which returns a pointer of the specified type (which must match the underlying datatype of the data). +The array shape and word size are read from the npy header. + +```c++ +struct NpyArray { + std::vector shape; + size_t word_size; + template T* data(); +}; +``` + +See [example1.cpp](example1.cpp) for examples of how to use the library. example1 will also be build during cmake installation. diff --git a/cnpy/cnpy.cpp b/cnpy/cnpy.cpp new file mode 100644 index 000000000..54be33977 --- /dev/null +++ b/cnpy/cnpy.cpp @@ -0,0 +1,785 @@ +//Copyright (C) 2011 Carl Rogers +//Released under MIT License +//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php +#define _FILE_OFFSET_BITS 64 +#define __USE_FILE_OFFSET64 +#define __USE_LARGEFILE64 +#define _LARGEFILE64_SOURCE + +#include"cnpy.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#define ZIP64_LIMIT ((((size_t)1) << 31) - 1) + +namespace cnpy { + +static char BigEndianTest() { + int x = 1; + return (((char *)&x)[0]) ? '<' : '>'; +} + +static char map_type(const std::type_info& t) +{ + if( t == typeid(float) ) return 'f'; + if( t == typeid(double) ) return 'f'; + if( t == typeid(long double) ) return 'f'; + + if( t == typeid(int) ) return 'i'; + if( t == typeid(char) ) return 'i'; + if( t == typeid(signed char) ) return 'i'; + if( t == typeid(short) ) return 'i'; + if( t == typeid(long) ) return 'i'; + if( t == typeid(long long) ) return 'i'; + + if( t == typeid(unsigned char) ) return 'u'; + if( t == typeid(unsigned short) ) return 'u'; + if( t == typeid(unsigned long) ) return 'u'; + if( t == typeid(unsigned long long) ) return 'u'; + if( t == typeid(unsigned int) ) return 'u'; + + if( t == typeid(bool) ) return 'b'; + + if( t == typeid(std::complex) ) return 'c'; + if( t == typeid(std::complex) ) return 'c'; + if( t == typeid(std::complex) ) return 'c'; + + std::cout << "libcnpy error: unknown type_id " + << t.name() << "\n"; + // ref: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin + assert(0); + return '?'; +} + +template +std::vector& operator+=(std::vector& lhs, const T rhs) { + //write in little endian + for(size_t byte = 0; byte < sizeof(T); byte++) { + char val = *((const char*)&rhs+byte); + lhs.push_back(val); + } + return lhs; +} + +template<> +std::vector& operator+=(std::vector& lhs, const std::string rhs) { + lhs.insert(lhs.end(),rhs.begin(),rhs.end()); + return lhs; +} + +template<> +std::vector& operator+=(std::vector& lhs, const char* rhs) { + //write in little endian + size_t len = strlen(rhs); + lhs.reserve(len); + for(size_t byte = 0; byte < len; byte++) { + lhs.push_back(rhs[byte]); + } + return lhs; +} + +std::vector create_npy_header(const std::vector& shape, + size_t word_size, char type) { + std::vector dict; + dict += "{'descr': '"; + dict += BigEndianTest(); + dict += type; + dict += std::to_string(word_size); + dict += "', 'fortran_order': False, 'shape': ("; + dict += std::to_string(shape[0]); + for(size_t i = 1;i < shape.size();i++) { + dict += ", "; + dict += std::to_string(shape[i]); + } + if(shape.size() == 1) dict += ","; + dict += "), }"; + //pad with spaces so that preamble+dict is modulo 16 bytes. + //preamble is 10 bytes. dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(),remainder,' '); + dict.back() = '\n'; + + std::vector header; + header += (char) 0x93; + header += "NUMPY"; + header += (char) 0x01; //major version of numpy format + header += (char) 0x00; //minor version of numpy format + header += (uint16_t) dict.size(); + header.insert(header.end(),dict.begin(),dict.end()); + + return header; +} + +void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type, + std::vector& shape, bool& fortran_order) { + //std::string magic_string(buffer,6); + //uint8_t major_version = *reinterpret_cast(buffer+6); + //uint8_t minor_version = *reinterpret_cast(buffer+7); + uint16_t header_len = *reinterpret_cast(buffer+8); + std::string header(reinterpret_cast(buffer+9),header_len); + + size_t loc1, loc2; + + //fortran order + loc1 = header.find("fortran_order")+16; + fortran_order = (header.substr(loc1,4) == "True" ? true : false); + + //shape + loc1 = header.find("("); + loc2 = header.find(")"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1+1,loc2-loc1-1); + while(std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + //endian, word size, data type + //byte order code | stands for not applicable. + //not sure when this applies except for byte array + loc1 = header.find("descr")+9; + bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + assert(littleEndian); + + type = header[loc1+1]; + //assert(type == map_type(T)); + + std::string str_ws = header.substr(loc1+2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0,loc2).c_str()); +} + +void parse_npy_header(FILE* fp, size_t& word_size, char& type, + std::vector& shape, bool& fortran_order) { + char buffer[256]; + size_t res = fread(buffer,sizeof(char),11,fp); + if(res != 11) + throw std::runtime_error("parse_npy_header: failed fread"); + std::string header = fgets(buffer,256,fp); + assert(header[header.size()-1] == '\n'); + + size_t loc1, loc2; + + //fortran order + loc1 = header.find("fortran_order"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: " + "failed to find header keyword: 'fortran_order'"); + loc1 += 16; + fortran_order = (header.substr(loc1,4) == "True" ? true : false); + + //shape + loc1 = header.find("("); + loc2 = header.find(")"); + if (loc1 == std::string::npos || loc2 == std::string::npos) + throw std::runtime_error("parse_npy_header: " + "failed to find header keyword: '(' or ')'"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1+1,loc2-loc1-1); + while(std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + //endian, word size, data type + //byte order code | stands for not applicable. + //not sure when this applies except for byte array + loc1 = header.find("descr"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: " + "failed to find header keyword: 'descr'"); + loc1 += 9; + bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + assert(littleEndian); + + type = header[loc1+1]; + //assert(type == map_type(T)); + + std::string str_ws = header.substr(loc1+2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0,loc2).c_str()); +} + +void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, + size_t& global_header_offset) { + std::vector footer(22); + fseek(fp,-22,SEEK_END); + size_t res = fread(&footer[0],sizeof(char),22,fp); + if(res != 22) + throw std::runtime_error("parse_zip_footer: failed fread"); + + uint16_t disk_no, disk_start, nrecs_on_disk, comment_len; + disk_no = *(uint16_t*) &footer[4]; + disk_start = *(uint16_t*) &footer[6]; + nrecs_on_disk = *(uint16_t*) &footer[8]; + nrecs = *(uint16_t*) &footer[10]; + global_header_size = *(uint32_t*) &footer[12]; + global_header_offset = *(uint32_t*) &footer[16]; + comment_len = *(uint16_t*) &footer[20]; + + assert(disk_no == 0); + assert(disk_start == 0); + assert(nrecs_on_disk == nrecs); + assert(comment_len == 0); + if (global_header_offset >= 0xFFFFFFFF) { + //get global header offset from extra data + std::vector zip64endrec_header(56); + fseek(fp,-98,SEEK_END); + size_t res = fread(&zip64endrec_header[0],sizeof(char),56,fp); + global_header_offset = *(uint64_t*) &zip64endrec_header[48]; + } +} + +template +void npy_save(std::string fname, const T* data, + const std::vector shape, std::string mode) { + FILE* fp = NULL; + //if appending, the shape of existing + new data + std::vector true_data_shape; + + if(mode == "a") fp = fopen(fname.c_str(),"r+b"); + + if(fp) { + //file exists. we need to append to it. read the header, modify the array size + size_t word_size; + char type; + bool fortran_order; + parse_npy_header(fp,word_size,type,true_data_shape,fortran_order); + assert(!fortran_order); + + if(word_size != sizeof(T)) { + std::cout << "libnpy error: " << fname << " has word size " + << word_size << " but npy_save appending data sized " + << sizeof(T) << "\n"; + assert( word_size == sizeof(T) ); + } + if(true_data_shape.size() != shape.size()) { + std::cout << "libnpy error: npy_save attempting to append " + << "misdimensioned data to " << fname << "\n"; + assert(true_data_shape.size() != shape.size()); + } + + for(size_t i = 1; i < shape.size(); i++) { + if(shape[i] != true_data_shape[i]) { + std::cout << "libnpy error: npy_save attempting to append " + << "misshaped data to " << fname << "\n"; + assert(shape[i] == true_data_shape[i]); + } + } + true_data_shape[0] += shape[0]; + } + else { + fp = fopen(fname.c_str(),"wb"); + true_data_shape = shape; + } + + size_t word_size = sizeof(T); + char type = map_type(typeid(T)); + std::vector header = create_npy_header(true_data_shape, word_size, type); + size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); + + fseek(fp,0,SEEK_SET); + fwrite(&header[0],sizeof(char),header.size(),fp); + fseek(fp,0,SEEK_END); + fwrite(data,sizeof(T),nels,fp); + fclose(fp); +} + +template void npy_save >(std::string, + const std::complex*, + const std::vector, std::string); +template void npy_save(std::string, const double*, + const std::vector, std::string); +template void npy_save(std::string, const char*, + const std::vector, std::string); + +template +void npy_save(std::string fname, const std::vector data, + std::string mode) { + std::vector shape; + shape.push_back(data.size()); + npy_save(fname, &data[0], shape, mode); +} + +template +void npz_save(std::string zipname, std::string fname, + const T* data, const std::vector& shape, + std::string mode) { + //first, append a .npy to the fname + fname += ".npy"; + + //now, on with the show + FILE* fp = NULL; + uint16_t nrecs = 0; + size_t global_header_offset = 0; + std::vector global_header; + + if(mode == "a") fp = fopen(zipname.c_str(),"r+b"); + + if(fp) { + //zip file exists. we need to add a new npy file to it. + //first read the footer. + //this gives us the offset and size of the global header + //then read and store the global header. + //below, we will write the the new data at the start of the global + //header then append the global header and footer below it + size_t global_header_size; + parse_zip_footer(fp,nrecs,global_header_size,global_header_offset); + fseek(fp,global_header_offset,SEEK_SET); + global_header.resize(global_header_size); + size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp); + if(res != global_header_size){ + throw std::runtime_error("npz_save: " + "header read error while adding to existing zip"); + } + fseek(fp,global_header_offset,SEEK_SET); + } + else { + fp = fopen(zipname.c_str(),"wb"); + } + + size_t word_size = sizeof(T); + char type = map_type(typeid(T)); + std::vector npy_header; + if(shape.size() != 0){ + npy_header = create_npy_header(shape, word_size, type); + }else{ + std::cerr << "[Warning] zip name: " << fname <<" npz shape size is 0, skip it\n"; + fclose(fp); + return; + } + + size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); + size_t nbytes = nels*sizeof(T) + npy_header.size(); + + //get the CRC of the data to be added + uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size()); + crc = crc32(crc,(const uint8_t*)data,nels*sizeof(T)); + + //build the local header + std::vector local_header; + local_header += "PK"; //first part of sig + local_header += (uint16_t) 0x0403; //second part of sig + local_header += (uint16_t) 20; //min version to extract + local_header += (uint16_t) 0; //general purpose bit flag + local_header += (uint16_t) 0; //compression method + local_header += (uint16_t) 0; //file last mod time + local_header += (uint16_t) 0; //file last mod date + local_header += (uint32_t) crc; //crc + local_header += (uint32_t) nbytes; //compressed size + local_header += (uint32_t) nbytes; //uncompressed size + local_header += (uint16_t) fname.size(); //fname length + local_header += (uint16_t) 0; //extra field length + local_header += fname; + + fwrite(&local_header[0],sizeof(char),local_header.size(),fp); + fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); + fwrite(data,sizeof(T),nels,fp); + /* + Only support global_header_offset is larger than ZIP64_LIMIT. + Not support size is larger than ZIP64_LIMIT now. + */ + if (global_header_offset + nbytes + local_header.size() >= ZIP64_LIMIT) { + //structCentralDir = "<4s4B4HL2L5H2L" + //centdir = struct.pack(structCentralDir, + //stringCentralDir, create_version, + //zinfo.create_system, extract_version, zinfo.reserved, + //flag_bits, zinfo.compress_type, dostime, dosdate, + //zinfo.CRC, compress_size, file_size, + //len(filename), len(extra_data), len(zinfo.comment), + //0, zinfo.internal_attr, zinfo.external_attr, + //header_offset) + + //build global header + global_header += "PK"; //first part of sig + global_header += (uint16_t) 0x0201; //second part of sig + global_header += (uint8_t) 45; //create_version + global_header += (uint8_t) 3; //zinfo.create_system + global_header += (uint8_t) 45; //extract_version + global_header += (uint8_t) 0; //zinfo.reserved + global_header.insert(global_header.end(),local_header.begin()+6, + local_header.begin()+28); + global_header += (uint16_t) 12; //extran data length + global_header += (uint16_t) 0; //file comment length + global_header += (uint16_t) 0; //disk number where file starts + global_header += (uint16_t) 0; //internal file attributes + global_header += (uint32_t) 0; //external file attributes + //relative offset of local file header + //since it begins where the global header used to begin + global_header += (uint32_t) 0xFFFFFFFF ; //global_header_offset; + global_header += fname; + // Append a ZIP64 field to the extra's + // extra_data = struct.pack( + // '= ZIP64_LIMIT) { + //structEndArchive64 = "<4sQ2H2L4Q" + //zip64endrec = struct.pack( + // structEndArchive64, stringEndArchive64, + // 44, 45, 45, 0, 0, centDirCount, centDirCount, + // centDirSize, centDirOffset) + //self.fp.write(zip64endrec) + std::vector zip64endrec_header; + zip64endrec_header += "PK"; + zip64endrec_header += (uint16_t) 0x0606; + zip64endrec_header += (uint64_t) 0x44; + zip64endrec_header += (uint16_t) 0x45; + zip64endrec_header += (uint16_t) 0x45; + zip64endrec_header += (uint32_t) 0x0; + zip64endrec_header += (uint32_t) 0x0; + zip64endrec_header += (uint64_t) (nrecs+1); //centDirCount + zip64endrec_header += (uint64_t) (nrecs+1); //centDirCount + zip64endrec_header += (uint64_t) global_header.size(); //centDirSize + zip64endrec_header += (uint64_t) global_header_offset + nbytes + local_header.size(); //centDirOffset + fwrite(&zip64endrec_header[0],sizeof(char),zip64endrec_header.size(),fp); + + //structEndArchive64Locator = "<4sLQL" + //zip64locrec = struct.pack( + // structEndArchive64Locator, + // stringEndArchive64Locator, 0, pos2, 1) + //self.fp.write(zip64locrec) + std::vector zip64locrec_header; + zip64locrec_header += "PK"; + zip64locrec_header += (uint16_t) 0x0706; + zip64locrec_header += (uint32_t) 0x0; + zip64locrec_header += (uint64_t) global_header_offset + nbytes + local_header.size() + + zip64endrec_header.size(); // zip64endrec_header offset + zip64locrec_header += (uint32_t) 0x1; + fwrite(&zip64locrec_header[0],sizeof(char),zip64locrec_header.size(),fp); + } + //build footer + std::vector footer; + footer += "PK"; //first part of sig + footer += (uint16_t) 0x0605; //second part of sig + footer += (uint16_t) 0; //number of this disk + footer += (uint16_t) 0; //disk where footer starts + footer += (uint16_t) (nrecs+1); //number of records on this disk + footer += (uint16_t) (nrecs+1); //total number of records + footer += (uint32_t) global_header.size(); //nbytes of global headers + //offset of start of global headers + //since global header now starts after newly written array + footer += (global_header_offset >= ZIP64_LIMIT) ? + (uint32_t) 0xFFFFFFFF : (uint32_t) (global_header_offset + nbytes + local_header.size()); + footer += (uint16_t) 0; //zip file comment length + + fwrite(&footer[0],sizeof(char),footer.size(),fp); + fclose(fp); +} + +template void npz_save >(std::string, std::string, + const std::complex*, const std::vector&, + std::string); +template void npz_save(std::string, std::string, + const double*, const std::vector&, std::string); +template void npz_save(std::string, std::string, + const char*, const std::vector&, std::string); + +template +void npz_save(std::string zipname, std::string fname, + const std::vector &data, std::string mode) { + std::vector shape; + shape.push_back(data.size()); + npz_save(zipname, fname, &data[0], shape, mode); +} + +template +void npz_save(std::string zipname, std::string fname, + NpyArray &array, std::string mode) { + npz_save(zipname, fname, array.data(), array.shape, mode); +} + +template +void npz_add_array(npz_t &map, std::string fname, + const T* data, const std::vector shape) { + size_t word_size = sizeof(T); + char type = map_type(typeid(T)); + bool fortran_order = false; + NpyArray array(shape, word_size, type, fortran_order); + memcpy(array.data(), data, array.num_bytes()); + map[fname] = array; +} + +template void npz_add_array >(npz_t &, std::string, + const std::complex*, const std::vector); +template void npz_add_array(npz_t &, std::string, + const float*, const std::vector); +template void npz_add_array(npz_t &, std::string, + const int8_t*, const std::vector); +template void npz_add_array(npz_t &, std::string, + const uint8_t*, const std::vector); +template void npz_add_array(npz_t &, std::string, + const int16_t*, const std::vector); +template void npz_add_array(npz_t &, std::string, + const uint16_t*, const std::vector); +template void npz_add_array(npz_t &, std::string, + const uint32_t*, const std::vector); + +template +void npz_add_array(npz_t &map, std::string fname, + const std::vector &data) { + std::vector shape; + shape.push_back(data.size()); + npz_add_array(map, fname, &data[0], shape); +} + +template void npz_add_array >(npz_t &, std::string, + const std::vector > &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); + +void npz_save_all(std::string zipname, npz_t &map) { + for (auto it = map.begin(); it != map.end(); it++) { + std::string mode = (it == map.begin()) ? "w" : "a"; + NpyArray &arr = it->second; + if (arr.type == 'f') { + // support float only for now + assert(arr.word_size = sizeof(float)); + npz_save(zipname, it->first, it->second, mode); + } else if (arr.type == 'i') { + // support int8/int16 only + if (arr.word_size == sizeof(int8_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(int16_t)) { + npz_save(zipname, it->first, it->second, mode); + } else { + assert(0); + } + } else if (arr.type == 'u') { + // support uint8/uint16/uint32 + if (arr.word_size == sizeof(uint8_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(uint16_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(uint32_t)) { + npz_save(zipname, it->first, it->second, mode); + } else { + assert(0); + } + } else if (arr.type == 'b') { + // not support yet + assert(0); + } else if (arr.type == 'c') { + // not support yet + assert(0); + } else { + // invalid type + std::cout << "libcnpy error: invalid array type " + << arr.type << ", for " << it->first << "\n"; + assert(0); + } + } +} + +static NpyArray load_the_npy_file(FILE* fp) { + std::vector shape; + size_t word_size; + char type; + bool fortran_order; + parse_npy_header(fp,word_size,type,shape,fortran_order); + + NpyArray arr(shape, word_size, type, fortran_order); + size_t nread = fread(arr.data(),1,arr.num_bytes(),fp); + if(nread != arr.num_bytes()) + throw std::runtime_error("load_the_npy_file: failed fread"); + return arr; +} + +static NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, + uint32_t uncompr_bytes) { + std::vector buffer_compr(compr_bytes); + std::vector buffer_uncompr(uncompr_bytes); + size_t nread = fread(&buffer_compr[0],1,compr_bytes,fp); + if(nread != compr_bytes) + throw std::runtime_error("load_the_npy_file: failed fread"); + + int err; + z_stream d_stream; + + d_stream.zalloc = Z_NULL; + d_stream.zfree = Z_NULL; + d_stream.opaque = Z_NULL; + d_stream.avail_in = 0; + d_stream.next_in = Z_NULL; + err = inflateInit2(&d_stream, -MAX_WBITS); + assert(err = 0); + + d_stream.avail_in = compr_bytes; + d_stream.next_in = &buffer_compr[0]; + d_stream.avail_out = uncompr_bytes; + d_stream.next_out = &buffer_uncompr[0]; + + err = inflate(&d_stream, Z_FINISH); + assert(err = 0); + err = inflateEnd(&d_stream); + assert(err = 0); + + std::vector shape; + size_t word_size; + char type; + bool fortran_order; + parse_npy_header(&buffer_uncompr[0],word_size,type,shape,fortran_order); + + NpyArray array(shape, word_size, type, fortran_order); + + size_t offset = uncompr_bytes - array.num_bytes(); + memcpy(array.data(),&buffer_uncompr[0]+offset,array.num_bytes()); + + return array; +} + +npz_t npz_load(std::string fname) { + npz_t arrays; + arrays.clear(); + + FILE* fp = fopen(fname.c_str(),"rb"); + if(!fp) { + //throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!"); + return arrays; + } + + while(1) { + std::vector local_header(30); + size_t headerres = fread(&local_header[0],sizeof(char),30,fp); + if(headerres != 30) + break; + + //if we've reached the global header, stop reading + if(local_header[2] != 0x03 || local_header[3] != 0x04) break; + + //read in the variable name + uint16_t name_len = *(uint16_t*) &local_header[26]; + std::string varname(name_len,' '); + size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp); + if(vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + + //erase the lagging .npy + varname.erase(varname.end()-4,varname.end()); + + //read in the extra field + uint16_t extra_field_len = *(uint16_t*) &local_header[28]; + if(extra_field_len > 0) { + std::vector buff(extra_field_len); + size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp); + if(efield_res != extra_field_len) + throw std::runtime_error("npz_load: failed fread"); + } + + uint16_t compr_method = *reinterpret_cast(&local_header[0]+8); + uint32_t compr_bytes = *reinterpret_cast(&local_header[0]+18); + uint32_t uncompr_bytes = *reinterpret_cast(&local_header[0]+22); + + if(compr_method == 0) {arrays[varname] = load_the_npy_file(fp);} + else {arrays[varname] = load_the_npz_array(fp,compr_bytes,uncompr_bytes);} + } + + fclose(fp); + return arrays; +} + +NpyArray npz_load(std::string fname, std::string varname) { + FILE* fp = fopen(fname.c_str(),"rb"); + + if(!fp) throw std::runtime_error("npz_load: Unable to open file "+fname); + + while(1) { + std::vector local_header(30); + size_t header_res = fread(&local_header[0],sizeof(char),30,fp); + if(header_res != 30) + throw std::runtime_error("npz_load: failed fread"); + + //if we've reached the global header, stop reading + if(local_header[2] != 0x03 || local_header[3] != 0x04) break; + + //read in the variable name + uint16_t name_len = *(uint16_t*) &local_header[26]; + std::string vname(name_len,' '); + size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); + if(vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy + + //read in the extra field + uint16_t extra_field_len = *(uint16_t*) &local_header[28]; + fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field + + uint16_t compr_method = *reinterpret_cast(&local_header[0]+8); + uint32_t compr_bytes = *reinterpret_cast(&local_header[0]+18); + uint32_t uncompr_bytes = *reinterpret_cast(&local_header[0]+22); + + if(vname == varname) { + NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) + : load_the_npz_array(fp,compr_bytes,uncompr_bytes); + fclose(fp); + return array; + } + else { + //skip past the data + uint32_t size = *(uint32_t*) &local_header[22]; + fseek(fp,size,SEEK_CUR); + } + } + + fclose(fp); + + //if we get here, we haven't found the variable in the file + throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname); +} + +NpyArray npy_load(std::string fname) { + + FILE* fp = fopen(fname.c_str(), "rb"); + + if(!fp) throw std::runtime_error("npy_load: Unable to open file "+fname); + + NpyArray arr = load_the_npy_file(fp); + + fclose(fp); + return arr; +} + +} // namespace cnpy diff --git a/cnpy/cnpy.h b/cnpy/cnpy.h new file mode 100644 index 000000000..3024cc80b --- /dev/null +++ b/cnpy/cnpy.h @@ -0,0 +1,117 @@ +//Copyright (C) 2011 Carl Rogers +//Released under MIT License +//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php + +#ifndef LIBCNPY_H_ +#define LIBCNPY_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cnpy { + +struct NpyArray { + NpyArray(const std::vector& _shape, size_t _word_size, + char _type, bool _fortran_order) + : shape(_shape), word_size(_word_size), + type(_type), fortran_order(_fortran_order) { + num_vals = 1; + for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i]; + data_holder = std::shared_ptr>( + new std::vector(num_vals * word_size)); + } + + NpyArray() : shape(0), word_size(0), type(0), fortran_order(0), num_vals(0) {} + + template + T* data() { + return reinterpret_cast(&(*data_holder)[0]); + } + + template + const T* data() const { + return reinterpret_cast(&(*data_holder)[0]); + } + + template + std::vector as_vec() const { + const T* p = data(); + return std::vector(p, p+num_vals); + } + + size_t num_bytes() const { + return data_holder->size(); + } + + std::shared_ptr> data_holder; + std::vector shape; + size_t word_size; + char type; + bool fortran_order; + size_t num_vals; +}; + +using npz_t = std::map; + +std::vector create_npy_header(const std::vector& shape, + size_t word_size, char type); +void parse_npy_header(FILE* fp,size_t& word_size, char& type, + std::vector& shape, bool& fortran_order); +void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type, + std::vector& shape, bool& fortran_order); +void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, + size_t& global_header_offset); +npz_t npz_load(std::string fname); +NpyArray npz_load(std::string fname, std::string varname); +NpyArray npy_load(std::string fname); + +template +std::vector& operator+=(std::vector& lhs, const T rhs); +template<> +std::vector& operator+=(std::vector& lhs, const std::string rhs); +template<> +std::vector& operator+=(std::vector& lhs, const char* rhs); + +template +void npy_save(std::string fname, const T* data, + const std::vector shape, std::string mode = "w"); +template +void npy_save(std::string fname, const std::vector data, + std::string mode = "w"); + +template +void npz_save(std::string zipname, std::string fname, + const T* data, const std::vector& shape, + std::string mode = "w"); +template +void npz_save(std::string zipname, std::string fname, + const std::vector &data, std::string mode = "w"); +template +void npz_save(std::string zipname, std::string fname, + NpyArray &array, std::string mode = "w"); + + +template +void npz_add_array(npz_t &map, std::string fname, + const T* data, const std::vector shape); +template +void npz_add_array(npz_t &map, std::string fname, + const std::vector &data); + +void npz_save_all(std::string zipname, npz_t &map); + +} // namespace cnpy + +#endif diff --git a/cnpy/example1.cpp b/cnpy/example1.cpp new file mode 100644 index 000000000..b304ecd57 --- /dev/null +++ b/cnpy/example1.cpp @@ -0,0 +1,70 @@ +#include"cnpy.h" +#include +#include +#include +#include +#include + +const int Nx = 128; +const int Ny = 64; +const int Nz = 32; + +int main() +{ + //set random seed so that result is reproducible (for testing) + srand(0); + + //create random data + std::vector> data(Nx*Ny*Nz); + for(int i = 0;i < Nx*Ny*Nz;i++) data[i] = std::complex(rand(),rand()); + + //save it to file + cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"w"); + + //load it into a new array + cnpy::NpyArray arr = cnpy::npy_load("arr1.npy"); + std::complex* loaded_data = arr.data>(); + + //make sure the loaded data matches the saved data + assert(arr.word_size == sizeof(std::complex)); + assert(arr.shape.size() == 3 && arr.shape[0] == Nz && arr.shape[1] == Ny && arr.shape[2] == Nx); + for(int i = 0; i < Nx*Ny*Nz;i++) assert(data[i] == loaded_data[i]); + + //append the same data to file + //npy array on file now has shape (Nz+Nz,Ny,Nx) + cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a"); + + //now write to an npz file + //non-array variables are treated as 1D arrays with 1 element + double myVar1 = 1.2; + char myVar2 = 'a'; + cnpy::npz_save("out.npz","myVar1",&myVar1,{1},"w"); //"w" overwrites any existing file + cnpy::npz_save("out.npz","myVar2",&myVar2,{1},"a"); //"a" appends to the file we created above + cnpy::npz_save("out.npz","arr1",&data[0],{Nz,Ny,Nx},"a"); //"a" appends to the file we created above + + //load a single var from the npz file + cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); + + //load the entire npz file + cnpy::npz_t my_npz = cnpy::npz_load("out.npz"); + + // add a new array + std::vector> new_data(20); + for(int i = 0;i < 20;i++) + new_data[i] = std::complex(i, 20 - i); + cnpy::npz_add_array >(my_npz, "new_arr", new_data); + + //save the entire npz file back + // TODO: this has some problem, because npz_save_all() is assuming + // all array in same type, this is because NPArray struct does not + // convey a type info. + cnpy::npz_save_all("out_new.npz", my_npz); + + //check that the loaded myVar1 matches myVar1 + cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; + double* mv1 = arr_mv1.data(); + assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); + assert(mv1[0] == myVar1); + + return 0; +} diff --git a/cnpy/mat2npz b/cnpy/mat2npz new file mode 100644 index 000000000..c1bbd3e4d --- /dev/null +++ b/cnpy/mat2npz @@ -0,0 +1,18 @@ +#!/usr/bin/env python + +import sys +from numpy import savez +from scipy.io import loadmat + +assert len(sys.argv) > 1 + +files = sys.argv[1:] + +for f in files: + mat_vars = loadmat(f) + mat_vars.pop('__version__') + mat_vars.pop('__header__') + mat_vars.pop('__globals__') + + fn = f.replace('.mat','.npz') + savez(fn,**mat_vars) diff --git a/cnpy/npy2mat b/cnpy/npy2mat new file mode 100644 index 000000000..fdd5da758 --- /dev/null +++ b/cnpy/npy2mat @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +import sys +from numpy import load +from scipy.io import savemat + +assert len(sys.argv) > 1 + +files = sys.argv[1:] + +for f in files: + data = load(f) + fn = f.replace('.npy','') + fn = fn.replace('.','_') + savemat(fn,{fn : data}) diff --git a/cnpy/npz2mat b/cnpy/npz2mat new file mode 100755 index 000000000..2e317d6ee --- /dev/null +++ b/cnpy/npz2mat @@ -0,0 +1,15 @@ +#!/usr/bin/env python + +import sys +from numpy import load +from scipy.io import savemat + +assert len(sys.argv) > 1 + +files = sys.argv[1:] + +for f in files: + data = load(f) + fn = f.replace('.npz','') + fn = fn.replace('.','_') #matlab cant handle dots + savemat(fn,data)