add cnpy

commit 2f56f4c23ea840f9d15c43801368cf4a089efa84
Author: sophgo-forum-service <forum_service@sophgo.com>
Date:   Sun May 19 23:03:28 2024 +0800

    [feat] cnpy opensource for cv18xx soc. - e9d84e
@@ -20,3 +20,4 @@
 | cvikernel | cvikernel | https://github.com/sophgo/cvikernel.git | sg200x-dev | 9f1f57a |
 | cviruntime | cviruntime | https://github.com/sophgo/cviruntime.git | sg200x-dev | 3f49386 |
 | cvimath | cvimath | https://github.com/sophgo/cvimath.git | sg200x-dev | ce8705f |
+| cnpy | cnpy | https://github.com/sophgo/cnpy.git | sg200x-dev | 2f56f4c |
cnpy/CMakeLists.txt (Normal file, 65 lines)
@@ -0,0 +1,65 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR)
if(COMMAND cmake_policy)
cmake_policy(SET CMP0003 NEW)
endif(COMMAND cmake_policy)

if (NOT DEFINED LLVM_MAIN_SRC_DIR)
project(CNPY)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

message(STATUS "CMAKE_SYSROOT ${CMAKE_SYSROOT}")
include_directories(${CMAKE_SYSROOT}/include)

include_directories(${CMAKE_INSTALL_PREFIX}/include)
link_directories(${CMAKE_INSTALL_PREFIX}/lib)

set(ENV{PKG_CONFIG_DIR} "")
set(ENV{PKG_CONFIG_LIBDIR} "${CMAKE_SYSROOT}/usr/lib/pkgconfig:${CMAKE_SYSROOT}/usr/share/pkgconfig")
set(ENV{PKG_CONFIG_SYSROOT_DIR} ${CMAKE_SYSROOT})
find_package(ZLIB)

option(ENABLE_STATIC "Build static (.a) library" ON)
add_library(cnpy SHARED "cnpy.cpp")
target_link_libraries(cnpy z)
install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)

if(ENABLE_STATIC)
add_library(cnpy-static STATIC "cnpy.cpp")
set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy")
install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib)
endif(ENABLE_STATIC)

install(FILES "cnpy.h" DESTINATION include)
install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)

add_executable(example1 example1.cpp)
target_link_libraries(example1 cnpy)

else()
message(STATUS "CNPY LLVM Tree Build")

add_custom_target(CNPY)
set_target_properties(CNPY PROPERTIES FOLDER Third_party)
add_dependencies(CNPY
  cnpy
  cnpy_example1
)

set(LLVM_OPTIONAL_SOURCES
  cnpy.cpp
  example1.cpp
)

find_package(ZLIB REQUIRED)

add_llvm_library(cnpy SHARED
  cnpy.cpp)
target_link_libraries(cnpy ${ZLIB_LIBRARIES})

install(FILES "cnpy.h" DESTINATION include)
add_llvm_executable(cnpy_example1
  example1.cpp)
target_link_libraries(cnpy_example1 PRIVATE
  cnpy)
endif()
cnpy/LICENSE (Normal file, 21 lines)
@@ -0,0 +1,21 @@
The MIT License

Copyright (c) Carl Rogers, 2011

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
cnpy/README.md (Normal file, 55 lines)
@@ -0,0 +1,55 @@
# Purpose:

NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file.

`cnpy` lets you read and write these formats in C++.

The motivation comes from scientific programming, where large amounts of data are generated in C++ and analyzed in Python.

Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and a binary format for size.
The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary.

Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice.

# Installation:

Default installation directory is /usr/local.
To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4.

1. get [cmake](www.cmake.org)
2. create a build directory, say $HOME/build
3. cd $HOME/build
4. cmake /path/to/cnpy
5. make
6. make install

# Using:

To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as

```bash
g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11
```

# Description:

There are two functions for writing data: `npy_save` and `npz_save`.

There are 3 functions for reading:
- `npy_load` will load a .npy file.
- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structures.
- `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file.

The data structure for loaded data is below.
Data is accessed via the `data<T>()` method, which returns a pointer of the specified type (which must match the underlying datatype of the data).
The array shape and word size are read from the npy header.

```c++
struct NpyArray {
    std::vector<size_t> shape;
    size_t word_size;
    template<typename T> T* data();
};
```

See [example1.cpp](example1.cpp) for examples of how to use the library. example1 will also be built during the cmake build.
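Putting the sections above together, a minimal round trip looks like the sketch below. It is not part of this commit: the file name `demo.npy` and the values are illustrative, and the element type is `double` because that is one of the types cnpy.cpp explicitly instantiates `npy_save` for. Build it with the `g++` line shown under "Using:".

```c++
#include "cnpy.h"
#include <cassert>
#include <vector>

int main() {
    // illustrative data: a 2x3 array of doubles, stored row-major
    std::vector<double> vals = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};

    // "w" truncates any existing demo.npy; the shape is given outermost-first
    cnpy::npy_save("demo.npy", vals.data(), {2, 3}, "w");

    // read it back; data<T>() must be called with the on-disk element type
    cnpy::NpyArray arr = cnpy::npy_load("demo.npy");
    assert(arr.word_size == sizeof(double));
    assert(arr.shape.size() == 2 && arr.shape[0] == 2 && arr.shape[1] == 3);
    const double* p = arr.data<double>();
    assert(p[5] == 6.0);
    return 0;
}
```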
cnpy/cnpy.cpp (Normal file, 785 lines)
@@ -0,0 +1,785 @@
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#define _FILE_OFFSET_BITS 64
#define __USE_FILE_OFFSET64
#define __USE_LARGEFILE64
#define _LARGEFILE64_SOURCE

#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<algorithm>
#include<cstring>
#include<iomanip>
#include<stdint.h>
#include<stdexcept>
#include <regex>

#define ZIP64_LIMIT ((((size_t)1) << 31) - 1)

namespace cnpy {

static char BigEndianTest() {
    int x = 1;
    return (((char *)&x)[0]) ? '<' : '>';
}

static char map_type(const std::type_info& t)
{
    if( t == typeid(float) ) return 'f';
    if( t == typeid(double) ) return 'f';
    if( t == typeid(long double) ) return 'f';

    if( t == typeid(int) ) return 'i';
    if( t == typeid(char) ) return 'i';
    if( t == typeid(signed char) ) return 'i';
    if( t == typeid(short) ) return 'i';
    if( t == typeid(long) ) return 'i';
    if( t == typeid(long long) ) return 'i';

    if( t == typeid(unsigned char) ) return 'u';
    if( t == typeid(unsigned short) ) return 'u';
    if( t == typeid(unsigned long) ) return 'u';
    if( t == typeid(unsigned long long) ) return 'u';
    if( t == typeid(unsigned int) ) return 'u';

    if( t == typeid(bool) ) return 'b';

    if( t == typeid(std::complex<float>) ) return 'c';
    if( t == typeid(std::complex<double>) ) return 'c';
    if( t == typeid(std::complex<long double>) ) return 'c';

    std::cout << "libcnpy error: unknown type_id "
              << t.name() << "\n";
    // ref: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin
    assert(0);
    return '?';
}

template<typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
    //write in little endian
    for(size_t byte = 0; byte < sizeof(T); byte++) {
        char val = *((const char*)&rhs+byte);
        lhs.push_back(val);
    }
    return lhs;
}

template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs) {
    lhs.insert(lhs.end(),rhs.begin(),rhs.end());
    return lhs;
}

template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs) {
    //write in little endian
    size_t len = strlen(rhs);
    lhs.reserve(len);
    for(size_t byte = 0; byte < len; byte++) {
        lhs.push_back(rhs[byte]);
    }
    return lhs;
}

std::vector<char> create_npy_header(const std::vector<size_t>& shape,
        size_t word_size, char type) {
    std::vector<char> dict;
    dict += "{'descr': '";
    dict += BigEndianTest();
    dict += type;
    dict += std::to_string(word_size);
    dict += "', 'fortran_order': False, 'shape': (";
    dict += std::to_string(shape[0]);
    for(size_t i = 1;i < shape.size();i++) {
        dict += ", ";
        dict += std::to_string(shape[i]);
    }
    if(shape.size() == 1) dict += ",";
    dict += "), }";
    //pad with spaces so that preamble+dict is modulo 16 bytes.
    //preamble is 10 bytes. dict needs to end with \n
    int remainder = 16 - (10 + dict.size()) % 16;
    dict.insert(dict.end(),remainder,' ');
    dict.back() = '\n';

    std::vector<char> header;
    header += (char) 0x93;
    header += "NUMPY";
    header += (char) 0x01; //major version of numpy format
    header += (char) 0x00; //minor version of numpy format
    header += (uint16_t) dict.size();
    header.insert(header.end(),dict.begin(),dict.end());

    return header;
}

void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type,
        std::vector<size_t>& shape, bool& fortran_order) {
    //std::string magic_string(buffer,6);
    //uint8_t major_version = *reinterpret_cast<uint8_t*>(buffer+6);
    //uint8_t minor_version = *reinterpret_cast<uint8_t*>(buffer+7);
    uint16_t header_len = *reinterpret_cast<uint16_t*>(buffer+8);
    std::string header(reinterpret_cast<char*>(buffer+9),header_len);

    size_t loc1, loc2;

    //fortran order
    loc1 = header.find("fortran_order")+16;
    fortran_order = (header.substr(loc1,4) == "True" ? true : false);

    //shape
    loc1 = header.find("(");
    loc2 = header.find(")");

    std::regex num_regex("[0-9][0-9]*");
    std::smatch sm;
    shape.clear();

    std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
    while(std::regex_search(str_shape, sm, num_regex)) {
        shape.push_back(std::stoi(sm[0].str()));
        str_shape = sm.suffix().str();
    }

    //endian, word size, data type
    //byte order code | stands for not applicable.
    //not sure when this applies except for byte array
    loc1 = header.find("descr")+9;
    bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
    assert(littleEndian);

    type = header[loc1+1];
    //assert(type == map_type(T));

    std::string str_ws = header.substr(loc1+2);
    loc2 = str_ws.find("'");
    word_size = atoi(str_ws.substr(0,loc2).c_str());
}

void parse_npy_header(FILE* fp, size_t& word_size, char& type,
        std::vector<size_t>& shape, bool& fortran_order) {
    char buffer[256];
    size_t res = fread(buffer,sizeof(char),11,fp);
    if(res != 11)
        throw std::runtime_error("parse_npy_header: failed fread");
    std::string header = fgets(buffer,256,fp);
    assert(header[header.size()-1] == '\n');

    size_t loc1, loc2;

    //fortran order
    loc1 = header.find("fortran_order");
    if (loc1 == std::string::npos)
        throw std::runtime_error("parse_npy_header: "
                "failed to find header keyword: 'fortran_order'");
    loc1 += 16;
    fortran_order = (header.substr(loc1,4) == "True" ? true : false);

    //shape
    loc1 = header.find("(");
    loc2 = header.find(")");
    if (loc1 == std::string::npos || loc2 == std::string::npos)
        throw std::runtime_error("parse_npy_header: "
                "failed to find header keyword: '(' or ')'");

    std::regex num_regex("[0-9][0-9]*");
    std::smatch sm;
    shape.clear();

    std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
    while(std::regex_search(str_shape, sm, num_regex)) {
        shape.push_back(std::stoi(sm[0].str()));
        str_shape = sm.suffix().str();
    }

    //endian, word size, data type
    //byte order code | stands for not applicable.
    //not sure when this applies except for byte array
    loc1 = header.find("descr");
    if (loc1 == std::string::npos)
        throw std::runtime_error("parse_npy_header: "
                "failed to find header keyword: 'descr'");
    loc1 += 9;
    bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
    assert(littleEndian);

    type = header[loc1+1];
    //assert(type == map_type(T));

    std::string str_ws = header.substr(loc1+2);
    loc2 = str_ws.find("'");
    word_size = atoi(str_ws.substr(0,loc2).c_str());
}

void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size,
        size_t& global_header_offset) {
    std::vector<char> footer(22);
    fseek(fp,-22,SEEK_END);
    size_t res = fread(&footer[0],sizeof(char),22,fp);
    if(res != 22)
        throw std::runtime_error("parse_zip_footer: failed fread");

    uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
    disk_no = *(uint16_t*) &footer[4];
    disk_start = *(uint16_t*) &footer[6];
    nrecs_on_disk = *(uint16_t*) &footer[8];
    nrecs = *(uint16_t*) &footer[10];
    global_header_size = *(uint32_t*) &footer[12];
    global_header_offset = *(uint32_t*) &footer[16];
    comment_len = *(uint16_t*) &footer[20];

    assert(disk_no == 0);
    assert(disk_start == 0);
    assert(nrecs_on_disk == nrecs);
    assert(comment_len == 0);
    if (global_header_offset >= 0xFFFFFFFF) {
        //get global header offset from extra data
        std::vector<char> zip64endrec_header(56);
        fseek(fp,-98,SEEK_END);
        size_t res = fread(&zip64endrec_header[0],sizeof(char),56,fp);
        global_header_offset = *(uint64_t*) &zip64endrec_header[48];
    }
}

template<typename T>
void npy_save(std::string fname, const T* data,
        const std::vector<size_t> shape, std::string mode) {
    FILE* fp = NULL;
    //if appending, the shape of existing + new data
    std::vector<size_t> true_data_shape;

    if(mode == "a") fp = fopen(fname.c_str(),"r+b");

    if(fp) {
        //file exists. we need to append to it. read the header, modify the array size
        size_t word_size;
        char type;
        bool fortran_order;
        parse_npy_header(fp,word_size,type,true_data_shape,fortran_order);
        assert(!fortran_order);

        if(word_size != sizeof(T)) {
            std::cout << "libnpy error: " << fname << " has word size "
                      << word_size << " but npy_save appending data sized "
                      << sizeof(T) << "\n";
            assert( word_size == sizeof(T) );
        }
        if(true_data_shape.size() != shape.size()) {
            std::cout << "libnpy error: npy_save attempting to append "
                      << "misdimensioned data to " << fname << "\n";
            assert(true_data_shape.size() == shape.size());
        }

        for(size_t i = 1; i < shape.size(); i++) {
            if(shape[i] != true_data_shape[i]) {
                std::cout << "libnpy error: npy_save attempting to append "
                          << "misshaped data to " << fname << "\n";
                assert(shape[i] == true_data_shape[i]);
            }
        }
        true_data_shape[0] += shape[0];
    }
    else {
        fp = fopen(fname.c_str(),"wb");
        true_data_shape = shape;
    }

    size_t word_size = sizeof(T);
    char type = map_type(typeid(T));
    std::vector<char> header = create_npy_header(true_data_shape, word_size, type);
    size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());

    fseek(fp,0,SEEK_SET);
    fwrite(&header[0],sizeof(char),header.size(),fp);
    fseek(fp,0,SEEK_END);
    fwrite(data,sizeof(T),nels,fp);
    fclose(fp);
}

template void npy_save<std::complex<double> >(std::string,
        const std::complex<double>*,
        const std::vector<size_t>, std::string);
template void npy_save<double>(std::string, const double*,
        const std::vector<size_t>, std::string);
template void npy_save<char>(std::string, const char*,
        const std::vector<size_t>, std::string);

template<typename T>
void npy_save(std::string fname, const std::vector<T> data,
        std::string mode) {
    std::vector<size_t> shape;
    shape.push_back(data.size());
    npy_save<T>(fname, &data[0], shape, mode);
}

template<typename T>
void npz_save(std::string zipname, std::string fname,
        const T* data, const std::vector<size_t>& shape,
        std::string mode) {
    //first, append a .npy to the fname
    fname += ".npy";

    //now, on with the show
    FILE* fp = NULL;
    uint16_t nrecs = 0;
    size_t global_header_offset = 0;
    std::vector<char> global_header;

    if(mode == "a") fp = fopen(zipname.c_str(),"r+b");

    if(fp) {
        //zip file exists. we need to add a new npy file to it.
        //first read the footer.
        //this gives us the offset and size of the global header
        //then read and store the global header.
        //below, we will write the new data at the start of the global
        //header then append the global header and footer below it
        size_t global_header_size;
        parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
        fseek(fp,global_header_offset,SEEK_SET);
        global_header.resize(global_header_size);
        size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
        if(res != global_header_size){
            throw std::runtime_error("npz_save: "
                    "header read error while adding to existing zip");
        }
        fseek(fp,global_header_offset,SEEK_SET);
    }
    else {
        fp = fopen(zipname.c_str(),"wb");
    }

    size_t word_size = sizeof(T);
    char type = map_type(typeid(T));
    std::vector<char> npy_header;
    if(shape.size() != 0){
        npy_header = create_npy_header(shape, word_size, type);
    }else{
        std::cerr << "[Warning] zip name: " << fname << " npz shape size is 0, skip it\n";
        fclose(fp);
        return;
    }

    size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
    size_t nbytes = nels*sizeof(T) + npy_header.size();

    //get the CRC of the data to be added
    uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size());
    crc = crc32(crc,(const uint8_t*)data,nels*sizeof(T));

    //build the local header
    std::vector<char> local_header;
    local_header += "PK"; //first part of sig
    local_header += (uint16_t) 0x0403; //second part of sig
    local_header += (uint16_t) 20; //min version to extract
    local_header += (uint16_t) 0; //general purpose bit flag
    local_header += (uint16_t) 0; //compression method
    local_header += (uint16_t) 0; //file last mod time
    local_header += (uint16_t) 0; //file last mod date
    local_header += (uint32_t) crc; //crc
    local_header += (uint32_t) nbytes; //compressed size
    local_header += (uint32_t) nbytes; //uncompressed size
    local_header += (uint16_t) fname.size(); //fname length
    local_header += (uint16_t) 0; //extra field length
    local_header += fname;

    fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
    fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
    fwrite(data,sizeof(T),nels,fp);
    /*
      Only the case where global_header_offset exceeds ZIP64_LIMIT is supported.
      Entry sizes larger than ZIP64_LIMIT are not supported yet.
    */
    if (global_header_offset + nbytes + local_header.size() >= ZIP64_LIMIT) {
        //structCentralDir = "<4s4B4HL2L5H2L"
        //centdir = struct.pack(structCentralDir,
        //stringCentralDir, create_version,
        //zinfo.create_system, extract_version, zinfo.reserved,
        //flag_bits, zinfo.compress_type, dostime, dosdate,
        //zinfo.CRC, compress_size, file_size,
        //len(filename), len(extra_data), len(zinfo.comment),
        //0, zinfo.internal_attr, zinfo.external_attr,
        //header_offset)

        //build global header
        global_header += "PK"; //first part of sig
        global_header += (uint16_t) 0x0201; //second part of sig
        global_header += (uint8_t) 45; //create_version
        global_header += (uint8_t) 3; //zinfo.create_system
        global_header += (uint8_t) 45; //extract_version
        global_header += (uint8_t) 0; //zinfo.reserved
        global_header.insert(global_header.end(),local_header.begin()+6,
                local_header.begin()+28);
        global_header += (uint16_t) 12; //extra data length
        global_header += (uint16_t) 0; //file comment length
        global_header += (uint16_t) 0; //disk number where file starts
        global_header += (uint16_t) 0; //internal file attributes
        global_header += (uint32_t) 0; //external file attributes
        //relative offset of local file header
        //since it begins where the global header used to begin
        global_header += (uint32_t) 0xFFFFFFFF; //global_header_offset;
        global_header += fname;
        // Append a ZIP64 field to the extra's
        // extra_data = struct.pack(
        //     '<HH' + 'Q'*len(extra),
        //     1, 8*len(extra), *extra) + extra_data
        // extract_version = max(45, zinfo.extract_version)
        // create_version = max(45, zinfo.create_version)
        global_header += (uint16_t) 0x01;
        global_header += (uint16_t) 0x08;
        global_header += (uint64_t) global_header_offset;
    } else {
        //build global header
        global_header += "PK"; //first part of sig
        global_header += (uint16_t) 0x0201; //second part of sig
        global_header += (uint16_t) 20; //version made by
        global_header.insert(global_header.end(),local_header.begin()+4,
                local_header.begin()+30);
        global_header += (uint16_t) 0; //file comment length
        global_header += (uint16_t) 0; //disk number where file starts
        global_header += (uint16_t) 0; //internal file attributes
        global_header += (uint32_t) 0; //external file attributes
        //relative offset of local file header
        //since it begins where the global header used to begin
        global_header += (uint32_t) global_header_offset;
        global_header += fname;
    }

    fwrite(&global_header[0],sizeof(char),global_header.size(),fp);

    if (global_header_offset >= ZIP64_LIMIT) {
        //structEndArchive64 = "<4sQ2H2L4Q"
        //zip64endrec = struct.pack(
        //    structEndArchive64, stringEndArchive64,
        //    44, 45, 45, 0, 0, centDirCount, centDirCount,
        //    centDirSize, centDirOffset)
        //self.fp.write(zip64endrec)
        std::vector<char> zip64endrec_header;
        zip64endrec_header += "PK";
        zip64endrec_header += (uint16_t) 0x0606;
        zip64endrec_header += (uint64_t) 0x44;
        zip64endrec_header += (uint16_t) 0x45;
        zip64endrec_header += (uint16_t) 0x45;
        zip64endrec_header += (uint32_t) 0x0;
        zip64endrec_header += (uint32_t) 0x0;
        zip64endrec_header += (uint64_t) (nrecs+1); //centDirCount
        zip64endrec_header += (uint64_t) (nrecs+1); //centDirCount
        zip64endrec_header += (uint64_t) global_header.size(); //centDirSize
        zip64endrec_header += (uint64_t) global_header_offset + nbytes + local_header.size(); //centDirOffset
        fwrite(&zip64endrec_header[0],sizeof(char),zip64endrec_header.size(),fp);

        //structEndArchive64Locator = "<4sLQL"
        //zip64locrec = struct.pack(
        //    structEndArchive64Locator,
        //    stringEndArchive64Locator, 0, pos2, 1)
        //self.fp.write(zip64locrec)
        std::vector<char> zip64locrec_header;
        zip64locrec_header += "PK";
        zip64locrec_header += (uint16_t) 0x0706;
        zip64locrec_header += (uint32_t) 0x0;
        zip64locrec_header += (uint64_t) global_header_offset + nbytes + local_header.size() +
                zip64endrec_header.size(); // zip64endrec_header offset
        zip64locrec_header += (uint32_t) 0x1;
        fwrite(&zip64locrec_header[0],sizeof(char),zip64locrec_header.size(),fp);
    }
    //build footer
    std::vector<char> footer;
    footer += "PK"; //first part of sig
    footer += (uint16_t) 0x0605; //second part of sig
    footer += (uint16_t) 0; //number of this disk
    footer += (uint16_t) 0; //disk where footer starts
    footer += (uint16_t) (nrecs+1); //number of records on this disk
    footer += (uint16_t) (nrecs+1); //total number of records
    footer += (uint32_t) global_header.size(); //nbytes of global headers
    //offset of start of global headers
    //since global header now starts after newly written array
    footer += (global_header_offset >= ZIP64_LIMIT) ?
            (uint32_t) 0xFFFFFFFF : (uint32_t) (global_header_offset + nbytes + local_header.size());
    footer += (uint16_t) 0; //zip file comment length

    fwrite(&footer[0],sizeof(char),footer.size(),fp);
    fclose(fp);
}

template void npz_save<std::complex<double> >(std::string, std::string,
        const std::complex<double>*, const std::vector<size_t>&,
        std::string);
template void npz_save<double>(std::string, std::string,
        const double*, const std::vector<size_t>&, std::string);
template void npz_save<char>(std::string, std::string,
        const char*, const std::vector<size_t>&, std::string);

template<typename T>
void npz_save(std::string zipname, std::string fname,
        const std::vector<T> &data, std::string mode) {
    std::vector<size_t> shape;
    shape.push_back(data.size());
    npz_save(zipname, fname, &data[0], shape, mode);
}

template<typename T>
void npz_save(std::string zipname, std::string fname,
        NpyArray &array, std::string mode) {
    npz_save<T>(zipname, fname, array.data<T>(), array.shape, mode);
}

template<typename T>
void npz_add_array(npz_t &map, std::string fname,
        const T* data, const std::vector<size_t> shape) {
    size_t word_size = sizeof(T);
    char type = map_type(typeid(T));
    bool fortran_order = false;
    NpyArray array(shape, word_size, type, fortran_order);
    memcpy(array.data<unsigned char>(), data, array.num_bytes());
    map[fname] = array;
}

template void npz_add_array<std::complex<double> >(npz_t &, std::string,
        const std::complex<double>*, const std::vector<size_t>);
template void npz_add_array<float>(npz_t &, std::string,
        const float*, const std::vector<size_t>);
template void npz_add_array<int8_t>(npz_t &, std::string,
        const int8_t*, const std::vector<size_t>);
template void npz_add_array<uint8_t>(npz_t &, std::string,
        const uint8_t*, const std::vector<size_t>);
template void npz_add_array<int16_t>(npz_t &, std::string,
        const int16_t*, const std::vector<size_t>);
template void npz_add_array<uint16_t>(npz_t &, std::string,
        const uint16_t*, const std::vector<size_t>);
template void npz_add_array<uint32_t>(npz_t &, std::string,
        const uint32_t*, const std::vector<size_t>);

template<typename T>
void npz_add_array(npz_t &map, std::string fname,
        const std::vector<T> &data) {
    std::vector<size_t> shape;
    shape.push_back(data.size());
    npz_add_array(map, fname, &data[0], shape);
}

template void npz_add_array<std::complex<double> >(npz_t &, std::string,
        const std::vector<std::complex<double> > &);
template void npz_add_array<float>(npz_t &, std::string,
        const std::vector<float> &);
template void npz_add_array<int8_t>(npz_t &, std::string,
        const std::vector<int8_t> &);
template void npz_add_array<int16_t>(npz_t &, std::string,
        const std::vector<int16_t> &);
template void npz_add_array<uint16_t>(npz_t &, std::string,
        const std::vector<uint16_t> &);

void npz_save_all(std::string zipname, npz_t &map) {
    for (auto it = map.begin(); it != map.end(); it++) {
        std::string mode = (it == map.begin()) ? "w" : "a";
        NpyArray &arr = it->second;
        if (arr.type == 'f') {
            // support float only for now
            assert(arr.word_size == sizeof(float));
            npz_save<float>(zipname, it->first, it->second, mode);
        } else if (arr.type == 'i') {
            // support int8/int16 only
            if (arr.word_size == sizeof(int8_t)) {
                npz_save<int8_t>(zipname, it->first, it->second, mode);
            } else if (arr.word_size == sizeof(int16_t)) {
                npz_save<int16_t>(zipname, it->first, it->second, mode);
            } else {
                assert(0);
            }
        } else if (arr.type == 'u') {
            // support uint8/uint16/uint32
            if (arr.word_size == sizeof(uint8_t)) {
                npz_save<uint8_t>(zipname, it->first, it->second, mode);
            } else if (arr.word_size == sizeof(uint16_t)) {
                npz_save<uint16_t>(zipname, it->first, it->second, mode);
            } else if (arr.word_size == sizeof(uint32_t)) {
                npz_save<uint32_t>(zipname, it->first, it->second, mode);
            } else {
                assert(0);
            }
        } else if (arr.type == 'b') {
            // not supported yet
            assert(0);
        } else if (arr.type == 'c') {
            // not supported yet
            assert(0);
        } else {
            // invalid type
            std::cout << "libcnpy error: invalid array type "
                      << arr.type << ", for " << it->first << "\n";
            assert(0);
        }
    }
}

static NpyArray load_the_npy_file(FILE* fp) {
    std::vector<size_t> shape;
    size_t word_size;
    char type;
    bool fortran_order;
    parse_npy_header(fp,word_size,type,shape,fortran_order);

    NpyArray arr(shape, word_size, type, fortran_order);
    size_t nread = fread(arr.data<char>(),1,arr.num_bytes(),fp);
    if(nread != arr.num_bytes())
        throw std::runtime_error("load_the_npy_file: failed fread");
    return arr;
}

static NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes,
        uint32_t uncompr_bytes) {
    std::vector<unsigned char> buffer_compr(compr_bytes);
    std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
    size_t nread = fread(&buffer_compr[0],1,compr_bytes,fp);
    if(nread != compr_bytes)
        throw std::runtime_error("load_the_npy_file: failed fread");

    int err;
    z_stream d_stream;

    d_stream.zalloc = Z_NULL;
    d_stream.zfree = Z_NULL;
    d_stream.opaque = Z_NULL;
    d_stream.avail_in = 0;
    d_stream.next_in = Z_NULL;
    err = inflateInit2(&d_stream, -MAX_WBITS);
    assert(err == Z_OK);

    d_stream.avail_in = compr_bytes;
    d_stream.next_in = &buffer_compr[0];
    d_stream.avail_out = uncompr_bytes;
    d_stream.next_out = &buffer_uncompr[0];

    err = inflate(&d_stream, Z_FINISH);
    assert(err == Z_STREAM_END); //Z_FINISH returns Z_STREAM_END when the whole stream was inflated
    err = inflateEnd(&d_stream);
    assert(err == Z_OK);

    std::vector<size_t> shape;
    size_t word_size;
    char type;
    bool fortran_order;
    parse_npy_header(&buffer_uncompr[0],word_size,type,shape,fortran_order);

    NpyArray array(shape, word_size, type, fortran_order);

    size_t offset = uncompr_bytes - array.num_bytes();
    memcpy(array.data<unsigned char>(),&buffer_uncompr[0]+offset,array.num_bytes());

    return array;
}

npz_t npz_load(std::string fname) {
    npz_t arrays;
    arrays.clear();

    FILE* fp = fopen(fname.c_str(),"rb");
    if(!fp) {
        //throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!");
        return arrays;
    }

    while(1) {
        std::vector<char> local_header(30);
        size_t headerres = fread(&local_header[0],sizeof(char),30,fp);
        if(headerres != 30)
            break;

        //if we've reached the global header, stop reading
        if(local_header[2] != 0x03 || local_header[3] != 0x04) break;

        //read in the variable name
        uint16_t name_len = *(uint16_t*) &local_header[26];
        std::string varname(name_len,' ');
        size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp);
        if(vname_res != name_len)
            throw std::runtime_error("npz_load: failed fread");

        //erase the lagging .npy
        varname.erase(varname.end()-4,varname.end());

        //read in the extra field
        uint16_t extra_field_len = *(uint16_t*) &local_header[28];
        if(extra_field_len > 0) {
            std::vector<char> buff(extra_field_len);
            size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp);
            if(efield_res != extra_field_len)
                throw std::runtime_error("npz_load: failed fread");
        }

        uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0]+8);
        uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+18);
        uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+22);

        if(compr_method == 0) {arrays[varname] = load_the_npy_file(fp);}
        else {arrays[varname] = load_the_npz_array(fp,compr_bytes,uncompr_bytes);}
    }

    fclose(fp);
    return arrays;
}

NpyArray npz_load(std::string fname, std::string varname) {
    FILE* fp = fopen(fname.c_str(),"rb");

    if(!fp) throw std::runtime_error("npz_load: Unable to open file "+fname);

    while(1) {
        std::vector<char> local_header(30);
        size_t header_res = fread(&local_header[0],sizeof(char),30,fp);
        if(header_res != 30)
            throw std::runtime_error("npz_load: failed fread");

        //if we've reached the global header, stop reading
        if(local_header[2] != 0x03 || local_header[3] != 0x04) break;

        //read in the variable name
        uint16_t name_len = *(uint16_t*) &local_header[26];
        std::string vname(name_len,' ');
        size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp);
        if(vname_res != name_len)
            throw std::runtime_error("npz_load: failed fread");
        vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy

        //read in the extra field
        uint16_t extra_field_len = *(uint16_t*) &local_header[28];
        fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field

        uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0]+8);
        uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+18);
        uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+22);

        if(vname == varname) {
            NpyArray array = (compr_method == 0) ? load_the_npy_file(fp)
                    : load_the_npz_array(fp,compr_bytes,uncompr_bytes);
            fclose(fp);
            return array;
        }
        else {
            //skip past the data
            uint32_t size = *(uint32_t*) &local_header[22];
            fseek(fp,size,SEEK_CUR);
        }
    }

    fclose(fp);

    //if we get here, we haven't found the variable in the file
    throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname);
}

NpyArray npy_load(std::string fname) {

    FILE* fp = fopen(fname.c_str(), "rb");

    if(!fp) throw std::runtime_error("npy_load: Unable to open file "+fname);

    NpyArray arr = load_the_npy_file(fp);

    fclose(fp);
    return arr;
}

} // namespace cnpy
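To make the header-building logic in `create_npy_header` tangible, a small throwaway driver (hypothetical, not part of this commit) can print the dict that `npy_save` would write for a given shape. On a little-endian host, a 3x4 float32 array yields `{'descr': '<f4', 'fortran_order': False, 'shape': (3, 4), }` plus space padding and a trailing newline; the 10 bytes skipped below are the `\x93NUMPY` magic, the 1.0 version, and the dict length, exactly as assembled at the end of the function.

```c++
#include "cnpy.h"
#include <cstdio>

int main() {
    // Header that npy_save would emit for a 3x4 array with 4-byte floating-point elements.
    std::vector<char> hdr = cnpy::create_npy_header({3, 4}, 4, 'f');
    std::printf("header size: %zu bytes (padded to a multiple of 16)\n", hdr.size());
    // Skip the 10-byte binary preamble and dump the readable dict text.
    std::fwrite(hdr.data() + 10, 1, hdr.size() - 10, stdout);
    return 0;
}
```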
cnpy/cnpy.h (Normal file, 117 lines)
@@ -0,0 +1,117 @@
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php

#ifndef LIBCNPY_H_
#define LIBCNPY_H_

#include<string>
#include<cstring>
#include<stdexcept>
#include<sstream>
#include<vector>
#include<cstdio>
#include<typeinfo>
#include<iostream>
#include<cassert>
#include<zlib.h>
#include<map>
#include<memory>
#include<stdint.h>
#include<numeric>

namespace cnpy {

struct NpyArray {
    NpyArray(const std::vector<size_t>& _shape, size_t _word_size,
            char _type, bool _fortran_order)
        : shape(_shape), word_size(_word_size),
          type(_type), fortran_order(_fortran_order) {
        num_vals = 1;
        for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
        data_holder = std::shared_ptr<std::vector<char>>(
                new std::vector<char>(num_vals * word_size));
    }

    NpyArray() : shape(0), word_size(0), type(0), fortran_order(0), num_vals(0) {}

    template<typename T>
    T* data() {
        return reinterpret_cast<T*>(&(*data_holder)[0]);
    }

    template<typename T>
    const T* data() const {
        return reinterpret_cast<T*>(&(*data_holder)[0]);
    }

    template<typename T>
    std::vector<T> as_vec() const {
        const T* p = data<T>();
        return std::vector<T>(p, p+num_vals);
    }

    size_t num_bytes() const {
        return data_holder->size();
    }

    std::shared_ptr<std::vector<char>> data_holder;
    std::vector<size_t> shape;
    size_t word_size;
    char type;
    bool fortran_order;
    size_t num_vals;
};

using npz_t = std::map<std::string, NpyArray>;

std::vector<char> create_npy_header(const std::vector<size_t>& shape,
        size_t word_size, char type);
void parse_npy_header(FILE* fp, size_t& word_size, char& type,
        std::vector<size_t>& shape, bool& fortran_order);
void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type,
        std::vector<size_t>& shape, bool& fortran_order);
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size,
        size_t& global_header_offset);
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);

template<typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs);
template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);

template<typename T>
void npy_save(std::string fname, const T* data,
        const std::vector<size_t> shape, std::string mode = "w");
template<typename T>
void npy_save(std::string fname, const std::vector<T> data,
        std::string mode = "w");

template<typename T>
void npz_save(std::string zipname, std::string fname,
        const T* data, const std::vector<size_t>& shape,
        std::string mode = "w");
template<typename T>
void npz_save(std::string zipname, std::string fname,
        const std::vector<T> &data, std::string mode = "w");
template<typename T>
void npz_save(std::string zipname, std::string fname,
        NpyArray &array, std::string mode = "w");

template<typename T>
void npz_add_array(npz_t &map, std::string fname,
        const T* data, const std::vector<size_t> shape);
template<typename T>
void npz_add_array(npz_t &map, std::string fname,
        const std::vector<T> &data);

void npz_save_all(std::string zipname, npz_t &map);

} // namespace cnpy

#endif
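Since `npz_t` is just a `std::map<std::string, NpyArray>`, a loaded archive can be walked with ordinary map iteration and the accessors declared above. The sketch below is illustrative only: `out.npz` and `myVar1` are the names written by example1.cpp, not fixed parts of the API.

```c++
#include "cnpy.h"
#include <iostream>

int main() {
    // Load every entry of the archive into memory and print its metadata.
    cnpy::npz_t all = cnpy::npz_load("out.npz");
    for (auto &kv : all) {
        const cnpy::NpyArray &a = kv.second;
        std::cout << kv.first << ": type=" << a.type
                  << " word_size=" << a.word_size
                  << " num_vals=" << a.num_vals << "\n";
    }
    // Copy one entry out as a std::vector; the requested type must match
    // what was stored (myVar1 was written as a single double in example1).
    if (all.count("myVar1")) {
        std::vector<double> v = all["myVar1"].as_vec<double>();
        std::cout << "myVar1[0] = " << v[0] << "\n";
    }
    return 0;
}
```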
cnpy/example1.cpp (Normal file, 70 lines)
@@ -0,0 +1,70 @@
#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<iostream>
#include<map>
#include<string>

const int Nx = 128;
const int Ny = 64;
const int Nz = 32;

int main()
{
    //set random seed so that result is reproducible (for testing)
    srand(0);

    //create random data
    std::vector<std::complex<double>> data(Nx*Ny*Nz);
    for(int i = 0;i < Nx*Ny*Nz;i++) data[i] = std::complex<double>(rand(),rand());

    //save it to file
    cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"w");

    //load it into a new array
    cnpy::NpyArray arr = cnpy::npy_load("arr1.npy");
    std::complex<double>* loaded_data = arr.data<std::complex<double>>();

    //make sure the loaded data matches the saved data
    assert(arr.word_size == sizeof(std::complex<double>));
    assert(arr.shape.size() == 3 && arr.shape[0] == Nz && arr.shape[1] == Ny && arr.shape[2] == Nx);
    for(int i = 0; i < Nx*Ny*Nz;i++) assert(data[i] == loaded_data[i]);

    //append the same data to file
    //npy array on file now has shape (Nz+Nz,Ny,Nx)
    cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a");

    //now write to an npz file
    //non-array variables are treated as 1D arrays with 1 element
    double myVar1 = 1.2;
    char myVar2 = 'a';
    cnpy::npz_save("out.npz","myVar1",&myVar1,{1},"w"); //"w" overwrites any existing file
    cnpy::npz_save("out.npz","myVar2",&myVar2,{1},"a"); //"a" appends to the file we created above
    cnpy::npz_save("out.npz","arr1",&data[0],{Nz,Ny,Nx},"a"); //"a" appends to the file we created above

    //load a single var from the npz file
    cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1");

    //load the entire npz file
    cnpy::npz_t my_npz = cnpy::npz_load("out.npz");

    // add a new array
    std::vector<std::complex<double>> new_data(20);
    for(int i = 0;i < 20;i++)
        new_data[i] = std::complex<double>(i, 20 - i);
    cnpy::npz_add_array<std::complex<double> >(my_npz, "new_arr", new_data);

    //save the entire npz file back
    // TODO: this has some problem, because npz_save_all() assumes all
    // arrays are of the same type; this is because the NpyArray struct
    // does not convey type info.
    cnpy::npz_save_all("out_new.npz", my_npz);

    //check that the loaded myVar1 matches myVar1
    cnpy::NpyArray arr_mv1 = my_npz["myVar1"];
    double* mv1 = arr_mv1.data<double>();
    assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1);
    assert(mv1[0] == myVar1);

    return 0;
}
cnpy/mat2npz (Normal file, 18 lines)
@@ -0,0 +1,18 @@
#!/usr/bin/env python

import sys
from numpy import savez
from scipy.io import loadmat

assert len(sys.argv) > 1

files = sys.argv[1:]

for f in files:
    mat_vars = loadmat(f)
    mat_vars.pop('__version__')
    mat_vars.pop('__header__')
    mat_vars.pop('__globals__')

    fn = f.replace('.mat','.npz')
    savez(fn,**mat_vars)
cnpy/npy2mat (Normal file, 15 lines)
@@ -0,0 +1,15 @@
#!/usr/bin/env python

import sys
from numpy import load
from scipy.io import savemat

assert len(sys.argv) > 1

files = sys.argv[1:]

for f in files:
    data = load(f)
    fn = f.replace('.npy','')
    fn = fn.replace('.','_')
    savemat(fn,{fn : data})
cnpy/npz2mat (Executable file, 15 lines)
@@ -0,0 +1,15 @@
#!/usr/bin/env python

import sys
from numpy import load
from scipy.io import savemat

assert len(sys.argv) > 1

files = sys.argv[1:]

for f in files:
    data = load(f)
    fn = f.replace('.npz','')
    fn = fn.replace('.','_')  # matlab can't handle dots
    savemat(fn,data)