@@ -815,13 +815,28 @@ void f8_e4m3_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
         dst[i] = f8_e4m3_to_f16(src[i]);
     }
 }
+
 void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
     // support inplace op
     for (int64_t i = n - 1; i >= 0; i--) {
         dst[i] = f8_e5m2_to_f16(src[i]);
     }
 }
 
+void f64_to_f32_vec(double* src, float* dst, int64_t n) {
+    // support inplace op
+    for (int64_t i = 0; i < n; i++) {
+        dst[i] = (float)src[i];
+    }
+}
+
+void i64_to_i32_vec(int64_t* src, int32_t* dst, int64_t n) {
+    // support inplace op
+    for (int64_t i = 0; i < n; i++) {
+        dst[i] = (int32_t)src[i];
+    }
+}
+
 void convert_tensor(void* src,
                     ggml_type src_type,
                     void* dst,
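Note the loop directions in the helpers above: the existing widening f8-to-f16 conversions walk the buffer backwards so an in-place pass never overwrites bytes it has not yet read, while the new narrowing f64-to-f32 and i64-to-i32 conversions walk forwards for the same reason. A standalone demo (hypothetical code, not part of the patch; the type-punning casts mirror the loader's own) of why the direction matters:

```cpp
// Hypothetical demo: in-place widening must iterate backwards,
// in-place narrowing forwards.
#include <cstdint>
#include <cstdio>

int main() {
    // Widening (1 byte -> 2 bytes), as in f8_e5m2_to_f16_vec: element i is
    // read from byte i but written to bytes [2i, 2i+1]. Going forwards, the
    // write at i = 0 would clobber the unread source byte at i = 1; going
    // backwards, every source byte is read before its slot is overwritten.
    uint8_t buf[8] = {1, 2, 3, 4};
    uint8_t* src8   = buf;
    uint16_t* dst16 = (uint16_t*)buf;
    for (int64_t i = 3; i >= 0; i--) {
        dst16[i] = src8[i];
    }

    // Narrowing (8 bytes -> 4 bytes), as in f64_to_f32_vec: element i is read
    // from bytes [8i, 8i+7] and written to bytes [4i, 4i+3], so each source
    // element is loaded before any write can reach it. Forwards is safe here;
    // backwards would clobber srcd[1] while writing dst32[2].
    double dbuf[4] = {1.0, 2.0, 3.0, 4.0};
    double* src64 = dbuf;
    float* dst32  = (float*)dbuf;
    for (int64_t i = 0; i < 4; i++) {
        dst32[i] = (float)src64[i];
    }

    printf("%u %.1f\n", (unsigned)dst16[3], (double)dst32[3]);  // prints: 4 4.0
    return 0;
}
```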
@@ -1057,13 +1072,13 @@ ggml_type str_to_ggml_type(const std::string& dtype) {
     } else if (dtype == "F32") {
         ttype = GGML_TYPE_F32;
     } else if (dtype == "F64") {
-        ttype = GGML_TYPE_F64;
+        ttype = GGML_TYPE_F32;
     } else if (dtype == "F8_E4M3") {
         ttype = GGML_TYPE_F16;
     } else if (dtype == "F8_E5M2") {
         ttype = GGML_TYPE_F16;
     } else if (dtype == "I64") {
-        ttype = GGML_TYPE_I64;
+        ttype = GGML_TYPE_I32;
     }
     return ttype;
 }
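The enum values GGML_TYPE_F64 and GGML_TYPE_I64 do exist (the removed lines reference them), but the loader now narrows both dtypes at load time, presumably so downstream code only ever sees element types its compute paths handle, matching how F8 payloads were already mapped to F16. In effect (illustrative calls, not from the patch):

```cpp
// After this change, 64-bit safetensors dtypes map to their 32-bit
// ggml counterparts:
ggml_type t_f64 = str_to_ggml_type("F64");  // GGML_TYPE_F32, was GGML_TYPE_F64
ggml_type t_i64 = str_to_ggml_type("I64");  // GGML_TYPE_I32, was GGML_TYPE_I64
```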
@@ -1185,6 +1200,14 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
             tensor_storage.is_f8_e5m2 = true;
             // f8 -> f16
             GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
+        } else if (dtype == "F64") {
+            tensor_storage.is_f64 = true;
+            // f64 -> f32
+            GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
+        } else if (dtype == "I64") {
+            tensor_storage.is_i64 = true;
+            // i64 -> i32
+            GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
         } else {
             GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size);
         }
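Because str_to_ggml_type() now reports the narrowed 32-bit types, tensor_storage.nbytes() is computed with a 4-byte element size while the file stores 8 bytes per element, so the on-disk size is exactly twice nbytes(). The new asserts pin down that 1:2 ratio, the mirror image of the existing f8 asserts where the in-memory f16 form is twice the on-disk size. A worked check with illustrative numbers (the names are local stand-ins, not the loader's variables):

```cpp
// Size bookkeeping behind the new F64/I64 asserts.
#include <cassert>
#include <cstdint>

int main() {
    int64_t nelements        = 1024;
    int64_t nbytes           = nelements * 4;  // F64 counted as GGML_TYPE_F32
    int64_t tensor_data_size = nelements * 8;  // 8 bytes per element on disk
    assert(nbytes * 2 == tensor_data_size);    // the F64/I64 branch above
    return 0;
}
```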
@@ -1945,7 +1968,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             // for the CPU and Metal backend, we can copy directly into the tensor
             if (tensor_storage.type == dst_tensor->type) {
                 GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
-                read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
+                if (tensor_storage.is_f64 || tensor_storage.is_i64) {
+                    read_buffer.resize(tensor_storage.nbytes_to_read());
+                    read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
+                } else {
+                    read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
+                }
 
                 if (tensor_storage.is_bf16) {
                     // inplace op
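On this fast path the raw data is now larger than the destination tensor (an F64 payload materializes as F32), so it can no longer be read straight into dst_tensor->data; it goes through read_buffer first, sized by nbytes_to_read(). That helper's definition is outside this diff; given the asserts earlier, it plausibly looks something like this hypothetical sketch:

```cpp
// Hypothetical sketch of TensorStorage::nbytes_to_read(); the real
// definition is not shown in this diff. It returns the raw on-disk size,
// which differs from nbytes() whenever element width changes at load time.
int64_t nbytes_to_read() const {
    if (is_bf16 || is_f8_e4m3 || is_f8_e5m2) {
        return nbytes() / 2;  // on-disk form is half the widened in-memory form
    } else if (is_f64 || is_i64) {
        return nbytes() * 2;  // on-disk form is twice the narrowed in-memory form
    }
    return nbytes();
}
```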
@@ -1956,9 +1984,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
19561984 } else if (tensor_storage.is_f8_e5m2 ) {
19571985 // inplace op
19581986 f8_e5m2_to_f16_vec ((uint8_t *)dst_tensor->data , (uint16_t *)dst_tensor->data , tensor_storage.nelements ());
1987+ } else if (tensor_storage.is_f64 ) {
1988+ f64_to_f32_vec ((double *)read_buffer.data (), (float *)dst_tensor->data , tensor_storage.nelements ());
1989+ } else if (tensor_storage.is_i64 ) {
1990+ i64_to_i32_vec ((int64_t *)read_buffer.data (), (int32_t *)dst_tensor->data , tensor_storage.nelements ());
19591991 }
19601992 } else {
1961- read_buffer.resize (tensor_storage.nbytes ());
1993+ read_buffer.resize (std::max ( tensor_storage.nbytes (), tensor_storage. nbytes_to_read () ));
19621994 read_data (tensor_storage, (char *)read_buffer.data (), nbytes_to_read);
19631995
19641996 if (tensor_storage.is_bf16 ) {
@@ -1970,13 +2002,19 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                 } else if (tensor_storage.is_f8_e5m2) {
                     // inplace op
                     f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
+                } else if (tensor_storage.is_f64) {
+                    // inplace op
+                    f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
+                } else if (tensor_storage.is_i64) {
+                    // inplace op
+                    i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
                 }
 
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
                                dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
             }
         } else {
-            read_buffer.resize(tensor_storage.nbytes());
+            read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
             read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
             if (tensor_storage.is_bf16) {
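The two read_buffer.resize() changes above follow from the same bookkeeping: the buffer first holds the raw on-disk bytes and is then reused for the in-place conversion, so it must fit whichever form is larger. Roughly (a sketch derived from the asserts earlier, not code from the patch):

```cpp
// Which term of the std::max wins depends on the dtype:
//   bf16, f8_e4m3, f8_e5m2 : nbytes_to_read() <  nbytes()   (widened in place)
//   f64, i64               : nbytes_to_read() >  nbytes()   (narrowed in place)
//   everything else        : nbytes_to_read() == nbytes()
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
```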
@@ -1988,6 +2026,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             } else if (tensor_storage.is_f8_e5m2) {
                 // inplace op
                 f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
+            } else if (tensor_storage.is_f64) {
+                // inplace op
+                f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
+            } else if (tensor_storage.is_i64) {
+                // inplace op
+                i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
             }
 
             if (tensor_storage.type == dst_tensor->type) {