gguf header loading
All checks were successful
studiorailgun/trpg/pipeline/head This commit looks good
All checks were successful
studiorailgun/trpg/pipeline/head This commit looks good
This commit is contained in:
parent
8e5546d772
commit
56594eb667
@ -1,6 +1,16 @@
|
||||
package org.studiorailgun;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.studiorailgun.interact.GameLoop;
|
||||
import org.studiorailgun.kobold.gguf.GGUFHeader;
|
||||
import org.studiorailgun.kobold.gguf.GGUFLoader;
|
||||
import org.studiorailgun.kobold.gguf.GGUFMetadataKV;
|
||||
import org.studiorailgun.kobold.gguf.GGUFModel;
|
||||
import org.studiorailgun.kobold.gguf.GGUFTensorInfo;
|
||||
|
||||
/**
|
||||
* The main class
|
||||
@ -11,6 +21,25 @@ public class Main {
|
||||
* The main method
|
||||
*/
|
||||
public static void main(String[] args){
|
||||
try {
|
||||
GGUFLoader loader = new GGUFLoader();
|
||||
GGUFModel model = loader.load("C:\\Users\\satellite\\Documents\\ai\\koboldcpp\\Fimbulvetr-Kuro-Lotus-10.7B-Q6_K.gguf");
|
||||
System.out.println("Metadata: ");
|
||||
GGUFHeader header = model.getHeader();
|
||||
for(GGUFMetadataKV pair : header.getMetadataPairs()){
|
||||
System.out.println(pair.getKey() + " - " + pair.getValue_type() + " - " + pair.getValue());
|
||||
}
|
||||
|
||||
System.out.println("\n");
|
||||
|
||||
for(GGUFTensorInfo tensorInfo : model.getTensorInfos()){
|
||||
System.out.println(tensorInfo.getName() + " - " + tensorInfo.getType() + " - " + tensorInfo.getOffset());
|
||||
}
|
||||
|
||||
System.exit(0);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
GameLoop.main();
|
||||
}
|
||||
|
||||
|
||||
43
src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
Normal file
43
src/main/java/org/studiorailgun/kobold/gguf/GGUFHeader.java
Normal file
@ -0,0 +1,43 @@
|
||||
package org.studiorailgun.kobold.gguf;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The header of a gguf file
|
||||
*/
|
||||
public class GGUFHeader {
|
||||
|
||||
protected int magic;
|
||||
|
||||
protected int version;
|
||||
|
||||
protected long tensorCount;
|
||||
|
||||
protected long metadataKVCount;
|
||||
|
||||
protected List<GGUFMetadataKV> metadataPairs = new LinkedList<GGUFMetadataKV>();
|
||||
|
||||
public int getMagic() {
|
||||
return magic;
|
||||
}
|
||||
|
||||
public int getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public long getTensorCount() {
|
||||
return tensorCount;
|
||||
}
|
||||
|
||||
public long getMetadataKVCount() {
|
||||
return metadataKVCount;
|
||||
}
|
||||
|
||||
public List<GGUFMetadataKV> getMetadataPairs() {
|
||||
return metadataPairs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
415
src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
Normal file
415
src/main/java/org/studiorailgun/kobold/gguf/GGUFLoader.java
Normal file
@ -0,0 +1,415 @@
|
||||
package org.studiorailgun.kobold.gguf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import org.studiorailgun.kobold.gguf.GGUFMetadataKV.GGUFMetadataValue;
|
||||
|
||||
/**
|
||||
* Loads a gguf format model
|
||||
*/
|
||||
public class GGUFLoader {
|
||||
|
||||
//gguf core explanation: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
|
||||
|
||||
/**
|
||||
* The default alignment for the file
|
||||
*/
|
||||
static final int DEFAULT_ALIGNMENT = 32;
|
||||
|
||||
/**
|
||||
* Used for converting bytes read in to little endian
|
||||
*/
|
||||
ByteBuffer readerBuff = ByteBuffer.allocate(8);
|
||||
|
||||
/**
|
||||
* The alignment of the file
|
||||
*/
|
||||
int alignment = DEFAULT_ALIGNMENT;
|
||||
|
||||
/**
|
||||
* The number of bytes read -- used for keeping track of alignment
|
||||
*/
|
||||
long bytesRead = 0;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*/
|
||||
public GGUFLoader(){
|
||||
readerBuff.order(ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the gguf model at the specified path
|
||||
* @param path The path
|
||||
*/
|
||||
public GGUFModel load(String path) throws IOException {
|
||||
InputStream stream = Files.newInputStream(new File(path).toPath());
|
||||
|
||||
GGUFModel rVal = new GGUFModel();
|
||||
|
||||
//read the header
|
||||
rVal.header = this.readHeader(stream);
|
||||
|
||||
//read the tensor info
|
||||
for(int i = 0; i < rVal.header.tensorCount; i++){
|
||||
rVal.tensorInfos.add(this.readTensorInfo(stream));
|
||||
}
|
||||
|
||||
//read padding
|
||||
while(bytesRead % (alignment / 8) > 0){
|
||||
stream.read();
|
||||
bytesRead++;
|
||||
}
|
||||
|
||||
//read the tensor data
|
||||
for(int i = 0; i < rVal.tensorInfos.size(); i++){
|
||||
GGUFTensorInfo tensorInfo = rVal.tensorInfos.get(i);
|
||||
rVal.tensorData.add(this.readTensorData(stream,tensorInfo));
|
||||
}
|
||||
|
||||
stream.close();
|
||||
|
||||
return rVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a GGUF header from an input stream
|
||||
* @param stream The stream
|
||||
* @return The header
|
||||
*/
|
||||
private GGUFHeader readHeader(InputStream stream) throws IOException {
|
||||
GGUFHeader rVal = new GGUFHeader();
|
||||
rVal.magic = this.streamReadInt(stream);
|
||||
rVal.version = this.streamReadInt(stream);
|
||||
rVal.tensorCount = this.streamReadLong(stream);
|
||||
rVal.metadataKVCount = this.streamReadLong(stream);
|
||||
|
||||
System.out.println(rVal.magic + " " + rVal.version + " " + rVal.tensorCount + " " + rVal.metadataKVCount);
|
||||
|
||||
//read in the metadata kv pairs
|
||||
for(int i = 0; i < rVal.metadataKVCount; i++){
|
||||
rVal.metadataPairs.add(this.readMetadataPair(stream));
|
||||
}
|
||||
|
||||
return rVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in a GGUF metadata pair
|
||||
* @param stream The stream
|
||||
* @return The pair
|
||||
*/
|
||||
private GGUFMetadataKV readMetadataPair(InputStream stream) throws IOException {
|
||||
GGUFMetadataKV pair = new GGUFMetadataKV();
|
||||
|
||||
pair.key = this.readGGUFString(stream);
|
||||
|
||||
int typeRaw = this.streamReadInt(stream);
|
||||
switch(typeRaw){
|
||||
case 0: {
|
||||
pair.value_type = GGUFMetadataValue.UINT8;
|
||||
pair.value = this.streamReadChar(stream);
|
||||
} break;
|
||||
case 1: {
|
||||
pair.value_type = GGUFMetadataValue.INT8;
|
||||
pair.value = this.streamReadChar(stream);
|
||||
} break;
|
||||
case 2: {
|
||||
pair.value_type = GGUFMetadataValue.UINT16;
|
||||
pair.value = this.streamReadShort(stream);
|
||||
} break;
|
||||
case 3: {
|
||||
pair.value_type = GGUFMetadataValue.INT16;
|
||||
pair.value = this.streamReadShort(stream);
|
||||
} break;
|
||||
case 4: {
|
||||
pair.value_type = GGUFMetadataValue.UINT32;
|
||||
pair.value = this.streamReadInt(stream);
|
||||
} break;
|
||||
case 5: {
|
||||
pair.value_type = GGUFMetadataValue.INT32;
|
||||
pair.value = this.streamReadInt(stream);
|
||||
} break;
|
||||
case 6: {
|
||||
pair.value_type = GGUFMetadataValue.FLOAT32;
|
||||
pair.value = this.streamReadFloat(stream);
|
||||
} break;
|
||||
case 7: {
|
||||
pair.value_type = GGUFMetadataValue.BOOL;
|
||||
pair.value = this.streamReadChar(stream);
|
||||
} break;
|
||||
case 8: {
|
||||
pair.value_type = GGUFMetadataValue.STRING;
|
||||
pair.value = this.readGGUFString(stream);
|
||||
} break;
|
||||
case 9: {
|
||||
pair.value_type = GGUFMetadataValue.ARRAY;
|
||||
pair.value = this.readMetadataArray(stream);
|
||||
} break;
|
||||
case 10: {
|
||||
pair.value_type = GGUFMetadataValue.UINT64;
|
||||
pair.value = this.streamReadLong(stream);
|
||||
} break;
|
||||
case 11: {
|
||||
pair.value_type = GGUFMetadataValue.INT64;
|
||||
pair.value = this.streamReadLong(stream);
|
||||
} break;
|
||||
case 12: {
|
||||
pair.value_type = GGUFMetadataValue.DOUBLE;
|
||||
pair.value = this.streamReadDouble(stream);
|
||||
} break;
|
||||
default: {
|
||||
throw new Error("Invalid metadata pair data type! " + typeRaw);
|
||||
}
|
||||
}
|
||||
|
||||
return pair;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a metadata array value
|
||||
* @param stream The stream
|
||||
* @return The array value
|
||||
* @throws IOException Thrown if the stream has an io exception
|
||||
*/
|
||||
private Object readMetadataArray(InputStream stream) throws IOException {
|
||||
int typeRaw = this.streamReadInt(stream);
|
||||
int len = (int)this.streamReadLong(stream);
|
||||
switch(typeRaw){
|
||||
case 0:
|
||||
case 1: {
|
||||
char[] rVal = new char[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadChar(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 2:
|
||||
case 3: {
|
||||
short[] rVal = new short[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadShort(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 4:
|
||||
case 5: {
|
||||
int[] rVal = new int[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadInt(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 6: {
|
||||
float[] rVal = new float[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadFloat(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 7: {
|
||||
boolean[] rVal = new boolean[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadChar(stream) > 0;
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 8: {
|
||||
String[] rVal = new String[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.readGGUFString(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 9: {
|
||||
Object[] rVal = new Object[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.readMetadataArray(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 10:
|
||||
case 11: {
|
||||
long[] rVal = new long[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadLong(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
case 12: {
|
||||
double[] rVal = new double[len];
|
||||
for(int i = 0; i < len; i++){
|
||||
rVal[i] = this.streamReadDouble(stream);
|
||||
}
|
||||
return rVal;
|
||||
}
|
||||
default: {
|
||||
throw new Error("Failed to read metadata array -- invalid type! " + typeRaw);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads info on a tensor from the file
|
||||
* @param stream The stream
|
||||
* @return The tensor info
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private GGUFTensorInfo readTensorInfo(InputStream stream) throws IOException {
|
||||
GGUFTensorInfo rVal = new GGUFTensorInfo();
|
||||
|
||||
rVal.name = this.readGGUFString(stream);
|
||||
|
||||
rVal.nDimensions = this.streamReadInt(stream);
|
||||
|
||||
rVal.dimensions = new long[rVal.nDimensions];
|
||||
for(int i = 0; i < rVal.nDimensions; i++){
|
||||
rVal.dimensions[i] = this.streamReadLong(stream);
|
||||
}
|
||||
|
||||
rVal.type = GGUFTensorInfo.getType(this.streamReadInt(stream));
|
||||
|
||||
rVal.offset = this.streamReadLong(stream);
|
||||
|
||||
return rVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the binary tensor data for a given tensor info
|
||||
* @param stream The stream
|
||||
* @param tensorInfo The tensor metadata
|
||||
* @return The binary tensor data
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private ByteBuffer readTensorData(InputStream stream, GGUFTensorInfo tensorInfo) throws IOException {
|
||||
//read up to the offset
|
||||
while(this.bytesRead < tensorInfo.getOffset()){
|
||||
stream.read();
|
||||
this.bytesRead++;
|
||||
}
|
||||
|
||||
long totalSize = 1;
|
||||
for(int i = 0; i < tensorInfo.getnDimensions(); i++){
|
||||
totalSize = totalSize * tensorInfo.getDimensions()[i];
|
||||
}
|
||||
float bitsPerWeight = GGUFTensorInfo.getUnitSize(tensorInfo.getType());
|
||||
System.out.println("Bits per weight: " + bitsPerWeight);
|
||||
// totalSize = totalSize;
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a little-endian char from the stream
|
||||
* @param stream The stream
|
||||
* @return The char
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private char streamReadChar(InputStream stream) throws IOException {
|
||||
byte[] bytes = stream.readNBytes(1);
|
||||
readerBuff.position(0);
|
||||
readerBuff.put(bytes);
|
||||
readerBuff.position(0);
|
||||
this.bytesRead += 1;
|
||||
return readerBuff.asCharBuffer().get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a little-endian short from the stream
|
||||
* @param stream The stream
|
||||
* @return The short
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private short streamReadShort(InputStream stream) throws IOException {
|
||||
byte[] bytes = stream.readNBytes(2);
|
||||
readerBuff.position(0);
|
||||
readerBuff.put(bytes);
|
||||
readerBuff.position(0);
|
||||
this.bytesRead += 2;
|
||||
return readerBuff.asShortBuffer().get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a little-endian int from the stream
|
||||
* @param stream The stream
|
||||
* @return The int
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private int streamReadInt(InputStream stream) throws IOException {
|
||||
byte[] bytes = stream.readNBytes(4);
|
||||
readerBuff.position(0);
|
||||
readerBuff.put(bytes);
|
||||
readerBuff.position(0);
|
||||
this.bytesRead += 4;
|
||||
return readerBuff.asIntBuffer().get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a little-endian float from the stream
|
||||
* @param stream The stream
|
||||
* @return The float
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private float streamReadFloat(InputStream stream) throws IOException {
|
||||
byte[] bytes = stream.readNBytes(4);
|
||||
readerBuff.position(0);
|
||||
readerBuff.put(bytes);
|
||||
readerBuff.position(0);
|
||||
this.bytesRead += 4;
|
||||
return readerBuff.asFloatBuffer().get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a little-endian long from the stream
|
||||
* @param stream The stream
|
||||
* @return The long
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private long streamReadLong(InputStream stream) throws IOException {
|
||||
byte[] bytes = stream.readNBytes(8);
|
||||
readerBuff.position(0);
|
||||
readerBuff.put(bytes);
|
||||
readerBuff.position(0);
|
||||
this.bytesRead += 8;
|
||||
return readerBuff.asLongBuffer().get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a little-endian double from the stream
|
||||
* @param stream The stream
|
||||
* @return The double
|
||||
* @throws IOException Thrown if the stream fails to read
|
||||
*/
|
||||
private double streamReadDouble(InputStream stream) throws IOException {
|
||||
byte[] bytes = stream.readNBytes(8);
|
||||
readerBuff.position(0);
|
||||
readerBuff.put(bytes);
|
||||
readerBuff.position(0);
|
||||
this.bytesRead += 8;
|
||||
return readerBuff.asDoubleBuffer().get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a GGUF-format string
|
||||
* @param stream The stream
|
||||
* @return The Java string containing the data from the GGUF-format string
|
||||
*/
|
||||
private String readGGUFString(InputStream stream) throws IOException {
|
||||
long length = this.streamReadLong(stream);
|
||||
|
||||
byte[] bytes = stream.readNBytes((int)length);
|
||||
this.bytesRead += length;
|
||||
|
||||
String rVal = new String(bytes, StandardCharsets.UTF_8);
|
||||
|
||||
return rVal;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,57 @@
|
||||
package org.studiorailgun.kobold.gguf;
|
||||
|
||||
|
||||
/**
 * A metadata key-value pair.
 * Fields are protected so the loader in this package can fill them in directly.
 */
public class GGUFMetadataKV {

    /**
     * Metadata value types.
     * The declaration order matches the numeric type ids used on disk (UINT8 is 0, DOUBLE is 12).
     */
    public enum GGUFMetadataValue {
        UINT8,
        INT8,
        UINT16,
        INT16,
        UINT32,
        INT32,
        FLOAT32,
        BOOL,
        STRING,
        ARRAY,
        UINT64,
        INT64,
        DOUBLE
    }

    /**
     * The key for the metadata value
     */
    protected String key;

    /**
     * The type of the value
     */
    protected GGUFMetadataValue value_type; //stored as an int on disk

    /**
     * The actual value
     */
    protected Object value;

    /**
     * Gets the key of this pair
     * @return The key
     */
    public String getKey() {
        return this.key;
    }

    /**
     * Gets the declared type of the value
     * @return The value type
     */
    public GGUFMetadataValue getValue_type() {
        return this.value_type;
    }

    /**
     * Gets the value of this pair
     * @return The value
     */
    public Object getValue() {
        return this.value;
    }

}
|
||||
51
src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
Normal file
51
src/main/java/org/studiorailgun/kobold/gguf/GGUFModel.java
Normal file
@ -0,0 +1,51 @@
|
||||
package org.studiorailgun.kobold.gguf;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A GGUF format model
|
||||
*/
|
||||
public class GGUFModel {
|
||||
|
||||
/**
|
||||
* The header
|
||||
*/
|
||||
protected GGUFHeader header;
|
||||
|
||||
/**
|
||||
* The tensor info
|
||||
*/
|
||||
protected List<GGUFTensorInfo> tensorInfos = new LinkedList<GGUFTensorInfo>();
|
||||
|
||||
/**
|
||||
* Padding to the nearest multiple of ALIGNMENT
|
||||
*/
|
||||
protected char _padding[];
|
||||
|
||||
/**
|
||||
* The tensor data
|
||||
*/
|
||||
protected List<ByteBuffer> tensorData = new LinkedList<ByteBuffer>();
|
||||
|
||||
public GGUFHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public List<GGUFTensorInfo> getTensorInfos() {
|
||||
return tensorInfos;
|
||||
}
|
||||
|
||||
public char[] get_padding() {
|
||||
return _padding;
|
||||
}
|
||||
|
||||
public List<ByteBuffer> getTensorData() {
|
||||
return tensorData;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
18
src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
Normal file
18
src/main/java/org/studiorailgun/kobold/gguf/GGUFString.java
Normal file
@ -0,0 +1,18 @@
|
||||
package org.studiorailgun.kobold.gguf;
|
||||
|
||||
/**
 * A gguf format string: a length paired with the string contents
 */
public class GGUFString {

    /**
     * The length of the string
     */
    protected long len;

    /**
     * The string itself
     */
    protected String string;

}
|
||||
378
src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
Normal file
378
src/main/java/org/studiorailgun/kobold/gguf/GGUFTensorInfo.java
Normal file
@ -0,0 +1,378 @@
|
||||
package org.studiorailgun.kobold.gguf;
|
||||
|
||||
/**
 * Info on a tensor in the model: its name, shape, ggml type, and where its data lives.
 * Fields are protected so the loader in this package can fill them in directly.
 */
public class GGUFTensorInfo {

    /**
     * GGML types.
     * Note that the declaration order is NOT the on-disk id: ids 4 and 5 belong to removed
     * types, so use {@link GGUFTensorInfo#getType(int)} rather than the ordinal.
     */
    public static enum GGMLType {
        GGML_TYPE_F32,
        GGML_TYPE_F16,
        GGML_TYPE_Q4_0,
        GGML_TYPE_Q4_1,
        GGML_TYPE_Q5_0,
        GGML_TYPE_Q5_1,
        GGML_TYPE_Q8_0,
        GGML_TYPE_Q8_1,
        GGML_TYPE_Q2_K,
        GGML_TYPE_Q3_K,
        GGML_TYPE_Q4_K,
        GGML_TYPE_Q5_K,
        GGML_TYPE_Q6_K,
        GGML_TYPE_Q8_K,
        GGML_TYPE_IQ2_XXS,
        GGML_TYPE_IQ2_XS,
        GGML_TYPE_IQ3_XXS,
        GGML_TYPE_IQ1_S,
        GGML_TYPE_IQ4_NL,
        GGML_TYPE_IQ3_S,
        GGML_TYPE_IQ2_S,
        GGML_TYPE_IQ4_XS,
        GGML_TYPE_I8,
        GGML_TYPE_I16,
        GGML_TYPE_I32,
        GGML_TYPE_I64,
        GGML_TYPE_F64,
        GGML_TYPE_IQ1_M,
        GGML_TYPE_COUNT,
    }

    /**
     * The name of the tensor
     */
    protected String name;

    /**
     * The number of dimensions of the tensor
     */
    protected int nDimensions;

    /**
     * The size of each dimension
     */
    protected long[] dimensions;

    /**
     * The ggml type of the tensor's elements
     */
    protected GGMLType type; //stored as an int on disk

    /**
     * The offset of the tensor's data, as stored in the file
     */
    protected long offset;

    /**
     * Gets the name of the tensor
     * @return The name
     */
    public String getName() {
        return name;
    }

    /**
     * Gets the number of dimensions
     * @return The number of dimensions
     */
    public int getnDimensions() {
        return nDimensions;
    }

    /**
     * Gets the dimension sizes (the live internal array, not a copy)
     * @return The dimension sizes
     */
    public long[] getDimensions() {
        return dimensions;
    }

    /**
     * Gets the ggml type of the tensor
     * @return The type
     */
    public GGMLType getType() {
        return type;
    }

    /**
     * Gets the offset of the tensor's data
     * @return The offset
     */
    public long getOffset() {
        return offset;
    }

    /**
     * Gets the GGML enum type from an int representation
     * @param enumVal The value represented as an int
     * @return The actual enum value, or GGML_TYPE_COUNT if the id is not recognized
     * @throws Error Thrown if the id belongs to one of the removed types (4 or 5)
     */
    protected static GGMLType getType(int enumVal){
        switch(enumVal){
            case 0: return GGMLType.GGML_TYPE_F32;
            case 1: return GGMLType.GGML_TYPE_F16;
            case 2: return GGMLType.GGML_TYPE_Q4_0;
            case 3: return GGMLType.GGML_TYPE_Q4_1;
            case 4: throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_2)");
            case 5: throw new Error("Tensor info type of deprecated type (GGML_TYPE_Q4_3)");
            case 6: return GGMLType.GGML_TYPE_Q5_0;
            case 7: return GGMLType.GGML_TYPE_Q5_1;
            case 8: return GGMLType.GGML_TYPE_Q8_0;
            case 9: return GGMLType.GGML_TYPE_Q8_1;
            case 10: return GGMLType.GGML_TYPE_Q2_K;
            case 11: return GGMLType.GGML_TYPE_Q3_K;
            case 12: return GGMLType.GGML_TYPE_Q4_K;
            case 13: return GGMLType.GGML_TYPE_Q5_K;
            case 14: return GGMLType.GGML_TYPE_Q6_K;
            case 15: return GGMLType.GGML_TYPE_Q8_K;
            case 16: return GGMLType.GGML_TYPE_IQ2_XXS;
            case 17: return GGMLType.GGML_TYPE_IQ2_XS;
            case 18: return GGMLType.GGML_TYPE_IQ3_XXS;
            case 19: return GGMLType.GGML_TYPE_IQ1_S;
            case 20: return GGMLType.GGML_TYPE_IQ4_NL;
            case 21: return GGMLType.GGML_TYPE_IQ3_S;
            case 22: return GGMLType.GGML_TYPE_IQ2_S;
            case 23: return GGMLType.GGML_TYPE_IQ4_XS;
            case 24: return GGMLType.GGML_TYPE_I8;
            case 25: return GGMLType.GGML_TYPE_I16;
            case 26: return GGMLType.GGML_TYPE_I32;
            case 27: return GGMLType.GGML_TYPE_I64;
            case 28: return GGMLType.GGML_TYPE_F64;
            case 29: return GGMLType.GGML_TYPE_IQ1_M;
            default: return GGMLType.GGML_TYPE_COUNT;
        }
    }

    /**
     * Gets the size in bits of the GGML type
     * <p>
     * NOTE(review): for the quantized types these are the per-weight figures from the wiki page
     * referenced below. For the legacy Q4/Q5/Q8 types this looks like the plain quant width,
     * without the per-block scale -- confirm before using it to size tensor data;
     * {@link #getBlockSizeInBytes} gives exact block sizes.
     * @param type The type
     * @return The number of bits that type takes
     * @throws Error Thrown if the type is GGML_TYPE_COUNT
     */
    protected static float getUnitSize(GGMLType type){
        //reference: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
        switch(type){
            case GGML_TYPE_F32:
            case GGML_TYPE_I32:
                return 32;
            case GGML_TYPE_F16:
            case GGML_TYPE_I16:
                return 16;
            case GGML_TYPE_F64:
            case GGML_TYPE_I64:
                return 64;
            case GGML_TYPE_Q4_0:
            case GGML_TYPE_Q4_1:
                return 4;
            case GGML_TYPE_Q5_0:
            case GGML_TYPE_Q5_1:
                return 5;
            case GGML_TYPE_Q8_0:
            case GGML_TYPE_Q8_1:
            case GGML_TYPE_Q8_K:
            case GGML_TYPE_I8:
                return 8;
            case GGML_TYPE_Q2_K:
                return 2.5625f;
            case GGML_TYPE_Q3_K:
            case GGML_TYPE_IQ3_S:
                return 3.4375f;
            case GGML_TYPE_Q4_K:
            case GGML_TYPE_IQ4_NL:
                return 4.5f;
            case GGML_TYPE_Q5_K:
                return 5.5f;
            case GGML_TYPE_Q6_K:
                return 6.5625f;
            case GGML_TYPE_IQ2_XXS:
                return 2.0625f;
            case GGML_TYPE_IQ2_XS:
                return 2.31f;
            case GGML_TYPE_IQ3_XXS:
                return 3.0625f;
            case GGML_TYPE_IQ1_S:
                return 1.5f;
            case GGML_TYPE_IQ2_S:
                return 2.5f;
            case GGML_TYPE_IQ4_XS:
                return 4.25f;
            case GGML_TYPE_IQ1_M:
                return 1.75f;
            case GGML_TYPE_COUNT:
            default:
                throw new Error("Undefined type!");
        }
    }

    /**
     * Gets the block size of a given GGML type in bytes.
     * For the plain numeric types a block is a single element. For the quantized types a
     * block is the whole packed struct (quants plus scales) as laid out in ggml-common.h.
     * @param type The type
     * @return The size of a block in the tensor data of this type in bytes
     * @throws Error Thrown if the type is GGML_TYPE_COUNT
     */
    protected static float getBlockSizeInBytes(GGMLType type){
        //reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
        switch(type){

            //regular types -- one element per block
            case GGML_TYPE_I8:
                return 1;
            case GGML_TYPE_F16:
            case GGML_TYPE_I16:
                return 2;
            case GGML_TYPE_F32:
            case GGML_TYPE_I32:
                return 4;
            case GGML_TYPE_F64:
            case GGML_TYPE_I64:
                return 8;

            //legacy quants -- fp16 scale (and min) plus the packed quants
            case GGML_TYPE_Q4_0:
            case GGML_TYPE_IQ4_NL:
                return 18; //2 + 16
            case GGML_TYPE_Q4_1:
                return 20; //2 + 2 + 16
            case GGML_TYPE_Q5_0:
                return 22; //2 + 4 + 16
            case GGML_TYPE_Q5_1:
                return 24; //2 + 2 + 4 + 16
            case GGML_TYPE_Q8_0:
                return 34; //2 + 32
            case GGML_TYPE_Q8_1:
                return 36; //2 + 2 + 32 in the current ggml layout

            //k-quants -- these match the superblock field layouts in Quantization
            case GGML_TYPE_Q2_K:
                return 84; //16 + 64 + 2 + 2
            case GGML_TYPE_Q3_K:
            case GGML_TYPE_IQ3_S:
                return 110; //Q3_K: 32 + 64 + 12 + 2
            case GGML_TYPE_Q4_K:
                return 144; //2 + 2 + 12 + 128
            case GGML_TYPE_Q5_K:
                return 176; //2 + 2 + 12 + 32 + 128
            case GGML_TYPE_Q6_K:
                return 210; //128 + 64 + 16 + 2
            case GGML_TYPE_Q8_K:
                return 292; //4 + 256 + 2 * 16

            //i-quants
            case GGML_TYPE_IQ2_XXS:
                return 66;
            case GGML_TYPE_IQ2_XS:
                return 74;
            case GGML_TYPE_IQ2_S:
                return 82;
            case GGML_TYPE_IQ3_XXS:
                return 98;
            case GGML_TYPE_IQ1_S:
                return 50;
            case GGML_TYPE_IQ1_M:
                return 56;
            case GGML_TYPE_IQ4_XS:
                return 136;

            case GGML_TYPE_COUNT:
            default:
                throw new Error("Undefined type!");
        }
    }

}
|
||||
@ -0,0 +1,245 @@
|
||||
package org.studiorailgun.kobold.gguf.quant;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
* 2-bit quanization
|
||||
*/
|
||||
public class Quantization {
|
||||
|
||||
//reference for superblock structs: https://github.com/byroneverson/llm.cpp/blob/master/k_quants.h
|
||||
//also maybe reference: https://github.com/ggerganov/llama.cpp/blob/master/ggml/src/ggml-common.h
|
||||
//tensor encoding scheme wiki page: https://github.com/ggerganov/llama.cpp/wiki/Tensor-Encoding-Schemes
|
||||
//thread explaining quantization scheme: https://github.com/ggerganov/llama.cpp/pull/8151
|
||||
//https://github.com/ggerganov/llama.cpp/blob/75af08c475e285888f66556d0f459c533b7deb95/ggml/src/ggml-impl.h
|
||||
|
||||
/**
|
||||
* Super block size
|
||||
*/
|
||||
static final int QK_K = 256;
|
||||
|
||||
/**
|
||||
* Scale size
|
||||
*/
|
||||
static final int K_SCALE_SIZE = 12;
|
||||
|
||||
/**
|
||||
* A buffer used for type conversion
|
||||
*/
|
||||
static ByteBuffer conversionBuffer = ByteBuffer.allocate(8);
|
||||
|
||||
|
||||
/**
|
||||
* 2-bit quantization
|
||||
*/
|
||||
public static class Block_Q2_K implements Superblock {
|
||||
|
||||
/**
|
||||
* scales and mins, quantized with 4 bits
|
||||
*/
|
||||
char[] scales = new char[QK_K/16];
|
||||
|
||||
/**
|
||||
* quants
|
||||
*/
|
||||
char[] qs = new char[QK_K/4];
|
||||
|
||||
/**
|
||||
* super block scale for quantized scales
|
||||
*/
|
||||
short d;
|
||||
|
||||
/**
|
||||
* super block scale for quantized mins
|
||||
*/
|
||||
short dmin;
|
||||
|
||||
/**
|
||||
* Gets the size of the superblock
|
||||
* @return The size of the superblock
|
||||
*/
|
||||
public int getSize(){
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 3-bit quantization
|
||||
*/
|
||||
public static class Block_Q3_K implements Superblock {
|
||||
|
||||
/**
|
||||
* quants - high bit
|
||||
*/
|
||||
char[] hmask = new char[QK_K/8];
|
||||
|
||||
/**
|
||||
* quants - low 2 bits
|
||||
*/
|
||||
char[] qs = new char[QK_K/4];
|
||||
|
||||
/**
|
||||
* scales, quantized with 6 bits
|
||||
*/
|
||||
char[] scales = new char[12];
|
||||
|
||||
/**
|
||||
* super block scale
|
||||
*/
|
||||
short dmin;
|
||||
|
||||
/**
|
||||
* Gets the size of the superblock
|
||||
* @return The size of the superblock
|
||||
*/
|
||||
public int getSize(){
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 4-bit quantization
|
||||
*/
|
||||
public static class Block_Q4_K implements Superblock {
|
||||
|
||||
/**
|
||||
* Super block scale for quantized scales
|
||||
*/
|
||||
short d;
|
||||
|
||||
/**
|
||||
* Super block scale for quantized mins
|
||||
*/
|
||||
short dmin;
|
||||
|
||||
/**
|
||||
* Shales and mins, quantized with 6 bits
|
||||
*/
|
||||
char[] scales = new char[K_SCALE_SIZE];
|
||||
|
||||
/**
|
||||
* 4-bit quants
|
||||
*/
|
||||
char[] qs = new char[QK_K/2];
|
||||
|
||||
/**
|
||||
* Gets the size of the superblock
|
||||
* @return The size of the superblock
|
||||
*/
|
||||
public int getSize(){
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 5-bit quantization
|
||||
*/
|
||||
public static class Block_Q5_K implements Superblock {
|
||||
|
||||
/**
|
||||
* Super block scale for quantized scales
|
||||
*/
|
||||
short d;
|
||||
|
||||
/**
|
||||
* Super block scale for quantized mins
|
||||
*/
|
||||
short dmin;
|
||||
|
||||
/**
|
||||
* Scales and mins, quantized with 6 bits
|
||||
*/
|
||||
char[] scales = new char[K_SCALE_SIZE];
|
||||
|
||||
/**
|
||||
* quants, high bit
|
||||
*/
|
||||
char[] qh = new char[QK_K/8];
|
||||
|
||||
/**
|
||||
* quants, low 4 bits
|
||||
*/
|
||||
char[] qs = new char[QK_K/2];
|
||||
|
||||
/**
|
||||
* Gets the size of the superblock
|
||||
* @return The size of the superblock
|
||||
*/
|
||||
public int getSize(){
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 6-bit quantization
|
||||
*/
|
||||
public static class Block_Q6_K implements Superblock {
|
||||
|
||||
/**
|
||||
* Quants, lower 4 bits
|
||||
*/
|
||||
char[] ql = new char[QK_K/2];
|
||||
|
||||
/**
|
||||
* Quants, upper 2 bits
|
||||
*/
|
||||
char[] qh = new char[QK_K/4];
|
||||
|
||||
/**
|
||||
* scales, quantized with 8 bits
|
||||
*/
|
||||
char[] scales = new char[QK_K/16];
|
||||
|
||||
/**
|
||||
* super block scale
|
||||
*/
|
||||
short d;
|
||||
|
||||
/**
|
||||
* Gets the size of the superblock
|
||||
* @return The size of the superblock
|
||||
*/
|
||||
public int getSize(){
|
||||
return QK_K/2 + QK_K/4 + QK_K/16 + 2;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 8-bit quantization
|
||||
*/
|
||||
public static class Block_Q8_K implements Superblock {
|
||||
|
||||
/**
|
||||
* Delta
|
||||
*/
|
||||
float d;
|
||||
|
||||
/**
|
||||
* quants
|
||||
*/
|
||||
char[] qs = new char[QK_K];
|
||||
|
||||
/**
|
||||
* sum of the quants in groups of 16
|
||||
*/
|
||||
short[] bsums = new short[QK_K/16];
|
||||
|
||||
/**
|
||||
* Gets the size of the superblock
|
||||
* @return The size of the superblock
|
||||
*/
|
||||
public int getSize(){
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,14 @@
|
||||
package org.studiorailgun.kobold.gguf.quant;
|
||||
|
||||
/**
 * A superblock of quantized weights.
 * Implemented by each of the quantization block layouts.
 */
public interface Superblock {

    /**
     * Gets the size of the superblock
     * @return The size of the superblock
     */
    int getSize();

}
|
||||
Loading…
Reference in New Issue
Block a user