import { LLMLingua2 } from "@atjsh/llmlingua-2";
import { Tiktoken } from "js-tiktoken/lite";
import o200k_base from "js-tiktoken/ranks/o200k_base";
// Example: task-agnostic prompt compression with LLMLingua-2's
// multilingual-BERT model, running locally via transformers.js.
const MODEL_NAME = "Arcoldd/llmlingua4j-bert-base-onnx";

// Tokenizer over OpenAI's o200k_base ranks; handed to the compressor
// so token accounting matches OpenAI-style tokenization.
const oaiTokenizer = new Tiktoken(o200k_base);

// Build the compressor. device/dtype are forwarded to transformers.js;
// "auto" lets the runtime pick the best available backend.
const { promptCompressor } = await LLMLingua2.WithBERTMultilingual(MODEL_NAME, {
  transformerJSConfig: { device: "auto", dtype: "fp32" },
  oaiTokenizer,
  // Empty subfolder: model files live at the repo root — TODO confirm
  // against the hosted model layout.
  modelSpecificOptions: { subfolder: "" },
});

// rate: 0.8 — presumably the fraction of tokens to retain; verify
// against the library's compress_prompt documentation.
const compressedText = await promptCompressor.compress_prompt(
  "LLMLingua-2, a small-size yet powerful prompt compression method trained via data distillation from GPT-4 for token classification with a BERT-level encoder, excels in task-agnostic compression. It surpasses LLMLingua in handling out-of-domain data, offering 3x-6x faster performance.",
  { rate: 0.8 }
);

console.log({ compressedText });
// Factory function that creates an LLMLingua-2 PromptCompressor instance backed by the BERT multilingual model.