Skip to content

Commit

Permalink
adding a cookbook for finetune embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
taha-aiplanet authored and tarun-aiplanet committed May 7, 2024
1 parent 03e4990 commit 2210cc5
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions cookbook/finetuning_embedding_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from beyondllm import source, retrieve, llms
from beyondllm.embeddings import FineTuneEmbeddings
import os


# Cookbook: fine-tune an embedding model on a PDF and use it for retrieval.
#
# Flow: configure the API key -> load the PDF as retrieval data -> fine-tune
# an embedding model on the same PDF -> build a retriever -> run one query.

# Provide the Google API key through the environment (presumably consumed by
# llms.GeminiModel() -- confirm against the beyondllm docs).
# setdefault keeps a key that is already exported in the shell; the
# placeholder below is only used when none is set and must be replaced with a
# real key before running.
os.environ.setdefault("GOOGLE_API_KEY", "your-api-key")

# Single source of truth for the document path: the same PDF is used as the
# retrieval corpus and as the fine-tuning input.
PDF_PATH = "build-career-in-ai.pdf"

# Importing and preparing the data (chunk_size=1024, no overlap between chunks)
data = source.fit(PDF_PATH, dtype="pdf", chunk_size=1024, chunk_overlap=0)

# List of files to train the embeddings
list_of_files = [PDF_PATH]

# Initializing a Gemini LLM model; it is handed to the trainer below
llm = llms.GeminiModel()

# Creating an instance of FineTuneEmbeddings
fine_tuned_model = FineTuneEmbeddings()

# Training the embedding model, starting from the "BAAI/bge-small-en-v1.5"
# base model. The last argument looks like the name/path under which the
# result is stored (TODO confirm).
# NOTE: it is spelled "fintune" (sic) -- load_model() below must be given the
# exact same string.
embed_model = fine_tuned_model.train(list_of_files, "BAAI/bge-small-en-v1.5", llm, "fintune")

# Option to load an already fine-tuned model instead of training again
# embed_model = fine_tuned_model.load_model("fintune")

# Creating a retriever using the fine-tuned embeddings (top_k=4)
retriever = retrieve.auto_retriever(data, embed_model, type="normal", top_k=4)

# Retrieving information using a query and printing whatever comes back
print(retriever.retrieve("How to excel in AI?"))

0 comments on commit 2210cc5

Please sign in to comment.