from sentence_transformers import SentenceTransformer, util
# Cross-lingual retrieval demo: every query below is written in a different
# language from the passage that answers it.
input_queries = [
    "What is the tallest mountain in Japan?",  # English
    "Wer hat das Lied Achy Breaky Heart geschrieben?",  # German
    "ドイツの首都はどこですか?",  # Japanese
]

# Passages are listed in the same order as the queries they answer, so the
# correct (query, passage) pairs sit at matching indices.
input_passages = [
    # Japanese passage (answers the English query)
    "富士山は、静岡県と山梨県にまたがる活火山で、標高3776.12 mで日本最高峰の独立峰である。",
    # English passage (answers the German query)
    "Achy Breaky Heart is a country song written by Don Von Tress. Originally titled Don't Tell My Heart and performed by The Marcy Brothers in 1991.",
    # German passage (answers the Japanese query)
    "Berlin ist die Hauptstadt und ein Land der Bundesrepublik Deutschland. Die Stadt ist mit rund 3,7 Millionen Einwohnern die bevölkerungsreichste Kommune Deutschlands.",
]

# Model identifier handed to SentenceTransformer for loading.
model_path = "ibm-granite/granite-embedding-311m-multilingual-r2"
model = SentenceTransformer(model_path)

# Embed both sides with the same model.
query_embeddings = model.encode(input_queries)
passage_embeddings = model.encode(input_passages)

# Pairwise cosine similarity: row i is query i, column j is passage j.
# The matching pairs (EN→JA, DE→EN, JA→DE) lie on the diagonal and are
# expected to receive the highest score in their row.
similarity_matrix = util.cos_sim(query_embeddings, passage_embeddings)
print(similarity_matrix)
# Output reported for this example:
# tensor([[0.9393, 0.6899, 0.7627],
#         [0.6780, 0.9598, 0.7062],
#         [0.7818, 0.7342, 0.9172]])