Commit b09191b4 authored by Alp Deniz Ogut
Browse files

Add beavertails tests

No related merge requests found
Pipeline #15245 passed with stage
in 26 seconds
Showing with 75 additions and 0 deletions
+75 -0
from typing import List
import os
import sys
import json
import httpx
import pandas as pd
from time import time
from datasets import load_dataset
# The target environment is a required command-line argument. sys.argv[0] is
# always the script name, so fewer than two entries means the environment was
# not supplied; bail out before sys.argv[1] can raise an IndexError.
if len(sys.argv) < 2:
    print("Provide an environment to test")
    sys.exit(1)

# Environment name; it is interpolated into the API host name below and into
# the name of the CSV file written at the end of the run.
env = sys.argv[1]
LLM_API_URL = f"https://llm.{env}.backend.memri.io"
MAX_REQUESTS = 10  # 0 = no limit

# load beavertails (only the "test" split is used)
dataset = load_dataset("PKU-Alignment/BeaverTails-Evaluation")
dataset = dataset["test"]
def _sse_payload(line: str) -> str:
    """Return the data payload of one streamed line, or "" if it has none."""
    line = line.strip()
    if line.startswith("data:"):
        return line[len("data:"):].strip()
    # Bare JSON lines (no event-stream framing) pass through unchanged; blank
    # separators, ":" comments and other event-stream fields carry no payload.
    return line if line.startswith("{") else ""


def _chunk_text(payload: str) -> str:
    """Return the text carried by one JSON stream chunk ("" if it has none)."""
    choice = json.loads(payload)["choices"][0]
    if "text" in choice:
        return choice["text"] or ""
    # NOTE(review): fallback for chat-style chunks that put the text under
    # "delta" -> "content"; confirm which shape the server actually emits.
    delta = choice.get("delta") or {}
    return delta.get("content") or ""


def llm_request(messages: List[dict[str, str]], temperature: float = 0.5) -> str:
    """Send *messages* to the LLM API and return the concatenated streamed reply.

    The request is a streaming POST to ``{LLM_API_URL}/v1/chat/completions``
    with a 120 second timeout.

    Args:
        messages: Chat messages, each a dict of string keys to string values.
        temperature: Sampling temperature forwarded in the request body.

    Returns:
        The reply text assembled from the streamed chunks. If the request or
        the parsing fails, the error is printed and whatever text was received
        before the failure is returned (possibly an empty string), so the
        result is always a ``str``.
    """
    # prepare LLM request
    data = {
        # "model": model, # "mistral-7b-instruct-v0.1.Q4_K_M"
        "messages": messages,
        "stream": True,
        "temperature": temperature,
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
    }

    parts: list[str] = []
    # Deliberately broad: this is the best-effort boundary for a batch run, so
    # one failed prompt is reported and does not abort the remaining prompts.
    try:
        with httpx.stream(
            "POST",
            f"{LLM_API_URL}/v1/chat/completions",
            json=data,
            headers=headers,
            timeout=120,
        ) as r:
            # Surface HTTP errors instead of trying to parse an error body.
            r.raise_for_status()
            for line in r.iter_lines():
                payload = _sse_payload(line)
                if not payload:
                    continue
                if payload == "[DONE]":
                    # End-of-stream sentinel; it is not JSON.
                    break
                parts.append(_chunk_text(payload))
    except Exception as e:
        print(f"Error while getting streaming reply: {e}")
    return "".join(parts)
# Run the prompts through the LLM one at a time, collecting three parallel
# lists (prompt, category, reply) that become the columns of the output CSV.
prompts: list[str] = []
categories: list[str] = []
replies: list[str] = []

sstime = time()  # start of the whole run
for row in dataset:
    stime = time()  # start of this request
    prompt = row["prompt"]
    category = row["category"]
    # NOTE(review): the role is sent upper-case; confirm the API accepts
    # "USER" rather than requiring the lower-case "user".
    messages = [{"role": "USER", "content": prompt}]
    reply = llm_request(messages=messages)
    print(f"Prompt {len(prompts)+1}: {prompt}, Category: {category}, Reply: {reply}")
    print(f"Took {time() - stime} seconds")

    prompts.append(prompt)
    categories.append(category)
    replies.append(reply)

    # Stop once exactly MAX_REQUESTS requests have been made; a limit of 0
    # is falsy and therefore means "no limit".
    if MAX_REQUESTS and len(replies) >= MAX_REQUESTS:
        break
print(f"ALL {len(replies)} replies took {time() - sstime} seconds")

# save as CSV
beaver = pd.DataFrame({"prompt": prompts, "category": categories, "reply": replies})
beaver.to_csv(f"beaver_evaluation_{env}.csv")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment