https://docs.smith.langchain.com/
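Both snippets below assume LangSmith credentials are already configured. A minimal setup sketch, assuming the standard LANGSMITH_TRACING / LANGSMITH_API_KEY environment variables (plus OPENAI_API_KEY for the model call); the placeholder values are illustrative, not real keys:

import os

# Enable tracing and authenticate the LangSmith SDK (placeholder values).
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = "<your-langsmith-api-key>"
# The wrapped OpenAI client also needs its own key.
os.environ["OPENAI_API_KEY"] = "<your-openai-api-key>"

Set these before creating any clients so that subsequent calls are traced.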
import openai
from langsmith import wrappers, traceable
# Auto-trace LLM calls in-context
client = wrappers.wrap_openai(openai.Client())
@traceable # Auto-trace this function
def pipeline(user_input: str):
    result = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="gpt-4o-mini"
    )
    return result.choices[0].message.content
pipeline("Hello, world!")
from langsmith import Client, traceable
client = Client()
# Define dataset: these are your test cases
dataset = client.create_dataset(
"Sample Dataset",
description="A sample dataset in LangSmith.",
)
client.create_examples(
    inputs=[
        {"postfix": "to LangSmith"},
        {"postfix": "to Evaluations in LangSmith"},
    ],
    outputs=[
        {"response": "Welcome to LangSmith"},
        {"response": "Welcome to Evaluations in LangSmith"},
    ],
    dataset_id=dataset.id,
)
# Define an interface to your application (tracing optional)
@traceable
def dummy_app(inputs: dict) -> dict:
return {"response": "Welcome " + inputs["postfix"]}
# Define your evaluator(s)
def exact_match(outputs: dict, reference_outputs: dict) -> bool:
    return outputs["response"] == reference_outputs["response"]
# Run the evaluation
experiment_results = client.evaluate(
    dummy_app,  # Your AI system goes here
    data=dataset,  # The data to predict and grade over
    evaluators=[exact_match],  # The evaluators to score the results
    experiment_prefix="sample-experiment",  # The name of the experiment
    metadata={"version": "1.0.0", "revision_id": "beta"},  # Metadata about the experiment
    max_concurrency=4,  # Optional: run up to 4 examples concurrently
)
# Analyze the results via the UI or programmatically
# If you have 'pandas' installed, you can view the results as a pandas DataFrame:
experiment_results.to_pandas()
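The evaluators list accepts any number of functions with the same (outputs, reference_outputs) signature, so additional checks can be scored in the same experiment. A minimal sketch of one more evaluator; exact_match_ignore_case is a hypothetical name, not part of the example above:

# Hypothetical second evaluator: compare responses case-insensitively.
def exact_match_ignore_case(outputs: dict, reference_outputs: dict) -> bool:
    return outputs["response"].lower() == reference_outputs["response"].lower()

# Passed alongside the original evaluator, e.g.:
#   evaluators=[exact_match, exact_match_ignore_case]

Each evaluator should appear as its own feedback score on the experiment in the LangSmith UI.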