"""Benchmark LLM inference latency against a local OpenAI-compatible server."""
import time

import requests

# Local inference server (e.g. LM Studio / llama.cpp server) completions endpoint.
ENDPOINT = "http://localhost:1234/v1/completions"


def main() -> None:
    """Send one completion request and report round-trip latency.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.ConnectionError / requests.Timeout: if the server is
            unreachable or does not answer within the timeout.
    """
    payload = {
        "prompt": "Hello, how are you?",
        "max_tokens": 100,
    }

    # perf_counter is monotonic and high-resolution — the right clock for
    # measuring elapsed wall time of a single request.
    start = time.perf_counter()
    # timeout keeps the benchmark from hanging forever if the server is down.
    response = requests.post(ENDPOINT, json=payload, timeout=120)
    elapsed = time.perf_counter() - start

    response.raise_for_status()
    print(f"Status:  {response.status_code}")
    print(f"Latency: {elapsed:.3f}s")


if __name__ == "__main__":
    main()