"""Benchmark LLM inference latency against a local OpenAI-compatible server."""
import time

import requests

# Local inference server (e.g. LM Studio / llama.cpp server) completions endpoint.
ENDPOINT = "http://localhost:1234/v1/completions"


def main() -> None:
    """Send one completion request and report round-trip latency.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.ConnectionError / requests.Timeout: if the server is
            unreachable or does not answer within the timeout.
    """
    payload = {
        "prompt": "Hello, how are you?",
        "max_tokens": 100,
    }

    # perf_counter is monotonic and high-resolution — the right clock for
    # measuring elapsed wall time of a single request.
    start = time.perf_counter()
    # timeout keeps the benchmark from hanging forever if the server is down.
    response = requests.post(ENDPOINT, json=payload, timeout=120)
    elapsed = time.perf_counter() - start

    response.raise_for_status()
    print(f"Status:  {response.status_code}")
    print(f"Latency: {elapsed:.3f}s")


if __name__ == "__main__":
    main()