"""A simple LLM chatbot"""

import argparse

from llama_cpp import Llama
from rich.console import Console


def parse_args():
    """Parse and return command-line arguments."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-m", "--model", type=str,
                        default="./zephyr-7b-alpha.Q3_K_M.gguf",
                        help="path to a GGUF model file")
    return parser.parse_args()


def get_llama_response(llm, prompt):
    """Get a response from the Llama model."""
    try:
        # echo=False returns only the generated text (echo=True would repeat
        # the prompt); the stop sequences match the prompt template in main().
        output = llm(prompt, max_tokens=60, stop=["Question:", "\n"], echo=False)
        text = output.get('choices', [{}])[0].get('text', "")
        return text.strip() or "No response generated."
    except Exception as e:
        return f"Error generating response: {e}"


def main():
    """Run the chatbot's question-and-answer loop."""
    args = parse_args()
    llm = Llama(model_path=args.model, verbose=False)
    console = Console()

    # Read questions until the user types 'exit' or 'quit'.
    while True:
        user_input = console.input("[bold cyan]Your question: [/bold cyan]")
        if user_input.lower() in ['exit', 'quit']:
            break

        prompt = f"Question: {user_input} Answer: "
        response_text = get_llama_response(llm, prompt)
        console.print(f"[blue]Answer: {response_text}[/blue]")


if __name__ == "__main__":
|
|
main()
|