From 27b9d535160c5eb5844441ab1a3b2ae676fa350e Mon Sep 17 00:00:00 2001
From: Isaak
Date: Tue, 21 Nov 2023 00:40:09 +0100
Subject: [PATCH] it works!

---
 main.py | 138 ++++++++++----------------------
 1 file changed, 24 insertions(+), 114 deletions(-)

diff --git a/main.py b/main.py
index 638c065..ec0a51c 100644
--- a/main.py
+++ b/main.py
@@ -1,130 +1,40 @@
-from pathlib import Path
-import requests
-import llama_cpp_python as llm
+"""A simple LLM chatbot"""
+import argparse
+from llama_cpp import Llama
 from rich.console import Console
-from rich.layout import Layout
-from rich.panel import Panel
-from rich.text import Text
 
-def download_model(url, model_path):
-    """
-    Download the GGUF model from the given URL if it's not present locally.
-    Parameters:
-    url (str): URL to download the model from.
-    model_path (Path): Local path to save the downloaded model.
-    """
-    if not model_path.exists():
-        try:
-            response = requests.get(url, allow_redirects=True)
-            response.raise_for_status()
-            model_path.write_bytes(response.content)
-            print(f"Model downloaded to {model_path}")
-        except requests.RequestException as e:
-            print(f"Error downloading the model: {e}")
+def create_arg_parser():
+    """Create and return the argument parser."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m", "--model", type=str, default="./zephyr-7b-alpha.Q3_K_M.gguf")
+    return parser.parse_args()
 
-def load_model(model_path):
-    """
-    Load a GGUF format model for the chatbot.
-    Parameters:
-    model_path (Path): Path to the GGUF model file.
+def get_llama_response(llm, prompt):
+    """Get response from Llama model."""
+    try:
+        output = llm(prompt, max_tokens=60, stop=["Q:", "\n"], echo=False)
+        return output.get('choices', [{}])[0].get('text', "No response generated.")
+    except Exception as e:
+        return f"Error generating response: {e}"
 
-    Returns:
-    Model: The loaded GGUF model.
-    """
-    return llm.load(str(model_path))
-
-def generate_text(model, prompt, max_tokens=256, temperature=0.1, top_p=0.5, echo=False, stop=None):
-    """
-    Generate a response from the LLM based on the given prompt.
-
-    Parameters:
-    model: The loaded GGUF model.
-    prompt (str): The input prompt for the model.
-    max_tokens (int): Maximum number of tokens for the response.
-    temperature (float): Token sampling temperature.
-    top_p (float): Nucleus sampling parameter.
-    echo (bool): If True, the prompt is included in the response.
-    stop (list): Tokens at which the model should stop generating text.
-
-    Returns:
-    str: The generated text response.
-    """
-    if stop is None:
-        stop = ["#"]
-    output = model.generate(
-        prompt,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        echo=echo,
-        stop=stop,
-    )
-    return output["choices"][0]["text"].strip()
-
-def generate_prompt_from_template(user_input):
-    """
-    Format the user input into a prompt suitable for the chatbot.
-
-    Parameters:
-    user_input (str): The user's input message.
-
-    Returns:
-    str: The formatted prompt for the chatbot.
-    """
-    return f"You are a helpful chatbot.\n{user_input}"
-
-def create_chat_layout():
-    """
-    Create the layout for the chatbot interface using rich.
-
-    Returns:
-    Layout: The layout object for the chat interface.
-    """
-    layout = Layout()
-
-    layout.split(
-        Layout(name="header", size=3),
-        Layout(ratio=1, name="main"),
-        Layout(name="footer", size=3)
-    )
-
-    layout["header"].update(Panel("[bold magenta]Llama.cpp Chatbot[/]", style="bold blue"))
-    layout["footer"].update(Text("Type your message and press [bold green]Enter[/]. Type 'exit' to end the chat.", justify="center"))
-    return layout
 
 def main():
-    """
-    The main function to run the chatbot.
-    """
-    model_path = Path("zephyr-7b-alpha.Q3_K_M.gguf")
-    model_url = "https://huggingface.co/TheBloke/zephyr-7B-alpha-GGUF/raw/main/zephyr-7b-alpha.Q3_K_M.gguf"
-
-    if not model_path.exists():
-        print("Model not found locally. Downloading...")
-        download_model(model_url, model_path)
-
-    model = load_model(model_path)
-
+    """Main function to run the chatbot."""
+    args = create_arg_parser()
+    llm = Llama(model_path=args.model, verbose=False)
     console = Console()
-    layout = create_chat_layout()
-    console.print(layout)
-
-    chat_history = ""
     while True:
-        user_input = console.input("[bold green]You: [/]")
-        if user_input.lower() == "exit":
+        user_input = console.input("[bold cyan]Your question: [/bold cyan]")
+        if user_input.lower() in ['exit', 'quit']:
             break
-        prompt = generate_prompt_from_template(user_input)
-        bot_response = generate_text(model, prompt, max_tokens=356)
-
-        chat_history += f"[bold green]You:[/] {user_input}\n[bold yellow]Bot:[/] {bot_response}\n"
-        chat_panel = Panel(chat_history, title="Chat History")
-        layout["main"].update(chat_panel)
-        console.print(layout)
+        prompt = f"Question: {user_input} Answer: "
+        response_text = get_llama_response(llm, prompt)
+        console.print(f"[blue]Answer: {response_text}[/blue]")
+
 
 if __name__ == "__main__":
     main()
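
Below is a minimal sketch, not part of the patch, that exercises the same llama-cpp-python call and prompt format the new main.py uses, outside the interactive loop. It assumes llama-cpp-python is installed and that a GGUF model file exists at the patch's default path; the example question is purely illustrative.

# Sketch only: drives the same Llama call as the patched main.py.
# Assumes `pip install llama-cpp-python` and a local GGUF file at the path below.
from llama_cpp import Llama

llm = Llama(model_path="./zephyr-7b-alpha.Q3_K_M.gguf", verbose=False)
prompt = "Question: What is the capital of France? Answer: "  # hypothetical question
output = llm(prompt, max_tokens=60, stop=["Q:", "\n"], echo=False)
print(output.get('choices', [{}])[0].get('text', "No response generated."))

The patched script itself would be started with, for example, python main.py -m ./zephyr-7b-alpha.Q3_K_M.gguf (the -m/--model flag comes from create_arg_parser).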