import ollama
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import PlainTextResponse, FileResponse
from pydantic import BaseModel
import uvicorn
import os
from annoy import AnnoyIndex
import numpy as np

# Constants for the model names and on-disk files
CHAT_MODEL = 'llama3'                 # For generating text responses
EMBEDDING_MODEL = 'nomic-embed-text'  # For generating 768-dimensional embeddings
PERSONALITY_FILE = "personality.txt"  # File containing the system prompt / personality
ANNOY_INDEX_FILE = "memory.ann"       # File the Annoy index is persisted to
VECTOR_DIM = 768                      # Annoy index dimensionality; must match the embedding model

# Initialize FastAPI app
app = FastAPI()

# CORS middleware setup
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],      # Allow all origins, or specify a domain.
    allow_credentials=True,
    allow_methods=["*"],      # Allow all HTTP methods.
    allow_headers=["*"],      # Allow all headers.
)

# Serve static files from the "static" directory
app.mount("/static", StaticFiles(directory="static"), name="static")

# Initialize the Annoy index. Annoy indexes are immutable once built, so every
# stored embedding is also kept in this in-memory list and the index is rebuilt
# from it whenever a new conversation is stored.
annoy_index = AnnoyIndex(VECTOR_DIM, 'angular')
stored_embeddings = []

# If a saved index exists, load it and recover its vectors into the list
if os.path.exists(ANNOY_INDEX_FILE):
    annoy_index.load(ANNOY_INDEX_FILE)
    stored_embeddings = [annoy_index.get_item_vector(i)
                         for i in range(annoy_index.get_n_items())]


# Request body for the /send_prompt/ endpoint
class PromptModel(BaseModel):
    prompt: str  # Ensure the prompt field is a string


# Load the system prompt/personality from the file
def load_personality():
    try:
        with open(PERSONALITY_FILE, 'r') as file:
            return file.read().strip()
    except FileNotFoundError:
        return "Default system instructions or personality prompt."


# Dummy function that simulates the chat model interaction. It is not used by the
# route below, which calls ollama.chat() directly; it is kept as a stand-in for
# testing without a running model.
def chat_with_model(system_prompt, user_prompt):
    return f"System says: {system_prompt}. You said: {user_prompt}"


# Store a conversation's embedding in the Annoy index
def store_conversation(prompt, response):
    # Simulate generating an embedding for the prompt/response pair.
    # A real implementation would call the embedding model instead.
    embedding = np.random.rand(VECTOR_DIM).tolist()  # Dummy embedding
    stored_embeddings.append(embedding)

    # Rebuild the index from every stored embedding (items cannot be added to an
    # already-built Annoy index) and persist it to disk.
    global annoy_index
    annoy_index = AnnoyIndex(VECTOR_DIM, 'angular')
    for i, vector in enumerate(stored_embeddings):
        annoy_index.add_item(i, vector)
    annoy_index.build(10)  # Build the index with 10 trees
    annoy_index.save(ANNOY_INDEX_FILE)


# Retrieve similar conversations (placeholder). A real implementation would embed
# the prompt and query annoy_index.get_nns_by_vector().
def get_similar_conversations(prompt):
    return []


# Handle a prompt from the client
@app.post("/send_prompt/")
def send_prompt(data: PromptModel):
    prompt = data.prompt

    # Load the personality/system prompt from file
    system_prompt = load_personality()

    # Build the conversation: system instructions plus the user's input
    convo = [
        {'role': 'system', 'content': system_prompt},  # Only for the AI's context
        {'role': 'user', 'content': prompt}            # User input
    ]

    # Get the AI response from the chat model
    response = ollama.chat(model=CHAT_MODEL, messages=convo)['message']['content']

    # Store the new conversation (user prompt and response)
    store_conversation(prompt, response)

    # Return only the response, never the system prompt
    return PlainTextResponse(response)


# Serve the index.html file at the root URL
@app.get("/")
def read_root():
    return FileResponse('static/index.html')


# Start the FastAPI app
if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)
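
# ---------------------------------------------------------------------------
# Notes (a minimal sketch, not wired into the application above):
#
# The dummy embedding in store_conversation() could be replaced with a real one
# from Ollama. The helper below is an assumption about how that might look; it
# relies on ollama.embeddings() returning an 'embedding' field and on VECTOR_DIM
# matching the model's output size (768 for nomic-embed-text).
#
#     def embed_text(text):
#         return ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)['embedding']
#
# get_similar_conversations() could then embed the incoming prompt and look up
# its nearest neighbours with annoy_index.get_nns_by_vector(embed_text(prompt), 5).
#
# Example request once the server is running on the default host/port:
#
#     curl -X POST http://127.0.0.1:8000/send_prompt/ \
#          -H "Content-Type: application/json" \
#          -d '{"prompt": "Hello"}'
# ---------------------------------------------------------------------------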