I built a program using a local LLM that requires the wake word I trained before it listens to user questions. The transcribed input is then fed into the LLM, and the LLM's output is fed into the ElevenLabs API for text-to-speech.
My issue is that when the program first runs it works perfectly fine and waits for the wake word before it starts listening, but after that it basically ignores the need for a wake word and just records everything it hears.
Text Processing
def process_text(text):
    """Send one transcribed utterance to the LLM and print the reply.

    Blank input is skipped. Otherwise the text is passed through
    ``fix_acronyms``, appended to the global ``chat_history`` (which is then
    trimmed with ``trim_chat_history_to_token_limit``), and the last line of
    that history is sent to ``llm``. When the reply contains the token
    "Unfortunately," the part of the reply before its first colon is used as
    a web search query and the body of the first search result is printed
    instead of the reply.

    Args:
        text: Transcribed user speech.

    Returns:
        None. All output is printed.
    """
    global chat_history
    global search_query

    if not text.strip():
        print("No speech detected. Skipping...")
        return

    text = fix_acronyms(text)
    print(f"Recorded text: {text}")

    # Add to history
    chat_history += f"\nUser: {text}\n"
    chat_history = trim_chat_history_to_token_limit(chat_history, prompt_template)

    # Only the last line of the trimmed history is sent to the model.
    # NOTE(review): splitlines() strips the trailing newline, so "Assistant:"
    # is appended directly after the user text - confirm this is intended.
    chat_recent = chat_history.splitlines()[-1]

    # Main model response
    prompt = f"{prompt_template}\n{chat_recent}Assistant:"
    response = llm(
        prompt,
        stop=["User:", "</s>", "[INST]", "[/INST]", "Assistant:"],
        max_tokens=200,
    )
    assistant_reply = response["choices"][0]["text"].strip()

    ####################### WEB SEARCH CAPABILITY ######################
    # Uses the search client to look up the question when the LLM's reply
    # signals low confidence, and outputs the first web result it finds.
    # If removed, the AI will instead apologise and state that it is unsure
    # of the answer.
    low_conf = "Unfortunately,"
    if low_conf in assistant_reply.split():
        print(" Low confidence detected ; using web search...")
        # Everything before the first colon of the reply (the whole reply
        # when it has no colon) becomes the search query.
        search_query = assistant_reply.partition(":")[0]
        results = search_client.text(
            keywords=search_query,
            region="wt_wt",
            safesearch="off",
            max_results=1,
        )
        print(results)
        if results:
            output = results[0]["body"]
            print("\nAssistant says:" + output)
            # play_response(output)
            return
        # No search results: fall through and show the model's own reply
        # rather than failing on results[0].
    ####################################################################

    print(f"\nAssistant says: {assistant_reply}")
    # play_response(assistant_reply)
STT function
def start_recorder():
    """Run the wake-word-gated speech-to-text loop until interrupted.

    Creates a single ``AudioToTextRecorder`` and repeatedly asks it for the
    next transcription, handing every non-empty result to ``process_text``.
    The recorder is created once and reused; the wake word is meant to be
    required again before each new utterance.

    The module-level ``detected`` flag is set by the wake-word callback and
    cleared again after each utterance.

    Returns:
        None. The loop does not terminate on its own.
    """
    global detected
    detected = False

    def on_wakeword_detected():
        global detected
        print("\nWake word detected!")
        detected = True

    def on_recording_start():
        time.sleep(0.5)
        print("Recording...")

    def on_wakeword_detection_start():
        print('\nSay "Hello Wolfy" to begin.')

    def on_wakeword_timeout():
        if not detected:
            print("Timeout. Say 'Hello Wolfy' to begin.")

    with AudioToTextRecorder(
        wake_words="Hello_wolfy!",  # MUST match your ONNX label
        openwakeword_model_paths=onnx_path,
        openwakeword_inference_framework="onnx",
        wakeword_backend="oww",
        on_wakeword_detected=on_wakeword_detected,
        on_recording_start=on_recording_start,
        on_wakeword_timeout=on_wakeword_timeout,
        on_wakeword_detection_start=on_wakeword_detection_start,
        post_speech_silence_duration=1,
        min_length_of_recording=2.0,
        # Per the RealtimeSTT README, a non-zero activation delay lets plain
        # voice activity start a recording during that window each time
        # listening (re)starts, before wake-word mode engages. Zero makes
        # the recorder require the wake word immediately on every cycle.
        wake_word_activation_delay=0,
    ) as recorder:
        while True:
            text = recorder.text()
            # Clear the flag so it only reflects the current listening cycle.
            detected = False
            if text:
                process_text(text)
            time.sleep(1)
I can recognise that my issue is that the function start_recorder() is only called once, and so a wake word is only needed once, but I have no clue how to separate the need for a wake word every time from just straight up having to rebuild the recorder every time the audio stream from the speech-to-text finishes.
I built a program using a local LLM that requires the wake word I trained before it listens to user questions. The transcribed input is then fed into the LLM, and the LLM's output is fed into the ElevenLabs API for text-to-speech.
My issue is that when the program first runs it works perfectly fine and waits for the wake word before it starts listening, but after that it basically ignores the need for a wake word and just records everything it hears.
Text Processing
STT function
I can recognise that my issue is that the function start_recorder() is only called once, and so a wake word is only needed once, but I have no clue how to separate the need for a wake word every time from just straight up having to rebuild the recorder every time the audio stream from the speech-to-text finishes.