import os
import openai
from RealtimeTTS import TextToAudioStream, AzureEngine
from RealtimeSTT import AudioToTextRecorder

if __name__ == '__main__':
    # Hand the OpenAI client its API key, read from the environment
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # Speech output: an Azure engine wrapped in a text-to-audio stream.
    # SystemEngine or ElevenlabsEngine are possible alternatives.
    stream = TextToAudioStream(
        AzureEngine(os.getenv("AZURE_SPEECH_KEY"), os.getenv("AZURE_SPEECH_REGION")),
        log_characters=True,
    )

    # Speech input: recorder configured for English with the wake word "Jarvis"
    recorder = AudioToTextRecorder(
        model="medium", language="en",
        wake_words="Jarvis", wake_word_activation_delay=5,
        spinner=True,
    )

    # System message that is placed in front of every chat request
    system_prompt_message = dict(
        role='system',
        content='Answer precise and short with the polite sarcasm of a butler.',
    )

    def generate_response(messages, model="gpt-3.5-turbo"):
        """Stream the assistant's reply from the OpenAI chat completion API.

        Args:
            messages: List of chat-message dicts ('role' / 'content'), passed
                unchanged to ``openai.ChatCompletion.create``.
            model: Name of the chat model to query. Defaults to
                "gpt-3.5-turbo", the value that used to be hard-coded.

        Yields:
            Every non-empty ``content`` fragment found in the streamed
            deltas, in the order the chunks arrive.
        """
        completion = openai.ChatCompletion.create(model=model, messages=messages, stream=True)
        for chunk in completion:
            choices = chunk["choices"]
            if not choices:
                # Guard: a chunk with an empty choices list would otherwise
                # raise IndexError on the [0] lookup below.
                continue
            text_chunk = choices[0]["delta"].get("content")
            # Deltas without content (None or "") carry nothing to speak
            if text_chunk:
                yield text_chunk

    # Running transcript of the conversation, stored as chat-message dicts
    history = []

    def main():
        """Run the listen / answer / speak cycle without end."""
        while True:
            # Ask the recorder for the next transcription and trim whitespace
            heard = recorder.text()
            heard = heard.strip()

            # Only a non-empty transcription triggers a round trip
            if heard:
                print(f'>>> {heard}\n<<< ', end="", flush=True)
                history.append(dict(role='user', content=heard))

                # The system prompt plus the ten most recent messages form the request
                context = [system_prompt_message, *history[-10:]]
                reply_chunks = generate_response(context)
                stream.feed(reply_chunks).play()

                # Record the text the TTS stream reports for this answer
                history.append(dict(role='assistant', content=stream.text()))

    # Start the conversation loop. This check is nested inside the outer
    # `__main__` guard, so it is always true whenever it is reached.
    # NOTE(review): the inner guard looks redundant — consider calling
    # main() directly.
    if __name__ == "__main__":
        main()