translator.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import os
  2. import openai
  3. from RealtimeSTT import AudioToTextRecorder
  4. from RealtimeTTS import TextToAudioStream, AzureEngine
  5. if __name__ == '__main__':
  6. # Setup OpenAI API key
  7. openai.api_key = os.environ.get("OPENAI_API_KEY")
  8. # Text-to-Speech Stream Setup (alternative engines: SystemEngine or ElevenlabsEngine)
  9. engine = AzureEngine(
  10. os.environ.get("AZURE_SPEECH_KEY"),
  11. os.environ.get("AZURE_SPEECH_REGION")
  12. )
  13. stream = TextToAudioStream(engine, log_characters=True)
  14. # Speech-to-Text Recorder Setup
  15. recorder = AudioToTextRecorder(
  16. model="medium",
  17. )
  18. # Supported languages and their voices
  19. languages = [
  20. ["english", "AshleyNeural"],
  21. ["german", "AmalaNeural"],
  22. ["french", "DeniseNeural"],
  23. ["spanish", "EstrellaNeural"],
  24. ["portuguese", "FernandaNeural"],
  25. ["italian", "FabiolaNeural"]
  26. ]
  27. def generate_response(messages):
  28. """Generate assistant's response using OpenAI."""
  29. for chunk in openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages, stream=True):
  30. text_chunk = chunk["choices"][0]["delta"].get("content")
  31. if text_chunk:
  32. yield text_chunk
  33. def clear_console():
  34. os.system('clear' if os.name == 'posix' else 'cls')
  35. def select_language():
  36. """Display language options and get user's choice."""
  37. for index, language in enumerate(languages, start=1):
  38. print(f"{index}. {language[0]}")
  39. language_number = input("Select language to translate to (1-6): ")
  40. return languages[int(language_number) - 1]
  41. def main():
  42. """Main translation loop."""
  43. clear_console()
  44. language_info = select_language()
  45. engine.set_voice(language_info[1])
  46. system_prompt_message = {
  47. 'role': 'system',
  48. 'content': f'Translate the given text to {language_info[0]}. Output only the translated text.'
  49. }
  50. while True:
  51. print("\nSay something!")
  52. # Capture user input from microphone
  53. user_text = recorder.text()
  54. print(f"Input text: {user_text}")
  55. user_message = {'role': 'user', 'content': user_text}
  56. # Get assistant response and play it
  57. translation_stream = generate_response([system_prompt_message, user_message])
  58. print("Translation: ", end="", flush=True)
  59. stream.feed(translation_stream)
  60. stream.play()
  61. main()