| import os
|
| from uuid import uuid4
|
| import edge_tts
|
| from groq import Groq
|
| from dotenv import load_dotenv
|
|
|
# Load variables from a .env file (if one exists) into the process environment
# before the API client below is constructed.
load_dotenv()

# Module-level Groq client shared by STT. It is built with no arguments, so it
# presumably picks up its credentials (e.g. GROQ_API_KEY) from the environment
# populated above — TODO confirm. Keep this line after load_dotenv().
client = Groq()
|
|
|
|
|
|
|
|
|
|
|
def _transcribe_saved_audio(file_path, audio_bytes):
    """Write *audio_bytes* to *file_path* and submit that file for transcription.

    Synchronous helper: both the disk write and the ``client`` request block
    the calling thread, so ``STT`` runs this on a worker thread instead of on
    the event loop.

    Returns the response object from ``client.audio.transcriptions.create``.
    """
    with open(file_path, "wb") as f:
        f.write(audio_bytes)

    with open(file_path, "rb") as f:
        return client.audio.transcriptions.create(
            file=f,
            model="whisper-large-v3-turbo",
            response_format="verbose_json",
            temperature=0.0,
        )


async def STT(audio_file):
    """Transcribe an uploaded audio file to text.

    The upload is saved as ``uploads/<random hex>.wav`` and then sent to the
    module-level ``client`` using the ``whisper-large-v3-turbo`` model.

    Args:
        audio_file: Object exposing an awaitable ``read()`` that returns the
            raw audio bytes (presumably a framework upload object — confirm
            against the caller).

    Returns:
        A dict with the keys ``"text"``, ``"segments"`` and ``"language"``,
        taken from the attributes of the same names on the transcription
        response.

    Raises:
        Whatever ``audio_file.read()``, the file system, or the transcription
        request raises; nothing is caught here.
    """
    # Function-scope import so this block is self-contained.
    import asyncio

    # Read the upload first: if this fails, no empty file is left on disk.
    audio_bytes = await audio_file.read()

    os.makedirs("uploads", exist_ok=True)
    # NOTE(review): the extension is always ".wav" regardless of the real
    # upload format, and the saved file is never removed afterwards — confirm
    # both are intended.
    file_path = f"uploads/{uuid4().hex}.wav"

    # The write and the HTTP request are synchronous; run them in the default
    # executor so other coroutines keep running while we wait.
    loop = asyncio.get_running_loop()
    transcription = await loop.run_in_executor(
        None, _transcribe_saved_audio, file_path, audio_bytes
    )

    return {
        "text": transcription.text,
        "segments": transcription.segments,
        "language": transcription.language,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
async def TTS(text: str, voice: str = "en-US-AriaNeural") -> str:
    """Synthesize *text* as speech and write the audio to disk.

    Args:
        text: The text to speak.
        voice: Voice name handed to ``edge_tts.Communicate``.

    Returns:
        The path of the file that was written, of the form
        ``outputs/<random hex>.mp3``.
    """
    # Make sure the destination directory exists before edge_tts writes to it.
    os.makedirs("outputs", exist_ok=True)

    # A random hex id keeps separate calls from overwriting each other.
    clip_id = uuid4().hex
    audio_path = f"outputs/{clip_id}.mp3"

    await edge_tts.Communicate(text, voice).save(audio_path)
    return audio_path