Spaces:

junaid17
/

cortex

Sleeping

cortex / utils.py

Upload 6 files

db1e5a4 verified 2 months ago

1.51 kB

	import os
	from uuid import uuid4
	import edge_tts
	from groq import Groq
	from dotenv import load_dotenv

	load_dotenv()

	client = Groq()

	# ==================================================
	# 🎧 SPEECH TO TEXT
	# ==================================================

	async def STT(audio_file):
	os.makedirs("uploads", exist_ok=True)
	file_path = f"uploads/{uuid4().hex}.wav"

	with open(file_path, "wb") as f:
	f.write(await audio_file.read())

	with open(file_path, "rb") as f:
	transcription = client.audio.transcriptions.create(
	file=f,
	model="whisper-large-v3-turbo",
	response_format="verbose_json",
	temperature=0.0
	)

	# Optional: cleanup the uploaded file after processing
	# os.remove(file_path)

	return {
	"text": transcription.text,
	"segments": transcription.segments,
	"language": transcription.language
	}


	# ==================================================
	# 🗣️ TEXT TO SPEECH
	# ==================================================

	async def TTS(text: str, voice: str = "en-US-AriaNeural") -> str:
	"""
	Converts text to speech and saves it to a file.
	Returns the path to the generated audio file.
	"""
	os.makedirs("outputs", exist_ok=True)
	filename = f"outputs/{uuid4().hex}.mp3"

	communicate = edge_tts.Communicate(text, voice)
	await communicate.save(filename)

	return filename