#!/usr/bin/python3
# -*- coding: utf-8 -*-
import http.client
import json
import mimetypes
import os
import subprocess
import sys
import tempfile
import uuid

from dictionary import dictionary

def transcribe_file(api_key, file_path, model_id="scribe_v1"):
    """Upload a file to the ElevenLabs speech-to-text endpoint.

    The file is sent as a hand-built ``multipart/form-data`` POST request to
    ``https://api.elevenlabs.io/v1/speech-to-text``.

    Args:
        api_key: Value sent in the ``xi-api-key`` request header.
        file_path: Path of the file to upload; it is read fully into memory.
        model_id: Value of the ``model_id`` form field.

    Returns:
        The decoded JSON response body. When the body is not valid UTF-8
        JSON, a dict with ``status_code`` (the HTTP status) and ``text``
        (the body decoded leniently) is returned instead. The HTTP status is
        not otherwise checked, so a JSON error payload is returned as-is.

    Raises:
        OSError: If the file cannot be read or the connection fails.
        http.client.HTTPException: On protocol-level HTTP failures.
    """
    boundary = uuid.uuid4().hex
    CRLF = "\r\n"

    # File part
    filename = os.path.basename(file_path)
    with open(file_path, "rb") as f:
        file_content = f.read()
    mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"

    # A double quote or line break inside the quoted filename parameter would
    # corrupt the part header, so percent-escape them the way browsers do for
    # multipart/form-data.
    safe_filename = (
        filename.replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")
    )

    file_part = (
        f'--{boundary}{CRLF}'
        f'Content-Disposition: form-data; name="file"; filename="{safe_filename}"{CRLF}'
        f'Content-Type: {mime_type}{CRLF}{CRLF}'.encode("utf-8")
        + file_content + CRLF.encode("utf-8")
    )

    # Model id part
    model_part = (
        f'--{boundary}{CRLF}'
        f'Content-Disposition: form-data; name="model_id"{CRLF}{CRLF}'
        f'{model_id}{CRLF}'.encode("utf-8")
    )

    # End boundary
    closing_part = f'--{boundary}--{CRLF}'.encode("utf-8")

    body = b"".join((file_part, model_part, closing_part))

    headers = {
        "xi-api-key": api_key,
        "Content-Type": f"multipart/form-data; boundary={boundary}",
        "Content-Length": str(len(body)),
    }

    # Make request; close the connection even when the request or read fails.
    conn = http.client.HTTPSConnection("api.elevenlabs.io")
    try:
        conn.request("POST", "/v1/speech-to-text", body, headers)
        resp = conn.getresponse()
        status = resp.status
        data = resp.read()
    finally:
        conn.close()

    try:
        return json.loads(data.decode("utf-8"))
    except ValueError:
        # Both UnicodeDecodeError and json.JSONDecodeError derive from
        # ValueError; fall back to handing the raw body to the caller.
        return {"status_code": status, "text": data.decode("utf-8", errors="ignore")}

def convert_transcription_to_code(transcription):
    """Turn a speech-to-text transcription into source text.

    Entries of ``transcription["words"]`` whose ``type`` is ``"word"`` are
    lower-cased and stripped of surrounding spaces, commas, periods and
    exclamation marks. The resulting word list is scanned left to right: at
    each position the keys of the module-level ``dictionary`` are tried in
    iteration order, and the first key whose words match is replaced by its
    symbol. A word that starts no match is emitted unchanged.

    Args:
        transcription: Mapping with an optional ``"words"`` list; each entry
            is a mapping with ``"type"`` and (for words) ``"text"`` keys.

    Returns:
        The concatenated output. A single space is inserted only between two
        consecutive plain words; symbols are joined without any spacing.
        An input without word entries yields an empty string.
    """
    words = [
        entry["text"].lower().strip(" ,.!")
        for entry in transcription.get("words", [])
        if entry.get("type") == "word"
    ]

    # (text, kind) pairs, where kind is "symbol" or "word".
    tokens = []
    i = 0
    while i < len(words):
        for key_tuple, symbol in dictionary.items():
            n = len(key_tuple)
            # Skip empty keys: they would match at every position without
            # consuming any word, so the scan would never advance.
            if n and words[i:i + n] == list(key_tuple):
                tokens.append((symbol, "symbol"))
                i += n
                break
        else:
            # No dictionary phrase starts here; keep the spoken word itself.
            tokens.append((words[i], "word"))
            i += 1

    pieces = []
    previous_kind = None
    for content, kind in tokens:
        if previous_kind == "word" and kind == "word":
            pieces.append(" ")
        pieces.append(content)
        previous_kind = kind

    return "".join(pieces)

def main(argv):
    """Transcribe an audio file, convert it to C++ source and compile it.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout:
            ``[program, API_KEY, IN_FILE_PATH, OUT]``.

    Returns:
        Process exit status: 0 on success, 1 on a usage error or when the
        transcription response contains no ``words`` list.

    Raises:
        subprocess.CalledProcessError: If g++ exits with a non-zero status.
    """
    if len(argv) < 4:
        print("Usage: {} API_KEY IN_FILE_PATH OUT".format(argv[0]))
        return 1

    api_key, file_path, out_path = argv[1:4]

    result = transcribe_file(api_key, file_path)

    # Without a "words" list the conversion yields an empty file and g++
    # fails with an unrelated error; report the response that came back
    # instead (presumably an API error payload - verify against the API docs).
    if not isinstance(result, dict) or "words" not in result:
        print("Transcription failed: {}".format(result), file=sys.stderr)
        return 1

    code = convert_transcription_to_code(result)

    # Write the generated source into a private, automatically removed
    # directory rather than a fixed, predictable path in /tmp.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "code.cpp")
        with open(tmp_path, "w", encoding="utf-8") as out:
            out.write(code)

        cmd = [
            "/usr/bin/g++",
            "-O3",
            "-static",
            "-std=c++20",
            tmp_path,
            "-o",
            out_path,
        ]
        subprocess.check_call(cmd)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

