-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGroq_cURL.ahk
More file actions
197 lines (167 loc) · 11.5 KB
/
Groq_cURL.ahk
File metadata and controls
197 lines (167 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#Requires AutoHotkey v2
#SingleInstance
SetCapsLockState "AlwaysOff" ; Optional
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/*
Replace "yourChoiceofAPIkey" with a working API key.
Replace each file path with the actual path and file name; "Username" is just a placeholder.
Download phiola (a lightweight, portable audio recorder with convenient CLI controls) from https://github.com/stsaz/phiola.
F2 - F4 are for testing purposes. Once the CMD window disappears, press Ctrl-V in any text field to see the results.
For the transcription demonstration, an audio file (e.g., WhisperAudioTest.m4a) must be pre-recorded, unless PTT (push-to-talk) is used.
The PTT function is assigned to CapsLock; this can, of course, be changed. If the cursor is focused in a text field, the transcribed output is auto-pasted at the cursor. However, SendEvent "{Ctrl down}v{Ctrl up}" may not paste reliably in certain windows; try Send "^v" instead, or switch between the two depending on WinTitle.
Converting the syntax to AHK v1, if necessary, is straightforward; the main thing to note is that double-quote escaping works differently in v1.
If working behind a proxy server, update the cURL command flags accordingly.
*/
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Groq Related
; Bearer token placed in the Authorization header by ChatCurling() and TranscriptionCurling(). The value below is a placeholder.
Global API_Key := "yourChoiceofAPIkey"
; Request URLs handed to cURL for chat completions and for audio transcriptions.
Global Chat_Endpoint := "https://api.groq.com/openai/v1/chat/completions"
Global Transcription_Endpoint := "https://api.groq.com/openai/v1/audio/transcriptions"
; Value of the "model" field in the chat request JSON built by ChatCurling().
Global Chat_Model := "mixtral-8x7b-32768"
; Only referenced by the commented-out "-d @file" alternative inside ChatCurling().
Global Chat_PromptFile := "C:\Users\Username\Desktop\Promtjson.txt" ; Optional and better able to deal with escaping in json strings (https://developer.zendesk.com/documentation/api-basics/getting-started/installing-and-using-curl/#move-json-data-to-a-file)
; Sent by TranscriptionCurling() as the "model", "language" and "response_format" form fields.
Global Transcription_Model := "whisper-large-v3"
Global Transcription_Language := "en"
Global Transcription_ResponseFormat := "text"
; Sent as the "prompt" form field. The phrases are the spoken commands that PostProcessing() converts;
; presumably they are listed here to nudge the transcriber toward these spellings - TODO confirm against the API's prompt semantics.
Global Transcription_Prompt := "no cap, cap, no space, open paren, close paren, left paren, right paren, ellipsis, colon mark, number one, spacebar, new paragraph, comma, literal period, period"
; Pre-recorded sample audio for the transcription test (see the header notes).
Global Transcription_AudioTestFile := "C:\Users\Username\Desktop\WhisperAudioTest.m4a"
; File that phiola records into (see Phiola_Remote_Record) and that TranscriptionCurling() uploads.
Global Transcription_AudioOverwrittenFile := "C:\Users\Username\Desktop\WhisperAudio.m4a"
; Text placed in the "content" field of the chat request. The three statements below prepare it for
; embedding in the JSON argument on the command line:
Global Chat_Prompt := "Write out a book title or two from Oscar Wilde's Happy Prince"
; 1) every newline sequence becomes the two literal characters backslash + n,
Chat_Prompt := RegExReplace(Chat_Prompt, "\R", "\n")
; 2) leading/trailing whitespace is removed,
Chat_Prompt := Trim(Chat_Prompt)
; 3) every double quote becomes three backslashes followed by the quote.
Chat_Prompt := StrReplace(Chat_Prompt, "`"", "\\\`"") ; for escaping purposes inside cURL
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; cURL Related
; First token of every command line built by ChatCurling() / TranscriptionCurling().
Global Curl_Command := "curl"
;;Global Curl_Command := "curl -x `"http://xxx.xx.x.xxxx:xxxx`"" ; If behind a proxy server
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Misc Windows Shell Related
; Appended as the last token of the built command lines so the response is piped to clip (the clipboard).
Global Pipe_toClip := "| clip"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Phiola Related
; Command lines run by the CapsLock hotkey: one starts a phiola recording into Transcription_AudioOverwrittenFile,
; the other issues phiola's "remote stop".
; NOTE(review): neither path is quoted, so a path containing spaces would presumably split into several arguments - confirm before using such paths.
Path_toPhiola := "C:\Users\Username\Desktop\phiola-2\phiola.exe"
Phiola_Remote_Record := Path_toPhiola . " -Background record -f -o " . Transcription_AudioOverwrittenFile . " -remote"
Phiola_Remote_Stop := Path_toPhiola . " remote stop"
;;ListeningTrayIconFile := "Path-to-TrayIconFile" ; Useful as a Mic On indicator
F2:: ; Test to ensure cURL works ... add cURL flags as required.
{
    ; Sends one fixed chat request straight through cmd/cURL and pipes the raw
    ; response to the clipboard. It deliberately bypasses ChatCurling() so it
    ; can confirm that cURL itself works.
    ;
    ; The bearer token comes from API_Key, so the key only has to be entered
    ; once in the configuration section. The instruction is sent with role
    ; "user", the role intended for requests addressed to the model.
    Command := A_ComSpec
        . ' /C curl https://api.groq.com/openai/v1/chat/completions'
        . ' -H "Content-Type: application/json"'
        . ' -H "Authorization: Bearer ' . API_Key . '"'
        . ' -d "{\"model\": \"mixtral-8x7b-32768\", \"messages\": [{\"role\": \"user\", \"content\": \"Write out a random aphorism by Ludwig Wittgenstein\"}]}"'
        . ' | clip'
    Run Command
}
F3:: ; For testing
{
    ; Uploads the pre-recorded sample audio to the transcription endpoint and
    ; pipes the returned text to the clipboard.
    ;
    ; The bearer token and the sample file path are taken from API_Key and
    ; Transcription_AudioTestFile, so they are configured in one place only.
    ; The configured path uses backslashes, the same form TranscriptionCurling()
    ; passes to cURL for the recorded file.
    Command := A_ComSpec
        . ' /C curl https://api.groq.com/openai/v1/audio/transcriptions'
        . ' -H "Authorization: Bearer ' . API_Key . '"'
        . ' -H "Content-Type: multipart/form-data"'
        . ' -F model="whisper-large-v3"'
        . ' -F response_format="text"'
        . ' -F file="@' . Transcription_AudioTestFile . '"'
        . ' -F prompt="comma, period, new paragraph"'
        . ' | clip'
    Run Command
}
F4:: ; For testing
{
    ; Build the chat request command line first, then hand it to the command
    ; interpreter; the built command pipes the response to the clipboard.
    ChatCommand := ChatCurling()
    Run A_ComSpec . " /C " . ChatCommand
}
F5:: ; Useful for re-sending recording if results from first-pass are compromised by "hallucination", etc.
{
    ; Re-submits the most recent recording (Transcription_AudioOverwrittenFile)
    ; for transcription; the built command pipes the result to the clipboard.
    TranscriptionCommand := TranscriptionCurling()
    Run A_ComSpec . " /C " . TranscriptionCommand
    ;; Optionally: ClipWait for content, then call PostProcessing() as desired.
    ;; Once the CMD window disappears, press Ctrl-v to paste the output at the cursor (if focused in a text field).
}
;; SC029:: ; Scan code for Tilde (~)
SC03A:: ; Scan code for CAPSLOCK
{
    ; Push-to-talk: records while CapsLock is held, transcribes the recording
    ; on release, post-processes the text and pastes it into the window that
    ; was active when the key went down.
    ;
    ; Side effects: overwrites the clipboard and the recording file.
    TargetWindow := WinExist("A") ; 0 when no window is active
    A_Clipboard := "" ; emptied so that ClipWait below detects the new transcript
    Run A_ComSpec " /C " Phiola_Remote_Record,, "Hide"
    ;; TraySetIcon(ListeningTrayIconFile)
    ;; Keywait "SC029"
    KeyWait "SC03A" ; block until the key is released
    Send "{Blind}{Control up}{Alt up}{Shift up}" ; make sure no modifier is left logically down
    Sleep 500
    Run A_ComSpec " /C " Phiola_Remote_Stop,, "Hide"
    Sleep 300
    Run A_ComSpec " /C " TranscriptionCurling(),, "Hide"
    if !ClipWait(20)
    {
        MsgBox "Transcription did not happen for some reason despite waiting for 20s."
        Return
    }
    ;; TraySetIcon("*") ; Restore default AHK icon
    ;; Sleep 50
    PostProcessing() ; Optional but desirable
    ; WinActivate throws a TargetError when its target cannot be found. That
    ; happens if nothing was active at key-down or the window was closed while
    ; waiting, so check first and leave the transcript on the clipboard.
    if !(TargetWindow && WinExist("ahk_id " TargetWindow))
    {
        MsgBox "The original window is no longer available. The transcription is on the clipboard."
        Return
    }
    WinActivate "ahk_id " TargetWindow
    SendEvent "{Ctrl down}v{Ctrl up}" ; Send "^v" may work better in certain places
}
Join(sep, params*)
{
    ; Concatenates all items in params, placing sep between consecutive items.
    ;
    ; sep     - separator string inserted between items.
    ; params* - zero or more values to concatenate, in order.
    ; Returns the joined string; an empty string when no items are given.
    ;
    ; The accumulator is initialised explicitly: appending with .= to a local
    ; that was never assigned reads an unset variable, which v2 reports as an
    ; error. The separator goes only between items, so the result no longer
    ; ends with a stray separator.
    str := ""
    For index, param in params
        str .= (index > 1 ? sep : "") . param
    Return str
}
ChatCurling()
{
    ; Builds the cURL command line for a chat completion request.
    ;
    ; Reads the globals API_Key, Chat_Model, Chat_Prompt, Chat_Endpoint,
    ; Curl_Command and Pipe_toClip. Chat_Prompt is expected to be already
    ; escaped (see the configuration section).
    ; Returns the complete command line, ending with the pipe to clip, ready
    ; to be passed to the command interpreter.
    ;
    ; The prompt is sent with role "user": it is an instruction addressed to
    ; the model, not a previous reply of the model.
    Key_Header := '-H "Authorization: Bearer ' . API_Key . '"'
    Chat_ContentType_Header := '-H "Content-Type: application/json"'
    ; Inner quotes are written as \" so they survive command-line argument parsing.
    Chat_Data := '-d "{\"model\": \"' . Chat_Model . '\", \"messages\": [{\"role\": \"user\", \"content\": \"' . Chat_Prompt . '\"}]}"'
    ;; Chat_Data := "-d " . "`"@" . Chat_PromptFile . "`""
    Return Join(A_Space, Curl_Command, Chat_Endpoint, Key_Header, Chat_ContentType_Header, Chat_Data, Pipe_toClip)
}
TranscriptionCurling()
{
    ; Assembles the cURL command line that uploads the recorded audio file to
    ; the transcription endpoint as a multipart form and pipes the response to
    ; the clipboard.
    ;
    ; Reads the globals API_Key, Curl_Command, Pipe_toClip and the
    ; Transcription_* settings.
    ; Returns the command line as a single string.
    Parts := [
        Curl_Command,
        Transcription_Endpoint,
        '-H "Authorization: Bearer ' . API_Key . '"',
        '-H "Content-Type: multipart/form-data"',
        '-F model="' . Transcription_Model . '"',
        '-F language="' . Transcription_Language . '"',
        '-F response_format="' . Transcription_ResponseFormat . '"',
        '-F file="@' . Transcription_AudioOverwrittenFile . '"',
        '-F prompt="' . Transcription_Prompt . '"',
        Pipe_toClip
    ]
    Return Join(A_Space, Parts*)
}
PostProcessing()
{
;; Rewrites the text currently on the clipboard: dictated punctuation/formatting words are
;; turned into the corresponding symbols, then spacing and capitalization are tidied, and the
;; result is written back to the clipboard. Takes no parameters and returns nothing.
;; The replacements are applied strictly in sequence and later steps rely on earlier ones
;; (e.g. the placeholders created near the top are only restored at the very end).
;; Reference https://www.autohotkey.com/docs/v2/misc/RegEx-QuickRef.htm
;; Below is "empirically validated", not by any means "optimized".
Temp_String := A_Clipboard
;; "come on/out/up" (optionally followed by , or .) is rewritten to the word "comma" - presumably a common mis-transcription.
Temp_String := RegExReplace(Temp_String, "i)(come on[,.]*|come out[,.]*|come up[,.]*)", "comma")
;; A period immediately followed by a word character is swapped for the placeholder "decimalpointdot" so it survives the next step.
Temp_String := RegExReplace(Temp_String, "([.])(\w)", "decimalpointdot$2")
;; Every remaining period and comma in the incoming text is removed.
Temp_String := RegExReplace(Temp_String, "[.,]", "")
;; "literal period" is parked as the placeholder "prddot" so the generic "period" rule below does not turn it into ".".
Temp_String := RegExReplace(Temp_String, "i)( |\b)(literal[\s-]period)( |\b)", "$1prddot$3")
;; Spoken exclamation/question marks that already come with their symbol attached collapse to the bare symbol.
Temp_String := RegExReplace(Temp_String, "i)( |\b)(exclamation)([\s-]*)(mark)*([!])( |\b|$)", "!")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(\Q! exclamation\E)([\s-]*)(mark)*( |\b)", "!")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(\Qquestion mark?\E|\Q? question mark\E)( |\b|$)", "?")
;; Quote and parenthesis words become the " ( ) characters.
Temp_String := RegExReplace(Temp_String, "i)( |\b)(quotation[\s-]mark[[:blank:]]?|open[\s-]quote[[:blank:]]?|close[d]*[\s-]quote[[:blank:]]?|left[\s-]quote[[:blank:]]?|right[\s-]quote[[:blank:]]?)(\b)", "`"$3")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(open[ed]*[\s-]paren[t]*[[:blank:]]?|left[\s-]paren[t]*[[:blank:]]?)", "$1(")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(close[d]*[\s-]paren[t]*|right[\s-]paren[t]*)( |\b)", ")$3")
;; Individual punctuation words become symbols. The extra alternatives (kama, karma, Cohen mark,
;; column mark, 4 slash, for slash, PewDiePie) are presumably mis-transcriptions observed in practice.
Temp_String := RegExReplace(Temp_String, "i)( |\b)(comma|kama|karma)( |\b)", ",$3")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(colon mark|Cohen mark|column mark)( |\b)", ":$3")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(semicolon)( |\b)", ";$3")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(hyphen)( |\b)", "-")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(forward slash|4 slash|for slash)( |\b)", "/")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(period|PewDiePie|full stop)( |\b)", ".")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(exclamation)([\s-]*)(mark)*( |\b)", "!")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(question mark)( |\b)", "?")
Temp_String := RegExReplace(Temp_String, "i)( |\b)(apostrophe)( |\b)", "'")
Temp_String := RegExReplace(Temp_String, "i)(ellipsis|dot dot dot)", "...")
Temp_String := RegExReplace(Temp_String, "i)(plus[\s-]minus)", "+/-")
Temp_String := StrReplace(Temp_String, "single dash", "-")
;; Spacing around quoted and parenthesized spans: blanks just inside the delimiters are dropped and one space is put outside.
Temp_String := RegExReplace(Temp_String, "`"[[:blank:]]*([\S\s]*?)[[:blank:]]*`"", " `"$1`" ")
Temp_String := RegExReplace(Temp_String, "([[:blank:]]*)(\Q(\E)([[:blank:]]*)([\S\s]*?)([[:blank:]]*)(\Q)\E)([[:blank:]]*)", " $2$4$6 ")
;; Blanks between a closing quote / closing parenthesis and a following punctuation mark are removed.
Temp_String := RegExReplace(Temp_String, "(`")([[:blank:]]*)([,.;:!?])", "$1$3")
Temp_String := RegExReplace(Temp_String, "(\Q)\E)([[:blank:]]*)([,.;:!?])", "$1$3")
;; $u0 upper-cases the whole match, i.e. the lower-case letter that follows . ! ? or :
Temp_String := RegExReplace(Temp_String, "[.!?:]+[[:blank:]]*[a-z]", "$u0")
;; Runs of blanks between two word characters shrink to a single space.
Temp_String := RegExReplace(Temp_String, "(\w)([[:blank:]]+)(\w)", "$1 $3")
;; The first letter inside a parenthesis that follows . ! or ? is upper-cased.
Temp_String := RegExReplace(Temp_String, "([.!?]\s*\()([a-zA-Z])", "$1$u2")
;; "number" followed by a digit, or "number one" .. "number five", becomes "#<digit>".
Temp_String := RegExReplace(Temp_String, "i)(number[[:blank:]]*)([0-9])", "#$2")
Temp_String := RegExReplace(Temp_String, "i)(number one)", "#1")
Temp_String := RegExReplace(Temp_String, "i)(number two)", "#2")
Temp_String := RegExReplace(Temp_String, "i)(number three)", "#3")
Temp_String := RegExReplace(Temp_String, "i)(number four)", "#4")
Temp_String := RegExReplace(Temp_String, "i)(number five)", "#5")
Temp_String := RegexReplace(Temp_String, "(\R)$", "") ; Whisper tends to add a single newline at the end ...
Temp_String := Trim(Temp_String)
;; "new paragraph" (and the listed variants), together with any blanks before it, becomes two CR+LF line breaks.
Temp_String := RegExReplace(Temp_String, "i)[[:blank:]]*(a new paragraph|new paragraph|new, paragraph)\b", "`r`n`r`n")
;; For every line: leading blanks are dropped and the first non-space character is upper-cased.
Temp_String := RegExReplace(Temp_String, "`am)^([[:blank:]]*)(\S)", "$u2")
;; "no cap" forces the next character to lower case; "cap" forces it to upper case. The command words themselves are removed.
Temp_String := RegExReplace(Temp_String, "i)(no cap[s]*[[:blank:]]*)(\w)", "$l2")
Temp_String := RegExReplace(Temp_String, "i)(\bcap[s]*[[:blank:]]*)(\w)", "$u2")
;; After . ! ? followed by a quote, | or closing parenthesis, the next word character is upper-cased.
Temp_String := RegExReplace(Temp_String, "i)([.!?])([[:blank:]]*[`"|\)][[:blank:]]*)(\w)", "$1$2$u3")
Temp_String := RegExReplace(Temp_String, "i)([.!?])(\Q (\E)", "$1 (")
;; A space between ")" and a following . ! ? is removed.
Temp_String := RegExReplace(Temp_String, "i)(\Q) \E)([.!?])", ")$2")
;; "no space" deletes itself together with the blanks around it; "spacebar" is replaced by exactly one space.
Temp_String := RegExReplace(Temp_String, "i)([[:blank:]]*no[\s-]space[[:blank:]]*)", "")
Temp_String := RegExReplace(Temp_String, "i)([[:blank:]]*spacebar[[:blank:]]*)", " ")
;; The character right after an apostrophe is forced to lower case.
Temp_String := RegExReplace(Temp_String, "i)(\Q'\E)(\w)", "'$l2")
;; After . ! ? exactly one space is inserted and the next word character is upper-cased.
;; Decimal points are still hidden behind their placeholder at this point, so this rule does not touch them.
Temp_String := RegExReplace(Temp_String, "i)([.!?])([[:blank:]]*)(\w)", "$1 $u3")
;; Mr/Mrs/Ms/Dr/St followed by a space and a letter get a period and the letter is upper-cased (case-sensitive match).
Temp_String := RegExReplace(Temp_String, "(\QMr\E|\QMrs\E|\QMs\E|\QDr\E|\QSt\E)( )([a-zA-Z])", "$1. $u3")
;; Restore the placeholders created near the top: "prddot" -> the word "period", "decimalpointdot" -> ".".
Temp_String := StrReplace(Temp_String, "prddot", "period")
Temp_String := StrReplace(Temp_String, "decimalpointdot", ".")
A_Clipboard := Temp_String
}