-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
62 lines (48 loc) · 2.2 KB
/
app.py
File metadata and controls
62 lines (48 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pandas as pd
import numpy as np
import gradio as gr
import pickle
from unidecode import unidecode
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction import DictVectorizer
from scipy.sparse import hstack, coo_matrix
# Function to generate a prediction.
def create_predict(name, company, location, model_name):
    """Predict a salary for one job posting using the selected pickled model.

    Args:
        name: Job title text.
        company: Company name text.
        location: Job location text.
        model_name: Base name of the model file; ``<model_name>.pkl`` is
            opened relative to the current working directory.

    Returns:
        The first predicted value rounded to 3 decimals, followed by
        ``" million VND"``.

    Raises:
        ValueError: If ``model_name`` is empty or ``None`` (for example when
            no entry was selected in the dropdown).
    """
    # Without this guard a missing selection fails later with an opaque
    # TypeError from ``None + '.pkl'``.
    if not model_name:
        raise ValueError("Please select a model to predict with.")

    filename = model_name + '.pkl'
    # SECURITY: pickle.load can execute arbitrary code stored in the file.
    # Only load model files that ship with this application.
    with open(filename, 'rb') as file:
        model = pickle.load(file)

    X = convert(name, company, location)
    # Resize the input in place so its column count matches what the model
    # was trained with. Call the method on the instance so it works for
    # whatever sparse format ``convert`` returns.
    # NOTE(review): convert() fits fresh vectorizers per request, so resizing
    # only fixes the shape; column positions presumably do not line up with
    # the training features -- confirm and load the fitted vectorizers.
    X.resize((1, model.n_features_in_))
    y = model.predict(X)
    return str(round(y[0], 3)) + " million VND"
# Function to handle inputs (convert text to feature vectors).
def convert(name, company, location):
    """Convert one job posting's text fields into a single-row sparse matrix.

    Args:
        name: Job title text.
        company: Company name text.
        location: Job location text.

    Returns:
        The horizontal stack (``scipy.sparse.hstack``) of the TF-IDF
        features of ``name`` and the dict-vectorized ``company`` and
        ``location`` fields.
    """
    # First, transliterate Vietnamese characters to ASCII.
    name = unidecode(name)
    company = unidecode(company)
    location = unidecode(location)

    # Put the fields in a one-row DataFrame for easier conversion.
    df = pd.DataFrame(
        [{'name': name, 'company': company, 'location': location}]
    )

    # Normalise the text the same way as the preprocessing step: lower-case,
    # then replace every non-alphanumeric character with a space. These
    # pandas methods return new Series, so the result must be assigned back;
    # otherwise the normalisation is silently discarded.
    for column in ('name', 'company'):
        lowered = df[column].str.lower()
        df[column] = lowered.replace(r'[^a-zA-Z0-9]', ' ', regex=True)

    # Convert using TfidfVectorizer and DictVectorizer.
    # NOTE(review): both vectorizers are fitted on this single row, so the
    # resulting columns depend only on the current input -- confirm whether
    # the vectorizers fitted at training time should be loaded instead.
    vectorizer = TfidfVectorizer()
    X_tfidf = vectorizer.fit_transform(df['name'])
    encoder = DictVectorizer()
    X_categ = encoder.fit_transform(
        df[['company', 'location']].to_dict('records')
    )
    return hstack([X_tfidf, X_categ])
# Input widgets, listed in the same order as create_predict's parameters.
name_input = gr.Textbox(label='Enter job name:')
company_input = gr.Textbox(label='Enter company name:')
location_input = gr.Textbox(label='Enter location of job:')
model_input = gr.Dropdown(
    [
        "decision_dataset1",
        "decision_dataset2",
        "forest_dataset1",
        "forest_dataset2",
    ],
    label='Select model to predict:',
)

# Output widget that displays the string returned by create_predict.
output = gr.Textbox(label="Salary prediction:")

# Wire the widgets to the prediction function and start the web UI.
app = gr.Interface(
    fn=create_predict,
    inputs=[name_input, company_input, location_input, model_input],
    outputs=output,
)
app.launch()