-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
145 lines (120 loc) · 5.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import string
import tkinter as tk
from tkinter import messagebox, filedialog

import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
# Download NLTK data files
# word_tokenize loads the 'punkt_tab' resource on NLTK >= 3.9, while older
# releases read the pickled 'punkt' model, so both are fetched.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)
# Sample data (increased size and diversity)
# Each review sits next to its label so the two columns cannot drift out of
# alignment when samples are added or removed.
_labeled_samples = [
    ('I love this product!', 'positive'),
    ('This is the worst experience I have ever had.', 'negative'),
    ('It was okay, not great but not terrible either.', 'neutral'),
    ('Absolutely fantastic service!', 'positive'),
    ('I am very disappointed with this.', 'negative'),
    ('Happy with my purchase.', 'positive'),
    ('Not what I expected.', 'neutral'),
    ('Great quality and fast shipping.', 'positive'),
    ('Terrible, will not buy again.', 'negative'),
    ('I am satisfied with the results.', 'positive'),
    ('The product is decent.', 'neutral'),
    ('I hate this product!', 'negative'),
    ('The experience was satisfactory.', 'neutral'),
    ('Amazing! Highly recommended.', 'positive'),
    ('Not up to the mark.', 'negative'),
    ('Pretty good, but could be better.', 'neutral'),
    ('Completely useless, very unhappy.', 'negative'),
    ('I will definitely buy this again.', 'positive'),
    ('Worst purchase ever.', 'negative'),
    ('It serves the purpose.', 'neutral'),
    ('Excellent quality!', 'positive'),
    ('Very bad product.', 'negative'),
    ('Mediocre at best.', 'neutral'),
    ('Outstanding experience!', 'positive'),
    ('Not pleased with this.', 'negative'),
    ('Content with the service.', 'neutral'),
    ('Awful experience, never again.', 'negative'),
    ('Best purchase I have made.', 'positive'),
    ('Not worth the money.', 'negative'),
    ('Reasonably good.', 'neutral'),
    ('This exceeded my expectations!', 'positive'),
    ('I regret buying this.', 'negative'),
    ('Neither good nor bad.', 'neutral'),
    ('Impressive performance!', 'positive'),
    ('Disappointed with the quality.', 'negative'),
    ('Does the job well enough.', 'neutral'),
    ('Horrible customer service.', 'negative'),
    ('I\'m thrilled with this purchase!', 'positive'),
    ('Below average product.', 'negative'),
    ('Meets my needs adequately.', 'neutral'),
]
data = {
    'text': [sample_text for sample_text, _ in _labeled_samples],
    'sentiment': [sample_label for _, sample_label in _labeled_samples],
}
df = pd.DataFrame(data)
# Preprocess the text data
# NLTK's English stop-word list contains the negations below. Dropping them
# would turn "Not pleased with this." into "pleased", flipping the polarity the
# classifier sees, so they are excluded from the words that get filtered out.
NEGATION_WORDS = {'no', 'nor', 'not'}
stop_words = set(stopwords.words('english')) - NEGATION_WORDS
def preprocess_text(text):
    """Normalize raw text into a space-joined string of content tokens.

    The text is lowercased, stripped of every character in
    ``string.punctuation``, tokenized with ``word_tokenize`` and filtered
    against the module-level ``stop_words`` set.
    """
    punctuation_remover = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(punctuation_remover)
    return ' '.join(
        token for token in word_tokenize(cleaned) if token not in stop_words
    )
# Clean every sample in place so training sees the same normalization that
# preprocess_text applies to user input at prediction time.
df['text'] = df['text'].apply(preprocess_text)
# Split data into training and test sets
X = df['text']
y = df['sentiment']
# Stratify on the label so all three sentiment classes appear in the small
# test split; an unstratified draw can leave a class out and make its
# per-class metrics undefined in the report below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Vectorize text data using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)
# Train a Logistic Regression classifier
# LogisticRegression's ``multi_class`` argument is deprecated in
# scikit-learn 1.5+, so the one-vs-rest scheme is expressed with
# OneVsRestClassifier, the replacement the scikit-learn docs recommend.
model = OneVsRestClassifier(LogisticRegression(random_state=42, max_iter=1000))
model.fit(X_train_vec, y_train)
# Evaluate the model
y_pred = model.predict(X_test_vec)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Classification Report:\n{classification_report(y_test, y_pred)}')
# Predict sentiment of new text
def predict_sentiment(text):
    """Return the sentiment label that ``model`` predicts for one raw string.

    The text is cleaned with ``preprocess_text`` and encoded with the
    module-level ``vectorizer`` before being handed to the classifier.
    """
    features = vectorizer.transform([preprocess_text(text)])
    predictions = model.predict(features)
    return predictions[0]
def analyze_sentiment():
    """Classify the text in the input box and show the label in the result area.

    Blank or whitespace-only input is rejected with a warning dialog rather
    than being sent to the model: an empty document has no features, so any
    label returned for it would be meaningless.
    """
    text = text_entry.get("1.0", "end-1c")
    if not text.strip():
        messagebox.showwarning("No text", "Please enter some text to analyze.")
        # Clear any previous result so it is not mistaken for this request's.
        result_label.config(text="")
        return
    sentiment = predict_sentiment(text)
    result_label.config(text=f"Sentiment: {sentiment}")
def save_text():
    """Save the contents of the input box to a user-chosen ``.txt`` file.

    Does nothing when the dialog is cancelled. The file is written as UTF-8
    (the same encoding ``load_text`` reads) so non-ASCII text does not depend
    on the platform's default encoding. Write failures are reported in an
    error dialog.
    """
    text = text_entry.get("1.0", "end-1c")
    file_path = filedialog.asksaveasfilename(
        defaultextension=".txt", filetypes=[("Text Files", "*.txt")]
    )
    if not file_path:
        return
    try:
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(text)
    except OSError as exc:
        messagebox.showerror("Save failed", f"Could not save the file:\n{exc}")


def load_text():
    """Replace the input box contents with the text of a user-chosen file.

    Does nothing when the dialog is cancelled. The file is decoded as UTF-8
    (the same encoding ``save_text`` writes). Files that cannot be opened or
    decoded are reported in an error dialog and leave the input box unchanged.
    """
    file_path = filedialog.askopenfilename(
        defaultextension=".txt", filetypes=[("Text Files", "*.txt")]
    )
    if not file_path:
        return
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
    except (OSError, UnicodeDecodeError) as exc:
        messagebox.showerror("Load failed", f"Could not load the file:\n{exc}")
        return
    # Only touch the widget after a successful read so a failed load keeps
    # whatever the user had typed.
    text_entry.delete("1.0", tk.END)
    text_entry.insert(tk.END, text)
def clear_text():
    """Reset the UI: blank the result label and empty the input box."""
    result_label.configure(text="")
    text_entry.delete("1.0", tk.END)
# Build the main window. Widgets are created first and laid out afterwards;
# the pack order below fixes the top-to-bottom (and, for the buttons,
# left-to-right) arrangement.
root = tk.Tk()
root.title("Sentiment Analysis")

text_label = tk.Label(root, text="Enter text to analyze:")
text_entry = tk.Text(root, height=10, width=50)
button_frame = tk.Frame(root)

analyze_button = tk.Button(
    button_frame, text="Analyze Sentiment", command=analyze_sentiment
)
save_button = tk.Button(button_frame, text="Save Text", command=save_text)
load_button = tk.Button(button_frame, text="Load Text", command=load_text)
clear_button = tk.Button(button_frame, text="Clear Text", command=clear_text)

result_label = tk.Label(root, text="")

# Input area and button row, stacked vertically in the root window.
for _widget in (text_label, text_entry, button_frame):
    _widget.pack()

# Action buttons, side by side inside the button row.
for _button in (analyze_button, save_button, load_button, clear_button):
    _button.pack(side=tk.LEFT)

# The prediction is shown underneath the button row.
result_label.pack()

root.mainloop()