-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
1178 lines (947 loc) · 48.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Import necessary libraries
import tkinter as tk # (GUI toolkit for creating desktop applications)
# ttk (themed tk) is a module in tkinter that provides access to the Tk themed widget set,
# offering a more modern and customizable look for GUI elements compared to standard tkinter widgets
from tkinter import ttk, filedialog, messagebox, font as tkfont
# Example: ttk.Button(parent, text="Click me") creates a themed button
import tkinterdnd2 as tkdnd # (Extension for drag and drop functionality)
# Example: root.drop_target_register(tkdnd.DND_FILES) enables file drop on a window
import fitz # (PyMuPDF library for PDF handling)
# Example: doc = fitz.open("example.pdf") opens a PDF file
from PIL import Image, ImageTk # (Python Imaging Library for image processing)
# Example: img = Image.open("example.jpg") opens an image file
import io
import requests
from openai import OpenAI
from os import getenv
import pytesseract # (Optical Character Recognition library)
# Example: text = pytesseract.image_to_string(Image.open('image.png')) extracts text from an image
import pyperclip
import threading
import queue
from duckduckgo_search import DDGS
import time
from openai import OpenAIError
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from PIL import Image, ImageTk
from pix2tex.cli import LatexOCR
import numpy as np
# Function to get model information from OpenRouter API
def get_model_info(model_name):
    """
    Retrieves information about a specific AI model from the OpenRouter API.

    Sends a GET request to the OpenRouter model listing endpoint, authenticated
    with the OPENROUTER_API_KEY environment variable, and searches the returned
    list for an entry whose 'id' equals model_name.

    Any failure to obtain a usable listing (network error, timeout, HTTP error
    status, non-JSON body, unexpected payload shape) is reported on stdout and
    treated as "model not found", so callers that already handle None keep
    working instead of crashing.

    Parameters:
    - model_name (str): The id of the AI model to look up

    Returns:
    - dict or None: The model's entry from the listing if found, None otherwise

    Example:
        model_info = get_model_info("gpt-3.5-turbo")
        if model_info:
            print(f"Model context length: {model_info['context_length']}")
    """
    try:
        response = requests.get(
            'https://openrouter.ai/api/v1/models',
            headers={'Authorization': f'Bearer {getenv("OPENROUTER_API_KEY")}'},
            # Without a timeout a stalled connection would block forever
            timeout=10,
        )
        # Turn 4xx/5xx answers into an exception instead of parsing an error body
        response.raise_for_status()
        listing = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON
        print(f"Could not fetch model list from OpenRouter: {exc}")
        return None

    # The entries are expected under the 'data' key; tolerate anything else
    entries = listing.get('data', []) if isinstance(listing, dict) else []
    for entry in entries:
        if isinstance(entry, dict) and entry.get('id') == model_name:
            return entry
    return None
# Point the OpenAI-compatible client at OpenRouter; the API key is read from
# the OPENROUTER_API_KEY environment variable
model = "nousresearch/hermes-3-llama-3.1-405b"
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=getenv("OPENROUTER_API_KEY"))
# Look the model up once at import time and take its context length as the
# token limit, falling back to 131072 when the lookup returns nothing
model_info = get_model_info(model)
if model_info:
    max_tokens = model_info['context_length']
else:
    max_tokens = 131072
# Function to send completion request to the AI model
def completion(messages, max_retries=3, retry_delay=5):
    """
    Sends a completion request to the AI model and returns the response.

    Uses the module-level OpenAI client to create a chat completion for the
    module-level model. API errors (OpenAIError) are retried up to max_retries
    times with a fixed pause between attempts; any other exception aborts
    immediately. Failures are reported through the returned string rather than
    raised, so the function always returns a value the caller can display.

    Parameters:
    - messages (list): A list of message dictionaries to send to the AI
    - max_retries (int): Maximum number of attempts; with 0 or less no request
      is made and the failure message is returned
    - retry_delay (int): Delay in seconds between attempts

    Returns:
    - str: The content of the AI's response, or an error message

    Example:
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What's the capital of France?"}
        ]
        response = completion(messages)
        print(response)  # Outputs: "The capital of France is Paris."
    """
    for attempt in range(max_retries):
        try:
            # Named 'response' so the local does not shadow this function
            response = client.chat.completions.create(
                model=model,
                messages=messages,
            )
            return response.choices[0].message.content
        except OpenAIError as e:
            # Only wait when another attempt is still to come
            if attempt < max_retries - 1:
                print(f"Error occurred: {str(e)}. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
        except Exception as e:
            return f"Unexpected error: {str(e)}"
    # Reached when every attempt failed, or when no attempt was allowed at all
    return f"Error: Unable to get a response from the AI after {max_retries} attempts. Please try again later."
# Main application class
class PDFStudyAssistant:
def __init__(self, root):
    """
    Builds the application around the given root window.

    Configures the window, creates the main container frame, seeds the AI
    conversation with its system message, initialises the viewer/selection
    state, then shows the initial "open a PDF" screen and starts the AI
    worker thread.

    Parameters:
    - root (tk.Tk): The root window (main window) of the application

    Example:
        root = tk.Tk()
        app = PDFStudyAssistant(root)
    """
    # Window configuration
    self.root = root
    root.title("PDF Study Assistant")
    root.geometry("1000x600")  # default size
    root.pack_propagate(False)  # do not let children resize the window

    # Container that holds every screen of the application
    self.main_frame = ttk.Frame(root)
    self.main_frame.pack(fill=tk.BOTH, expand=True)

    # The conversation always starts with the system message
    system_message = {"role": "system", "content": "You are a helpful study assistant with the ability to analyze PDF content and answer questions about it. You can also perform web searches to gather additional information. When you need to search the web, simply include '/search' followed by your query in your response. For example, if you need to find information about climate change, you could say 'Let me search for more information. /search latest research on climate change'. Use this search capability when you need to provide up-to-date information or when the context from the PDF is insufficient to answer a question comprehensively. If the initial search results don't contain the exact information needed, you can perform a subsequent search using one of the links provided in the previous search results. For instance, you might say 'Let me check one of the provided links for more specific information. /search https://example.com specific query'. This allows you to dig deeper into reliable sources for more detailed or precise information. Remember, do not include square brackets in your search queries."}
    self.messages = [system_message]

    # Widgets that only exist once the main UI has been built
    self.pdf_canvas = self.line_numbers = None

    # Selection / highlight state
    self.selected_text = self.highlighted_text = ""
    self.is_highlighting = False
    self.highlight_start = self.highlight_rectangle = None
    self.selection_start = self.selection_rectangle = None

    # Document state
    self.current_pdf = None
    self.current_page = 0
    self.page_cache = {}  # rendered pages keyed by page index

    # Helper services
    self.latex_ocr = LatexOCR()
    self.ddgs = DDGS()
    self.chat_model = "claude-3-haiku"  # can be changed to any available model

    # Queue used to hand work to the AI thread, then bring the UI up
    self.ai_queue = queue.Queue()
    self.setup_initial_ui()
    self.start_ai_thread()
def setup_initial_ui(self):
    """
    Builds the start screen shown before any PDF is loaded.

    The screen consists of a "Browse PDF" button, a hint label and a framed
    drop area that accepts dragged files.

    Example:
        self.setup_initial_ui()
        # The window now shows the "Browse PDF" button and the drop area
    """
    # Frame that fills the main container and hosts the start screen
    frame = ttk.Frame(self.main_frame)
    frame.pack(fill=tk.BOTH, expand=True)
    self.initial_frame = frame

    # Button that opens the file dialog
    browse = ttk.Button(frame, text="Browse PDF", command=self.browse_pdf)
    browse.pack(pady=10)
    self.browse_button = browse

    # Hint for the drag-and-drop alternative
    hint = ttk.Label(frame, text="Or drag and drop PDF here")
    hint.pack(pady=10)
    self.drop_label = hint

    # Framed area registered as a file drop target
    drop_zone = ttk.Frame(frame, width=200, height=100, relief="groove", borderwidth=2)
    drop_zone.pack(pady=10)
    drop_zone.drop_target_register(tkdnd.DND_FILES)
    drop_zone.dnd_bind('<<Drop>>', self.on_drop)
    self.drop_area = drop_zone
def setup_main_ui(self):
    """
    Sets up the main user interface of the application.

    Called after a PDF is loaded. Replaces whatever is shown in the main
    frame with a horizontal paned window (PDF viewer on the left, AI chat on
    the right) and builds the toolbar at the bottom of the root window.

    The method may run more than once (every time another PDF is loaded), so
    it also removes the toolbar created by a previous call; otherwise a new
    toolbar would be stacked on top of the old one.

    Example:
        self.setup_main_ui()
        # This creates the main UI with PDF viewer, chat panel, and controls
    """
    # Clear the current contents of the main frame
    for widget in self.main_frame.winfo_children():
        widget.destroy()
    # The toolbar is a child of the root window, not of main_frame, so the
    # loop above never removes it; drop the one left by an earlier call.
    stale_toolbar = getattr(self, "toolbar", None)
    if stale_toolbar is not None:
        stale_toolbar.destroy()

    # Horizontal PanedWindow: resizable left (PDF) and right (chat) panes
    self.paned_window = tk.PanedWindow(self.main_frame, orient=tk.HORIZONTAL)
    self.paned_window.pack(fill=tk.BOTH, expand=True)

    # ---- Left panel: line numbers, PDF canvas, scrollbars ----
    self.left_panel = ttk.Frame(self.paned_window)
    self.paned_window.add(self.left_panel, stretch="always")
    # Read-only text widget showing line numbers next to the page
    self.line_numbers = tk.Text(self.left_panel, width=4, padx=5, pady=5, state='disabled')
    # Canvas on which the rendered page image is drawn
    self.pdf_canvas = tk.Canvas(self.left_panel, bg='white')
    v_scrollbar = ttk.Scrollbar(self.left_panel, orient=tk.VERTICAL, command=self.pdf_canvas.yview)
    h_scrollbar = ttk.Scrollbar(self.left_panel, orient=tk.HORIZONTAL, command=self.pdf_canvas.xview)
    self.pdf_canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)
    # pack() hands out space in call order: the scrollbars must claim their
    # edges before the expanding canvas, or they are left with the sliver
    # that remains after the canvas has taken everything.
    h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X)
    v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    self.line_numbers.pack(side=tk.LEFT, fill=tk.Y)
    self.pdf_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    # Mouse wheel scrolling
    self.pdf_canvas.bind("<MouseWheel>", self.on_mousewheel)  # For Windows and MacOS
    self.pdf_canvas.bind("<Button-4>", self.on_mousewheel)  # For Linux
    self.pdf_canvas.bind("<Button-5>", self.on_mousewheel)  # For Linux

    # ---- Right panel: chat history, scrollbar, input row ----
    self.right_panel = ttk.Frame(self.paned_window)
    self.paned_window.add(self.right_panel, stretch="always")
    # Read-only text widget holding the conversation
    self.chat_history = tk.Text(self.right_panel, wrap=tk.WORD, state='disabled')
    # List for keeping references to images inserted into the chat
    self.chat_history.images = []
    self.chat_scrollbar = ttk.Scrollbar(self.right_panel, orient="vertical", command=self.chat_history.yview)
    self.chat_history.configure(yscrollcommand=self.chat_scrollbar.set)
    # Row with the entry field and the send button
    self.input_frame = ttk.Frame(self.right_panel)
    # ttk.Entry creates a themed single-line text input widget
    self.user_input = ttk.Entry(self.input_frame)
    self.user_input.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 5))
    self.send_button = ttk.Button(self.input_frame, text="Send", command=self.send_message)
    self.send_button.pack(side=tk.RIGHT)
    # Same ordering rule as on the left: fixed-size edges first, the
    # expanding chat history last, so the scrollbar sits beside the history.
    self.input_frame.pack(side=tk.BOTTOM, fill=tk.X)
    self.chat_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    self.chat_history.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

    # ---- Toolbar at the bottom of the window ----
    self.toolbar = ttk.Frame(self.root)
    self.toolbar.pack(side=tk.BOTTOM, fill=tk.X)
    self.browse_button = ttk.Button(self.toolbar, text="Browse PDF", command=self.browse_pdf)
    self.browse_button.pack(side=tk.LEFT, padx=5, pady=5)
    self.highlight_button = ttk.Button(self.toolbar, text="Highlight", command=self.toggle_highlight_mode)
    self.highlight_button.pack(side=tk.LEFT, padx=5, pady=5)
    # Stays disabled until some text has been highlighted
    self.submit_highlight_button = ttk.Button(self.toolbar, text="Submit Highlighted Text", command=self.submit_highlighted_text, state=tk.DISABLED)
    self.submit_highlight_button.pack(side=tk.LEFT, padx=5, pady=5)
    # Created disabled; load_pdf enables it after a successful load
    self.submit_pdf_button = ttk.Button(self.toolbar, text="Submit PDF to AI", command=self.submit_pdf_to_ai, state=tk.DISABLED)
    self.submit_pdf_button.pack(side=tk.LEFT, padx=5, pady=5)
    self.prev_page_button = ttk.Button(self.toolbar, text="Previous Page", command=self.prev_page)
    self.prev_page_button.pack(side=tk.LEFT, padx=5, pady=5)
    self.next_page_button = ttk.Button(self.toolbar, text="Next Page", command=self.next_page)
    self.next_page_button.pack(side=tk.LEFT, padx=5, pady=5)
    self.toggle_ai_button = ttk.Button(self.toolbar, text="Toggle AI", command=self.toggle_ai_panel)
    self.toggle_ai_button.pack(side=tk.RIGHT, padx=5, pady=5)

    # Page navigation: editable current page followed by "/ total"
    self.page_nav_frame = ttk.Frame(self.toolbar)
    self.page_nav_frame.pack(side=tk.LEFT, padx=5, pady=5)
    self.current_page_var = tk.StringVar()
    self.current_page_var.set(f"{self.current_page + 1}")
    self.total_pages_var = tk.StringVar()
    self.total_pages_var.set(f"/ {len(self.current_pdf)}" if self.current_pdf else "/ 0")
    self.page_entry = ttk.Entry(self.page_nav_frame, textvariable=self.current_page_var, width=5)
    self.page_entry.pack(side=tk.LEFT)
    # Pressing Enter in the entry jumps to the typed page
    self.page_entry.bind('<Return>', self.go_to_page)
    self.total_pages_label = ttk.Label(self.page_nav_frame, textvariable=self.total_pages_var)
    self.total_pages_label.pack(side=tk.LEFT)

    # Font selection options
    self.setup_font_options()
    # Accept dropped files anywhere on the main window
    self.root.drop_target_register(tkdnd.DND_FILES)
    self.root.dnd_bind('<<Drop>>', self.on_drop)
    # Mouse bindings for text selection in the PDF
    self.setup_selection_bindings()
def go_to_page(self, event=None):
    """
    Jumps to the page number typed into the page entry.

    Reads the 1-based page number from current_page_var, validates it against
    the loaded document and, when valid, makes it the current page and
    redraws. Invalid input produces a warning dialog and leaves the current
    page unchanged.

    Parameters:
    - event (tk.Event): The key event when triggered by a binding (unused)
    """
    # Only the conversion is guarded: a ValueError raised while rendering
    # the page must not be reported as "invalid number".
    try:
        page_num = int(self.current_page_var.get()) - 1  # Convert to 0-based index
    except ValueError:
        messagebox.showwarning("Invalid Input", "Please enter a valid number.")
        return
    # With no document loaded there is no valid page to go to
    if self.current_pdf is None or not 0 <= page_num < len(self.current_pdf):
        messagebox.showwarning("Invalid Page", "Please enter a valid page number.")
        return
    self.current_page = page_num
    self.display_page()
def setup_font_options(self):
    """
    Adds the font family and font size dropdowns to the toolbar.

    Both dropdowns call change_font whenever an entry is picked.
    """
    # Font families known to Tk, sorted alphabetically
    self.available_fonts = sorted(tkfont.families())

    # Variables backing the two dropdowns, with their default values
    self.font_var = tk.StringVar(self.root, value="TkDefaultFont")
    self.font_size_var = tk.StringVar(self.root, value="10")

    # Family dropdown followed by a size dropdown offering 8 through 24
    size_choices = list(map(str, range(8, 25)))
    self.font_menu = ttk.Combobox(self.toolbar, textvariable=self.font_var, values=self.available_fonts, width=15)
    self.font_size_menu = ttk.Combobox(self.toolbar, textvariable=self.font_size_var, values=size_choices, width=3)

    # Place each dropdown in the toolbar and react to selections
    for dropdown in (self.font_menu, self.font_size_menu):
        dropdown.pack(side=tk.LEFT, padx=5, pady=5)
        dropdown.bind("<<ComboboxSelected>>", self.change_font)
def change_font(self, *args):
    """
    Changes the font of the chat history to the selected family and size.

    The size dropdown is editable, so its content may not be a number; in
    that case the change is skipped and the current font is kept.

    Parameters:
    - *args: Ignored; present so the method can be used as an event callback
    """
    selected_font = self.font_var.get()
    raw_size = self.font_size_var.get()
    try:
        selected_size = int(raw_size)
    except ValueError:
        # Typed text such as "abc" would otherwise raise inside the Tk callback
        print(f"Ignoring invalid font size: {raw_size!r}")
        return
    new_font = tkfont.Font(family=selected_font, size=selected_size)
    # Keep a reference so the Font object lives as long as it is in use
    self.chat_font = new_font
    self.chat_history.configure(font=new_font)
def setup_selection_bindings(self):
    """
    Attaches the mouse handlers used for selecting text on the PDF canvas.

    Press starts a selection, dragging updates it and release finishes it.

    Example:
        self.setup_selection_bindings()
        # Dragging on the PDF canvas now selects text
    """
    handlers = (
        ("<ButtonPress-1>", self.start_selection),
        ("<B1-Motion>", self.update_selection),
        ("<ButtonRelease-1>", self.end_selection),
    )
    for sequence, handler in handlers:
        self.pdf_canvas.bind(sequence, handler)
def remove_selection_bindings(self):
    """
    Detaches the left-button press, drag and release handlers from the PDF
    canvas, which turns text selection off.
    """
    for sequence in ("<ButtonPress-1>", "<B1-Motion>", "<ButtonRelease-1>"):
        self.pdf_canvas.unbind(sequence)
def on_drop(self, event):
    """
    Handles the drop event for drag and drop functionality.

    The drop data is a Tcl-formatted list: a path containing spaces is
    wrapped in braces and several dropped files are separated by spaces.
    The data is therefore split with Tk's own list parser before the
    extension is checked, and the first PDF among the dropped files is
    loaded. If none of them is a PDF, an error dialog is shown.

    Parameters:
    - event (tk.Event): The drop event; event.data holds the dropped path(s)
    """
    # Comparing the raw string would reject "{C:/my docs/a.pdf}" because it
    # ends with "}" rather than ".pdf".
    dropped_paths = self.root.tk.splitlist(event.data)
    pdf_path = next((path for path in dropped_paths if path.lower().endswith('.pdf')), None)
    if pdf_path is None:
        messagebox.showerror("Error", "Please drop a PDF file.")
        return
    self.load_pdf(pdf_path)
def start_selection(self, event):
    """
    Records where a text selection begins.

    Called on left mouse button press; stores the press position, converted
    by get_adjusted_coords, as the anchor of the selection.

    Parameters:
    - event (tk.Event): The mouse button press event
    """
    anchor = self.get_adjusted_coords(event.x, event.y)
    self.selection_start = anchor
def update_selection(self, event):
    """
    Redraws the selection rectangle while the mouse is being dragged.

    Does nothing unless a selection has been started. The previous rectangle,
    if any, is removed and a new one is drawn from the selection anchor to
    the current mouse position.

    Parameters:
    - event (tk.Event): The mouse motion event
    """
    if not self.selection_start:
        return
    anchor_x, anchor_y = self.selection_start
    cursor_x, cursor_y = self.get_adjusted_coords(event.x, event.y)
    # Replace the rectangle from the previous motion event
    if self.selection_rectangle:
        self.pdf_canvas.delete(self.selection_rectangle)
    self.selection_rectangle = self.pdf_canvas.create_rectangle(
        anchor_x, anchor_y, cursor_x, cursor_y,
        outline="blue", fill="blue", stipple="gray50",
    )
def end_selection(self, event):
    """
    Ends the text selection process and copies the selected text.

    Called on left mouse button release. The dragged rectangle is converted
    from canvas pixels to page coordinates, the words inside it are
    extracted, and the result is copied to the clipboard. When the page
    yields no text for the area (for example a scanned page), OCR is tried
    on the rendered area instead.

    The drag state and the on-screen rectangle are always reset, even when
    extraction fails, so a failed selection cannot leave a stale rectangle
    behind.

    Parameters:
    - event (tk.Event): The mouse button release event
    """
    if not self.selection_start:
        return
    try:
        x0, y0 = self.selection_start
        x1, y1 = self.get_adjusted_coords(event.x, event.y)
        page = self.current_pdf[self.current_page]
        # The page is rendered at scale_factor canvas pixels per page unit
        # (see display_page), so dividing converts canvas coordinates back
        # to page coordinates. No vertical flip is applied: the clip
        # rectangle uses the same top-left origin as the canvas.
        scale = self.scale_factor
        rect = fitz.Rect(
            min(x0, x1) / scale,  # left
            min(y0, y1) / scale,  # top
            max(x0, x1) / scale,  # right
            max(y0, y1) / scale,  # bottom
        )
        words = page.get_text("words", clip=rect)
        # Index 4 of each extracted word entry is the word text
        selected_text = " ".join(w[4] for w in words)

        # A plain click produces a zero-area rectangle; there is nothing
        # to render for OCR in that case.
        has_area = x0 != x1 and y0 != y1
        if not selected_text.strip() and has_area:
            try:
                pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), clip=rect)
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                selected_text = pytesseract.image_to_string(img)
            except Exception as ocr_error:
                # OCR is a best-effort fallback; report it and treat the
                # area as containing no text.
                print(f"OCR failed: {ocr_error}")
                selected_text = ""

        if selected_text.strip():
            self.copy_to_clipboard(selected_text)
            messagebox.showinfo("Selection", "Text copied to clipboard!")
        else:
            messagebox.showinfo("Selection", "No text selected.")
    finally:
        self.selection_start = None
        if self.selection_rectangle:
            self.pdf_canvas.delete(self.selection_rectangle)
            self.selection_rectangle = None
def copy_selected_text(self):
    """
    Puts self.selected_text on the system clipboard via pyperclip.

    Prints a short status line either way; when there is no selected text
    nothing is copied.
    """
    if not self.selected_text:
        print("No text selected")
        return
    pyperclip.copy(self.selected_text)
    print("Text copied to clipboard")
def browse_pdf(self):
    """
    Lets the user pick a PDF through a file dialog and loads it.

    Used as the command of the "Browse PDF" button. Nothing happens when the
    dialog returns an empty path.
    """
    chosen_path = filedialog.askopenfilename(filetypes=[("PDF files", "*.pdf")])
    if not chosen_path:
        return
    self.load_pdf(chosen_path)
def load_pdf(self, file_path):
    """
    Loads a PDF file and sets up the main user interface.

    Opens the document, resets the page position and the render cache,
    builds the main UI, shows the first page and enables the
    "Submit PDF to AI" button. Any error is reported in a dialog.

    When the new document has been opened, the document that was open
    before is closed so its resources are released.

    Parameters:
    - file_path (str): The path to the PDF file
    """
    previous_pdf = self.current_pdf
    try:
        self.current_pdf = fitz.open(file_path)
        self.current_page = 0
        self.page_cache = {}
        self.setup_main_ui()
        self.update_total_pages()
        self.display_page()
        self.submit_pdf_button.config(state=tk.NORMAL)
        messagebox.showinfo("Success", "PDF loaded successfully!")
    except Exception as e:
        messagebox.showerror("Error", f"Error loading PDF: {str(e)}")
    finally:
        # If opening failed, current_pdf still refers to the previous
        # document, which therefore stays open and usable.
        if previous_pdf is not None and previous_pdf is not self.current_pdf:
            try:
                previous_pdf.close()
            except Exception as close_error:
                # Closing is cleanup only; it must not mask the load result
                print(f"Could not close previous PDF: {close_error}")
def update_total_pages(self):
    """
    Refreshes the "/ N" page-count label from the loaded document.

    Leaves the label untouched when no document is loaded.
    """
    if not self.current_pdf:
        return
    page_count = len(self.current_pdf)
    self.total_pages_var.set(f"/ {page_count}")
def get_adjusted_coords(self, x, y):
    """
    Translates a mouse position into PDF canvas coordinates.

    The position is passed through the canvas's canvasx/canvasy methods. No
    scaling is applied here; callers divide by the scale factor themselves
    when they need page coordinates.

    Parameters:
    - x (int): The x-coordinate reported by the mouse event
    - y (int): The y-coordinate reported by the mouse event

    Returns:
    - tuple: The (x, y) position in canvas coordinates
    """
    canvas = self.pdf_canvas
    return canvas.canvasx(x), canvas.canvasy(y)
def display_page(self):
    """
    Draws the current PDF page on the canvas.

    The page is rendered at twice its size the first time it is shown and
    the resulting image is kept in page_cache; later visits reuse the cached
    image. After drawing, the scroll region, the line numbers and the page
    indicators are updated. Does nothing when no document or canvas exists.
    """
    if not (self.current_pdf and self.pdf_canvas):
        return

    page_index = self.current_page
    try:
        # Reuse the image rendered on an earlier visit
        photo = self.page_cache[page_index]
    except KeyError:
        # Render at 2x to get a sharper image than the page's native size
        self.scale_factor = 2
        zoom = fitz.Matrix(self.scale_factor, self.scale_factor)
        pix = self.current_pdf[page_index].get_pixmap(matrix=zoom)
        # Tkinter cannot show the pixmap directly: go through a PIL image
        # and wrap that in a PhotoImage
        rendered = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        photo = ImageTk.PhotoImage(rendered)
        self.page_cache[page_index] = photo

    canvas = self.pdf_canvas
    # Start from an empty canvas
    canvas.delete("all")
    # Scrollable area equals the image size, so larger pages can be scrolled
    canvas.config(scrollregion=(0, 0, photo.width(), photo.height()))
    # Top-left corner of the image goes to the canvas origin
    canvas.create_image(0, 0, anchor=tk.NW, image=photo)
    # Hold a reference on the canvas so the image is not garbage collected
    canvas.image = photo

    # Refresh everything that depends on the page being shown
    self.update_line_numbers()
    self.current_page_var.set(str(self.current_page + 1))
    self.total_pages_var.set(f"/ {len(self.current_pdf)}")
def update_line_numbers(self):
    """
    Rewrites the line-number column for the current page.

    The numbers run from 1 to the count of newline-separated lines in the
    text extracted from the page.
    """
    page_text = self.current_pdf[self.current_page].get_text("text")
    line_count = len(page_text.split('\n'))
    numbering = '\n'.join(map(str, range(1, line_count + 1)))

    gutter = self.line_numbers
    # The widget is kept read-only; unlock it only while replacing its text
    gutter.config(state='normal')
    gutter.delete('1.0', tk.END)
    gutter.insert('1.0', numbering)
    gutter.config(state='disabled')
def toggle_highlight_mode(self):
    """
    Switches the PDF canvas between selection mode and highlight mode.

    Used as the command of the "Highlight" button. Entering highlight mode
    swaps the selection mouse handlers for the highlight handlers and
    relabels the button; leaving it restores the selection handlers.
    """
    self.is_highlighting = not self.is_highlighting
    mouse_sequences = ("<ButtonPress-1>", "<B1-Motion>", "<ButtonRelease-1>")

    if self.is_highlighting:
        self.highlight_button.config(text="Stop Highlighting")
        # Selection handlers go away, highlight handlers take their place
        self.remove_selection_bindings()
        highlight_handlers = (self.start_highlight, self.update_highlight, self.end_highlight)
        for sequence, handler in zip(mouse_sequences, highlight_handlers):
            self.pdf_canvas.bind(sequence, handler)
    else:
        self.highlight_button.config(text="Highlight")
        # Drop the highlight handlers and go back to plain selection
        for sequence in mouse_sequences:
            self.pdf_canvas.unbind(sequence)
        self.setup_selection_bindings()
def start_highlight(self, event):
    """
    Records where a highlight begins.

    Called on left mouse button press while highlight mode is active; stores
    the press position, converted by get_adjusted_coords, as the anchor of
    the highlight.

    Parameters:
    - event (tk.Event): The mouse button press event
    """
    anchor = self.get_adjusted_coords(event.x, event.y)
    self.highlight_start = anchor
def update_highlight(self, event):
    """
    Redraws the highlight rectangle while the mouse is being dragged.

    Does nothing unless a highlight has been started. The previous rectangle,
    if any, is removed and a new one is drawn from the highlight anchor to
    the current mouse position.

    Parameters:
    - event (tk.Event): The mouse motion event
    """
    if not self.highlight_start:
        return
    anchor_x, anchor_y = self.highlight_start
    cursor_x, cursor_y = self.get_adjusted_coords(event.x, event.y)
    # Replace the rectangle from the previous motion event
    if self.highlight_rectangle:
        self.pdf_canvas.delete(self.highlight_rectangle)
    self.highlight_rectangle = self.pdf_canvas.create_rectangle(
        anchor_x, anchor_y, cursor_x, cursor_y,
        outline="yellow", fill="yellow", stipple="gray50",
    )
def end_highlight(self, event):
    """
    Finish a highlight drag and copy the selected region to the clipboard.

    Called when the left mouse button is released in highlight mode. The
    dragged region of the current page is rendered to an image and passed
    to ``self.latex_ocr``. A non-empty result is stored in
    ``self.highlighted_text`` as ``$<latex>$``; if OCR returns nothing, or
    rendering/OCR raises, the words inside the region are extracted as
    plain text instead. The result is copied to the clipboard and the user
    is notified with a message box.

    The drag state is always cleared afterwards: the anchor is reset, the
    on-canvas rectangle is deleted, and the submit-highlight button is
    enabled when ``self.highlighted_text`` is non-empty. Nothing is sent to
    the AI here; that happens in ``submit_highlighted_text``.

    A release with no PDF loaded, or a click without any drag (zero-area
    selection), only clears the drag state.

    Parameters:
    - event (tk.Event): The mouse button release event
    """
    if not self.highlight_start:
        return

    def copy_plain_text(page, rect, notice):
        # Fallback path: join the words found inside the region
        # (index 4 of each "words" entry is the word string).
        words = page.get_text("words", clip=rect)
        self.highlighted_text = " ".join(w[4] for w in words)
        self.copy_to_clipboard(self.highlighted_text)
        messagebox.showinfo("Highlight", notice)

    try:
        if not self.current_pdf:
            return

        x0, y0 = self.highlight_start
        x1, y1 = self.get_adjusted_coords(event.x, event.y)
        if x0 == x1 or y0 == y1:
            # Plain click: the region has no area, so there is nothing to
            # render or extract.
            return

        page = self.current_pdf[self.current_page]
        scale = self.scale_factor
        # Canvas coordinates are in scaled pixels; divide to get page units.
        rect = fitz.Rect(min(x0, x1) / scale, min(y0, y1) / scale,
                         max(x0, x1) / scale, max(y0, y1) / scale)

        try:
            # Rendering is inside the try so that a failed render falls
            # back to text extraction instead of aborting the handler.
            pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), clip=rect)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            latex = self.latex_ocr(img)
        except Exception as e:
            print(f"Error in LaTeX conversion: {str(e)}")
            copy_plain_text(page, rect, "Error in LaTeX conversion. Copied as text.")
        else:
            if latex:
                self.highlighted_text = f"${latex}$"
                self.copy_to_clipboard(self.highlighted_text)
                messagebox.showinfo("Highlight", "LaTeX expression copied to clipboard!")
            else:
                # OCR produced nothing usable; treat the region as text.
                copy_plain_text(page, rect, "Text copied to clipboard!")
    finally:
        # Always leave the canvas clean, even if something above raised.
        self.highlight_start = None
        if self.highlight_rectangle:
            self.pdf_canvas.delete(self.highlight_rectangle)
            self.highlight_rectangle = None
        if self.highlighted_text:
            self.submit_highlight_button.config(state=tk.NORMAL)
def copy_to_clipboard(self, text):
    """
    Copy text to the clipboard.

    The text is first written through the Tk clipboard of ``self.root``.
    ``pyperclip`` is then used as an additional, best-effort path: if it
    has no working copy mechanism on this system, the failure is logged
    and ignored, because the Tk clipboard write has already been made.

    Parameters:
    - text (str): The text to place on the clipboard
    """
    self.root.clipboard_clear()
    self.root.clipboard_append(text)
    self.root.update()  # Necessary to finalize the clipboard operation

    try:
        pyperclip.copy(text)  # As a fallback, also use pyperclip
    except pyperclip.PyperclipException as e:
        # pyperclip raises this when no copy/paste backend is installed;
        # a missing fallback must not abort the caller.
        print(f"pyperclip fallback unavailable: {e}")

    print(f"Copied to clipboard: {text}")  # Debug print
def submit_highlighted_text(self):
    """
    Queue the currently highlighted text for the AI.

    Triggered by the submit-highlight button. When ``self.highlighted_text``
    is non-empty it is put on ``self.ai_queue`` as a ``("highlight", text)``
    item, the stored text is cleared, and the button is disabled again.
    With no highlighted text, nothing is queued.
    """
    pending = self.highlighted_text
    if not pending:
        return

    self.ai_queue.put(("highlight", pending))
    # Clear the stored text and disable the button so the same highlight is
    # not submitted twice.
    self.highlighted_text = ""
    self.submit_highlight_button.config(state=tk.DISABLED)
def submit_pdf_to_ai(self):
    """
    Submit the current page and the last two pages of the PDF to the AI.

    For each selected page the text layer is read with ``page.get_text()``;
    when that yields only whitespace, the page is rendered to an image and
    run through ``pytesseract`` OCR instead. The page texts are joined in
    page order, each under a ``Page N:`` header (1-based), truncated to
    8000 characters, and put on ``self.ai_queue`` as a ``("pdf", text)``
    item. A message box confirms the submission.

    When no PDF is loaded an error dialog is shown and nothing is queued.
    """
    if not self.current_pdf:
        messagebox.showerror("Error", "No PDF is currently loaded.")
        return

    total_pages = len(self.current_pdf)

    # Current page plus the final two pages. The set removes overlaps, and
    # the ">= 0" filter drops tail pages that do not exist in 1-page
    # documents. In a 2-page document both pages count as "the last two",
    # so both are always included.
    selected = {self.current_page}
    selected.update(
        index for index in (total_pages - 1, total_pages - 2) if index >= 0
    )
    pages_to_submit = sorted(selected)

    sections = []
    for page_num in pages_to_submit:
        page = self.current_pdf[page_num]

        # Try to get text using PyMuPDF
        page_text = page.get_text()

        # No text layer (e.g. a scanned page): fall back to OCR.
        if not page_text.strip():
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            page_text = pytesseract.image_to_string(img)

        sections.append(f"Page {page_num + 1}:\n{page_text}\n\n")

    full_text = "".join(sections)

    # Truncate the text if it's too long
    max_chars = 8000  # Adjust this value as needed
    if len(full_text) > max_chars:
        full_text = full_text[:max_chars] + "... [truncated]"

    self.ai_queue.put(("pdf", full_text))
    messagebox.showinfo("PDF Submitted", "The content of the current page and the last two pages (if available) has been submitted to the AI for analysis.")
def send_message(self):
    """
    Send the text in the input field to the chat.

    Called when the user clicks the "Send" button. Empty or
    whitespace-only input is ignored. Otherwise the message is echoed to
    the chat history and dispatched by prefix:

    - ``/chat <query>``: sent to ``self.ddgs.chat`` and the reply (or the
      error) is written to the chat history.
    - ``/search <query>``: run through ``self.perform_web_search``; the
      title/link list is written to the chat history and also queued for
      the AI as a ``("search", text)`` item.
    - anything else: queued for the AI as a ``("message", text)`` item.

    The input field is cleared after a message has been dispatched.
    """
    user_message = self.user_input.get()
    if not user_message.strip():
        # Nothing to send: avoid echoing a bare "You:" line and queuing a
        # blank prompt for the AI.
        return

    self.update_chat_history(f"You: {user_message}\n")

    chat_prefix = "/chat "
    search_prefix = "/search "

    if user_message.startswith(chat_prefix):
        query = user_message[len(chat_prefix):]
        try:
            response = self.ddgs.chat(query, model=self.chat_model)
        except Exception as e:
            # Any failure of the chat backend is reported in the chat
            # window rather than crashing the Tk callback.
            error_message = f"Error in DuckDuckGo chat: {str(e)}"
            self.update_chat_history(f"Error: {error_message}\n")
        else:
            self.update_chat_history(f"DuckDuckGo AI: {response}\n")
    elif user_message.startswith(search_prefix):
        query = user_message[len(search_prefix):]
        search_results = self.perform_web_search(query)
        lines = ["Search Results:\n"]
        lines.extend(
            f"- {result['title']}: {result['href']}\n" for result in search_results
        )
        result_text = "".join(lines)
        self.update_chat_history(result_text)
        self.ai_queue.put(("search", result_text))
    else:
        self.ai_queue.put(("message", user_message))

    self.user_input.delete(0, tk.END)
def update_chat_history(self, message):
    """
    Append a message to the chat history widget.

    The message is split on ``$$``. Text outside a ``$$ ... $$`` pair is
    inserted as-is; text inside a pair is rendered with ``render_latex``
    and inserted as an image. A ``$$`` without a closing partner is kept as
    literal text. If rendering a formula raises, the formula is inserted as
    its raw ``$$...$$`` source instead. A newline is appended after the
    message and the view is scrolled to the end.

    The widget is made editable only for the duration of the update and is
    set back to ``disabled`` even if the update fails part-way.

    Parameters:
    - message (str): The message to append to the chat history
    """
    parts = message.split('$$')
    if len(parts) % 2 == 0:
        # An even number of parts means the last "$$" was never closed.
        # Fold it back into the preceding text so the tail of the message
        # is not mistaken for a formula.
        parts[-2:] = [parts[-2] + '$$' + parts[-1]]

    self.chat_history.config(state='normal')
    try:
        for i, part in enumerate(parts):
            if i % 2 == 0:
                # Regular text
                self.chat_history.insert(tk.END, part)
                continue

            # LaTeX content
            try:
                latex_image = render_latex(part)
            except Exception as e:
                # A formula that cannot be rendered should not cost the
                # user the rest of the message; show its source instead.
                print(f"Error rendering LaTeX: {e}")
                self.chat_history.insert(tk.END, f"$${part}$$")
                continue

            self.chat_history.image_create(tk.END, image=latex_image)
            # Keep a reference to prevent garbage collection
            self.chat_history.images.append(latex_image)

        self.chat_history.insert(tk.END, '\n')
    finally:
        self.chat_history.config(state='disabled')

    self.chat_history.see(tk.END)
def toggle_ai_panel(self):
    """
    Show or hide the AI chat panel.

    Uses ``winfo_viewable()`` on the right-hand panel to decide the
    direction: a viewable panel is removed from the paned window and the
    toggle button is relabelled "Show AI"; otherwise the panel is added
    back and the button is relabelled "Hide AI". The paned window is then
    updated so the layout change takes effect.
    """
    panel = self.right_panel
    paned = self.paned_window

    if not panel.winfo_viewable():
        # Panel is hidden: add it back with the default pane options.
        paned.add(panel)
        label = "Hide AI"
    else:
        paned.forget(panel)
        label = "Show AI"

    self.toggle_ai_button.config(text=label)
    paned.update()
def prev_page(self):
    """
    Go to the previous page of the PDF.

    Called when the user clicks the "Previous Page" button. When a PDF is
    loaded and the current page is not the first one, the current page
    index is decreased by one and the page is redrawn with
    ``self.display_page()``. Otherwise nothing happens.
    """
    # Same "is a PDF loaded" guard as next_page, so neither navigation
    # button can trigger a redraw without a document.
    if self.current_pdf and self.current_page > 0:
        self.current_page -= 1
        self.display_page()
def next_page(self):
    """
    Go to the next page of the PDF.

    Called when the user clicks the "Next Page" button. When a PDF is
    loaded and the current page is not the last one, the current page
    index is increased by one and the page is redrawn with
    ``self.display_page()``. Otherwise nothing happens.
    """
    document = self.current_pdf
    if not document:
        return

    last_index = len(document) - 1
    if self.current_page >= last_index:
        # Already on the final page.
        return

    self.current_page += 1
    self.display_page()
def on_mousewheel(self, event):
"""
Handles mouse wheel scrolling on the PDF canvas.
This method is called when the user scrolls the mouse wheel over the PDF canvas.