Skip to content

Commit

Permalink
Fix indexing to work with multiple threads
Browse files Browse the repository at this point in the history
  • Loading branch information
rquesada committed Sep 30, 2024
1 parent e27dbb9 commit 231a1f4
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 102 deletions.
235 changes: 140 additions & 95 deletions backend/managers/RagManager.py

Large diffs are not rendered by default.

15 changes: 13 additions & 2 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,21 @@ class File(Base):
id = Column(String, primary_key=True)
name = Column(String, nullable=False)
assistant_id = Column(String, nullable=False)
num_chunks = Column(String, nullable=False)
file_id = Column(String, nullable=False)
indexing_status = Column(String, nullable=False)

class Page(Base):
    """ORM model mapped to the ``page`` table.

    Every column is a required string. ``file_id`` and ``assistant_id``
    are stored as plain values; no foreign-key constraints are declared
    on this model.
    """

    __tablename__ = "page"

    # Primary key of the page row.
    id = Column(String, primary_key=True)

    # Required identifiers stored alongside the page.
    file_id = Column(String, nullable=False)
    assistant_id = Column(String, nullable=False)

class Chunk(Base):
    """ORM model mapped to the ``chunk`` table.

    Every column is a required string. ``page_id``, ``file_id`` and
    ``assistant_id`` are stored as plain values; no foreign-key
    constraints are declared on this model.
    """

    __tablename__ = "chunk"

    # Primary key of the chunk row.
    id = Column(String, primary_key=True)

    # Required identifiers stored alongside the chunk.
    page_id = Column(String, nullable=False)
    file_id = Column(String, nullable=False)
    assistant_id = Column(String, nullable=False)

class Message(Base):
__tablename__ = "message"
id = Column(String, primary_key=True)
Expand Down
3 changes: 2 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ langchain_community
langchain_chroma
langchain_openai
pypdf
httpx
httpx
langchain_ollama
3 changes: 1 addition & 2 deletions backend/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ class ConversationSchema(ConversationBaseSchema):
# File schemas
class FileBaseSchema(BaseModel):
    # Base schema for file records; every field is a required string.
    name: str
    num_chunks: str
    file_id: str
    assistant_id: str
    indexing_status: str

    class Config:
        # Let the schema be populated from ORM objects via attribute access.
        orm_mode = True
Expand Down
2 changes: 0 additions & 2 deletions migrations/versions/29df33c77244_added_file_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ def upgrade() -> None:
sa.Column('id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('name', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('assistant_id', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
sa.Column('file_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('num_chunks', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.Column('indexing_status', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
Expand Down
35 changes: 35 additions & 0 deletions migrations/versions/5a1a6050b8f0_added_chunk_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""added chunk table
Revision ID: 5a1a6050b8f0
Revises: c189fb6eda90
Create Date: 2024-09-26 15:28:51.352477
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel


# revision identifiers, used by Alembic.
revision: str = '5a1a6050b8f0'
down_revision: Union[str, None] = 'c189fb6eda90'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``chunk`` table.

    The table has four NOT NULL string columns (``id``, ``page_id``,
    ``file_id``, ``assistant_id``) with ``id`` as the primary key.
    """
    auto_string = sqlmodel.sql.sqltypes.AutoString
    # Order matters: it is the column order of the created table.
    required_columns = ('id', 'page_id', 'file_id', 'assistant_id')
    op.create_table(
        'chunk',
        # A fresh type instance per column, exactly as when written out by hand.
        *(sa.Column(column_name, auto_string(), nullable=False)
          for column_name in required_columns),
        sa.PrimaryKeyConstraint('id'),
    )


def downgrade() -> None:
    """Revert this revision by dropping the ``chunk`` table."""
    # Originally auto-generated by Alembic; the only step is the table drop.
    op.drop_table('chunk')
33 changes: 33 additions & 0 deletions migrations/versions/c189fb6eda90_added_page_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""added page table
Revision ID: c189fb6eda90
Revises: dcaf2be4345d
Create Date: 2024-09-26 14:36:50.706957
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel


# revision identifiers, used by Alembic.
revision: str = 'c189fb6eda90'
down_revision: Union[str, None] = 'dcaf2be4345d'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the ``page`` table.

    The table has three NOT NULL string columns (``id``, ``assistant_id``,
    ``file_id``) with ``id`` as the primary key.
    """
    op.create_table(
        'page',
        sa.Column('id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        # NOT NULL to match the Page ORM model, which declares
        # assistant_id with nullable=False; a nullable column here would
        # let the database accept rows the model treats as invalid.
        sa.Column('assistant_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column('file_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )

def downgrade() -> None:
    """Revert this revision by dropping the ``page`` table."""
    # Originally auto-generated by Alembic; the only step is the table drop.
    op.drop_table('page')

0 comments on commit 231a1f4

Please sign in to comment.