Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backend/src/documents/upload_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ def upload_file_data(
bucket_name,
default_folder,
cloud_storage: CloudStorage = None,
) -> str:
) -> (str, str):
decoded_file_content = decode_file_content(file_content)
secure_filename, document_id = generate_secure_filename(file_name)

key = f"{default_folder}{secure_filename}"
cloud_storage.put_object(bucket_name, key, decoded_file_content, {"original_filename": file_name})

return document_id
return document_id, key


def decode_file_content(file_content) -> bytes:
Expand Down
18 changes: 15 additions & 3 deletions backend/src/documents/write_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,29 @@


@context.inject
def write_document(document_url: str, document_type: str | None, extracted_data: dict, database: Database = None):
def write_document(document_id: str, document_url: str, database: Database = None):
    """Store the initial record for a document.

    The record holds the given ``document_id`` and ``document_url`` and is
    marked with the status ``"processing"``. It is handed to
    ``database.write_document``.
    """
    initial_record = {"document_id": document_id, "document_url": document_url}
    # Added last so the key order matches the stored record layout.
    initial_record["status"] = "processing"

    database.write_document(initial_record)


@context.inject
def update_document(document_url: str, document_type: str | None, extracted_data: dict, database: Database = None):
document_id = convert_document_url_to_id(document_url)

item_to_store = {
document_to_store = {
"document_id": document_id,
"document_url": document_url,
"document_type": document_type,
"extracted_data": extracted_data,
"status": "complete",
}

database.write_document(item_to_store)
database.write_document(document_to_store)


def convert_document_url_to_id(document_url: str):
Expand Down
5 changes: 3 additions & 2 deletions backend/src/external/aws/lambdas/get_extracted_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ def lambda_handler(event, context):
}

response = {
"status": document_info["status"],
"document_id": document_id,
"document_key": document_info["document_url"],
"document_type": document_info["document_type"],
"document_key": document_info.get("document_url"),
"document_type": document_info.get("document_type"),
"extracted_data": document_info.get("extracted_data", {}),
"signed_url": storage_access_url,
"base64_encoded_file": base64.b64encode(document_data).decode("utf-8"),
Expand Down
8 changes: 7 additions & 1 deletion backend/src/external/aws/lambdas/s3_file_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
import os

from src import context
from src.database.database import Database
from src.documents import write_document
from src.documents.upload_document import upload_file_data
from src.external.aws.dynamodb import DynamoDb
from src.external.aws.s3 import S3
from src.storage import CloudStorage

appContext = context.ApplicationContext()
appContext.register(CloudStorage, S3())
appContext.register(Database, DynamoDb())


def lambda_handler(event, context):
Expand All @@ -22,7 +26,9 @@ def lambda_handler(event, context):
try:
bucket_name = os.environ.get("S3_BUCKET_NAME", "ocr-poc-flex")
default_folder = "input/"
document_id = upload_file_data(body["file_name"], body["file_content"], bucket_name, default_folder)
document_id, key = upload_file_data(body["file_name"], body["file_content"], bucket_name, default_folder)
s3_url = f"s3://{bucket_name}/{key}"
write_document.write_document(document_id, s3_url)
except Exception as e:
return {
"statusCode": 500,
Expand Down
2 changes: 1 addition & 1 deletion backend/src/external/aws/lambdas/sqs_dynamo_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def lambda_handler(event, context):
document_type = message_body.get("document_type")
extracted_data = message_body.get("extracted_data", {})

write_document.write_document(document_url, document_type, extracted_data)
write_document.update_document(document_url, document_type, extracted_data)

sqs_client = appContext.implementation(SQSClient)
sqs_client.delete_message(QueueUrl=sqs_queue_url, ReceiptHandle=record["receiptHandle"])
Expand Down
8 changes: 5 additions & 3 deletions backend/tests/documents/test_upload_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,18 @@ def teardown_function():
def test_uploading_file_data_returns_document_id():
mock_file_content = b"Hello, this is a test file."
decoded_file_content = base64.b64encode(mock_file_content).decode("utf-8")
expected_folder = "mock_folder/"

mock_cloud_storage = mock.MagicMock()
mock_cloud_storage.put_object.return_value = None
context.register(CloudStorage, mock_cloud_storage)

actual_document_id = upload_file_data("original.txt", decoded_file_content, "mock_bucket", "mock_folder")
actual_document_id, actual_key = upload_file_data(
"original.txt", decoded_file_content, "mock_bucket", expected_folder
)

assert actual_document_id is not None
assert isinstance(actual_document_id, str)
assert str(uuid.UUID(actual_document_id)) == actual_document_id
assert actual_key.startswith(expected_folder)


def test_generating_secure_filename_works():
Expand Down
38 changes: 29 additions & 9 deletions backend/tests/documents/test_write_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@ def test_write_document_works():
context.register(Database, mock_database)

expected_document_id = "DogCow"
expected_document_url = f"s3://bucket/moof/{expected_document_id}.txt"
expected_document_type = "W2"
expected_extracted_data = {
"name": "Clarus",
}
expected_status = "processing"
expected_document_url = "s3://DogCow/Key"

expected_item = {
"document_id": expected_document_id,
"status": expected_status,
"document_url": expected_document_url,
"document_type": expected_document_type,
"extracted_data": expected_extracted_data,
}

write_document.write_document(expected_document_url, expected_document_type, expected_extracted_data)
write_document.write_document(expected_document_id, expected_document_url)

mock_database.write_document.assert_called_with(expected_item)

Expand All @@ -43,4 +40,27 @@ def test_write_document_fails():
context.register(Database, mock_database)

with pytest.raises(DatabaseException):
write_document.write_document("s3://bucket/moof/DogCow.txt", "W2", {"name": "Clarus"})
write_document.write_document("CowDog", "s3://CowDogUrl")


def test_update_document_works():
    """update_document should write the full record with status "complete"."""
    fake_database = mock.MagicMock()
    context.register(Database, fake_database)

    doc_id = "DogCow"
    doc_url = f"s3://bucket/moof/{doc_id}.txt"
    doc_type = "W2"
    extracted = {
        "name": "Clarus",
    }

    write_document.update_document(doc_url, doc_type, extracted)

    # Only the URL is passed in; the expected document_id must come out of it.
    fake_database.write_document.assert_called_with(
        {
            "document_id": doc_id,
            "document_url": doc_url,
            "document_type": doc_type,
            "extracted_data": extracted,
            "status": "complete",
        }
    )
1 change: 1 addition & 0 deletions iac/endpoints.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ module "document_endpoints" {

environment_variables = {
S3_BUCKET_NAME = aws_s3_bucket.document_storage.bucket
DYNAMODB_TABLE = aws_dynamodb_table.extract_table.name
}

authorizer = aws_api_gateway_authorizer.authorizer.id
Expand Down
35 changes: 24 additions & 11 deletions ui/src/pages/VerifyPage.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ export default function VerifyPage({ signOut }) {
const navigate = useNavigate();

async function pollApiRequest(attempts = 30, delay = 2000) {
// Helper function to sleep for the specified delay
const sleep = () => new Promise((resolve) => setTimeout(resolve, delay));

for (let i = 0; i < attempts; i++) {
try {
const response = await authorizedFetch(`/api/document/${documentId}`, {
Expand All @@ -22,26 +25,36 @@ export default function VerifyPage({ signOut }) {
},
});

if (response.ok) {
const result = await response.json(); // parse response

setResponseData(result); // store API data in state
setLoading(false); // stop loading when data is received
setError(false); // clear any previous errors
return;
} else if (response.status === 401 || response.status === 403) {
if (response.status === 401 || response.status === 403) {
alert('You are no longer signed in! Please sign in again.');
signOut();
return;
} else {
} else if (!response.ok) {
console.warn(`Attempt ${i + 1} failed: ${response.statusText}`);
await sleep();
continue;
}

const result = await response.json(); // parse response

if (result.status !== 'complete') {
console.info(
`Attempt ${i + 1} is not complete. Trying again in a little bit.`
);
await sleep();
continue;
}

setResponseData(result); // store API data in state
setLoading(false); // stop loading when data is received
setError(false); // clear any previous errors
return;
} catch (error) {
console.error(`Attempt ${i + 1} failed:`, error);
await sleep();
}

await new Promise((resolve) => setTimeout(resolve, delay));
}

console.error('Attempt failed after max attempts');
setLoading(false);
setError(true);
Expand Down
Loading