Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 85 additions & 8 deletions src/parse/parse.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import nearly from "nearley";
import type { ParsedCatalogEntry } from "./types";
import { HRowType, HSectionType } from "@/tokenize";
import type {
HRow,
HSection,
TextRow,
TokenizedCatalogEntry,
import { HRowType, HSectionType, ConcentrationLeadingHeaderExceptionValue, ConcentrationTrailingHeaderExceptionValue } from "@/tokenize";
import {
type HRow,
type HSection,
type TextRow,
type TokenizedCatalogEntry,
} from "@/tokenize";
import { writeFile } from "fs/promises";
import { FileName } from "@/classify";
Expand Down Expand Up @@ -125,10 +125,57 @@ export const parseTokens = (sections: HSection[]) => {
.filter(metaSection => metaSection.type === HSectionType.CONCENTRATION)
.map(metaSection => {
metaSection.entries = metaSection.entries.filter(
row =>
row.type !== HRowType.COMMENT && row.type !== HRowType.SUBSUBHEADER,
row => row.type !== HRowType.SUBSUBHEADER
);

metaSection.entries = metaSection.entries.flatMap((row, index) => {
console.log("NEW KOBE READING ROW")
console.log(row)
// if this row is a comment and the previous row is an exception elective header,
// then this row is probably a comment that is meant to be a X_OF_MANY row
if ((row.type == HRowType.COMMENT || row.type == HRowType.SECTION_INFO) && index > 0) {
if (row.description.startsWith("If")) {
// special case introduced by "Concentration in Campaigns and Elections" in the following major
// https://catalog.northeastern.edu/archive/2021-2022/undergraduate/arts-media-design/journalism/journalism-political-science-ba/#programrequirementstext
return [];
}
const prevRow = metaSection.entries[index - 1]!;
console.log("KOBE CHECKING PREVIOUS ROW")
console.log(prevRow)
if (prevRow.type == HRowType.HEADER && isConcentrationExceptionValue(prevRow.description)) {
console.log("CONVERTING TO X_OF_MANY")
return [
{
type: HRowType.X_OF_MANY,
description: row.description,
hour: row.hour,
},
];
} else {
return [];
}
}

// if this row is a header and the 'Required Courses' exception type,
// then the description of the section should be used to identify the concentration section
// otherwise, remove the 'Electives' exception type
if (row.type == HRowType.HEADER && isConcentrationExceptionValue(row.description)) {
if (isConcentrationLeadingHeaderExceptionValue(row.description) && index == 0) {
console.log("KOBE CONVERTING TO CONCENTRATION " + metaSection.description)
return [
{
...row,
description: metaSection.description,
}
];
} else {
console.log("KOBE REMOVING EXCEPTION")
return []
}
}
return row;
});

if (
metaSection.entries.length >= 1 &&
metaSection.entries[0]?.type != HRowType.HEADER
Expand All @@ -150,3 +197,33 @@ export const parseTokens = (sections: HSection[]) => {
concentrations,
};
};

/**
 * Type predicate reporting whether a header's text is one of the known
 * concentration header exception strings, i.e. a value of either
 * ConcentrationLeadingHeaderExceptionValue or
 * ConcentrationTrailingHeaderExceptionValue.
 *
 * The comparison is exact: no trimming or case folding is applied.
 *
 * @param value - header text to test
 * @returns true when the text equals a value of either enum
 */
function isConcentrationExceptionValue(
  value: string
): value is
  | ConcentrationLeadingHeaderExceptionValue
  | ConcentrationTrailingHeaderExceptionValue {
  // Check the leading-header values first; only consult the trailing set on a miss.
  const leadingValues: string[] = Object.values(
    ConcentrationLeadingHeaderExceptionValue
  );
  if (leadingValues.includes(value)) {
    return true;
  }

  const trailingValues: string[] = Object.values(
    ConcentrationTrailingHeaderExceptionValue
  );
  return trailingValues.includes(value);
}

/**
 * Type predicate reporting whether a header's text exactly equals one of the
 * values of ConcentrationLeadingHeaderExceptionValue.
 *
 * @param value - header text to test
 * @returns true when the text is a leading-header exception value
 */
function isConcentrationLeadingHeaderExceptionValue(value: string): value is ConcentrationLeadingHeaderExceptionValue {
  const knownLeadingHeaders: string[] = Object.values(
    ConcentrationLeadingHeaderExceptionValue
  );
  return knownLeadingHeaders.some(header => header === value);
}

/**
 * Type predicate reporting whether a header's text exactly equals one of the
 * values of ConcentrationTrailingHeaderExceptionValue.
 *
 * @param value - header text to test
 * @returns true when the text is a trailing-header exception value
 */
function isConcentrationTrailingHeaderExceptionValue(value: string): value is ConcentrationTrailingHeaderExceptionValue {
  const knownTrailingHeaders: string[] = Object.values(
    ConcentrationTrailingHeaderExceptionValue
  );
  const position = knownTrailingHeaders.indexOf(value);
  return position !== -1;
}
3 changes: 2 additions & 1 deletion src/tokenize/tokenize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,8 @@ const getRowType = (
throw Error(`td class was not "codecol": "${tdClasses}"`);
}

const tdText = parseText(td);

if (trClasses.has("subheader")) {
const isSubSubHeader = $(tr).find("span").hasClass("commentindent");
if (isSubSubHeader) {
Expand All @@ -385,7 +387,6 @@ const getRowType = (
return HRowType.HEADER;
}

const tdText = parseText(td);
// Different range types
if (
RANGE_LOWER_BOUNDED_MAYBE_EXCEPTIONS_1.test(tdText) ||
Expand Down
41 changes: 41 additions & 0 deletions src/tokenize/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,44 @@ export type TokenizedCatalogEntry = TypedCatalogEntry & {
programRequiredHours: number;
sections: HSection[];
};

/**
 * Header texts that lead to the common concentration-name issue.
 *
 * Typically, the headers within the requirement tables specify the requirement section name.
 * However, in some cases the headers of concentration sections specify the concentration
 * requirements instead of the concentration name, which causes the requirement section name
 * to be used where the concentration name belongs.
 * This enum is used to identify such concentration headers so that separate tokenization and
 * parsing logic can be applied to them.
 *
 * NOTE(review): this enum looks superseded by the more specific leading/trailing header
 * exception enums declared in this file — confirm whether it is still referenced anywhere
 * before relying on or removing it.
 *
 * Read more here:
 * https://www.notion.so/sandboxnu/Concentration-Issue-1a118273b1f4806da9e9fa99c9ca9a27?pvs=4
 */
export enum ConcentrationExceptionValue {
// Header text "Electives".
ELECTIVES = "Electives",
// Header text "Required Courses".
REQUIRED_COURSES = "Required Courses",
}

/**
 * Leading headers are header texts that may need to be replaced by the concentration name.
 *
 * Each member's value is the exact header text as it appears in the catalog; matching is
 * done by string equality against these values.
 *
 * NOTE(review): "Experiential/Practicum Requirement" is also a value of
 * ConcentrationTrailingHeaderExceptionValue — confirm that the overlap is intentional.
 */
export enum ConcentrationLeadingHeaderExceptionValue {
REQUIRED_COURSES = "Required Courses",
// caused by: https://catalog.northeastern.edu/archive/2021-2022/undergraduate/arts-media-design/journalism/journalism-political-science-ba/#programrequirementstext
// NOTE(review): the key is plural but the matched text is singular — presumably the text mirrors the catalog; verify.
THEORETICAL_REQUIREMENTS = "Theoretical Requirement",
CORE_COURSE = "Core Course",
EXPERIENTIAL_REQUIREMENT = "Experiential/Practicum Requirement",
CORE_REQUIREMENT = "Core Requirement",

}

/**
 * Trailing headers are header texts inside a concentration section that describe a group of
 * requirements (for example electives) rather than naming the concentration.
 *
 * Each member's value is the exact header text as it appears in the catalog; matching is
 * done by string equality against these values.
 *
 * NOTE(review): presumably these headers are dropped during parsing while the row that
 * follows them is kept as the requirement — confirm against the concentration handling in
 * the parser.
 */
export enum ConcentrationTrailingHeaderExceptionValue {
ELECTIVES = "Electives",
// caused by: https://catalog.northeastern.edu/archive/2021-2022/undergraduate/arts-media-design/journalism/journalism-political-science-ba/#programrequirementstext
CAMPAIGNS_AND_ELECTIONS_ELECTIVES = "Campaigns and Elections Electives",
REGIONAL_REQUIREMENTS = "Regional Requirements",
// NOTE(review): the same text is also a leading-header exception value — confirm the overlap is intentional.
EXPERIENTIAL_REQUIREMENT = "Experiential/Practicum Requirement",
// NOTE(review): the key is singular but the matched text is plural ("Core Courses") — verify against the catalog.
CORE_COURSE = "Core Courses",
}