Skip to content

Commit

Permalink
Merge branch 'main' into collapse-create-base-text-units
Browse files: browse the repository at this point in the history
  • Loading branch information
natoverse committed Sep 23, 2024
2 parents 67e6ac4 + fbc483e commit 380a00a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 10 deletions.
7 changes: 3 additions & 4 deletions graphrag/index/workflows/v1/subflows/create_base_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,18 @@ def create_base_documents(

rejoined = docs_with_text_units.merge(
source,
left_on="id",
right_on="id",
on="id",
how="right",
)
rejoined.rename(columns={"text": "raw_content"}, inplace=True)
rejoined["id"] = rejoined["id"].astype(str)

# attribute columns are converted to strings and then collapsed into a single json object
if document_attribute_columns is not None and len(document_attribute_columns) > 0:
if document_attribute_columns:
for column in document_attribute_columns:
rejoined[column] = rejoined[column].astype(str)
rejoined["attributes"] = rejoined[document_attribute_columns].apply(
lambda row: ({**row}),
lambda row: {**row},
axis=1,
)
rejoined.drop(columns=document_attribute_columns, inplace=True)
Expand Down
10 changes: 4 additions & 6 deletions tests/verbs/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,10 @@ def compare_outputs(
"""Compare the actual and expected dataframes, optionally specifying columns to compare.
This uses assert_series_equal since we are sometimes intentionally omitting columns from the actual output."""
cols = expected.columns if columns is None else columns
try:
assert len(actual) == len(expected)
except AssertionError:
print("Expected:", cols)
print("Actual:", actual.columns)
raise

assert len(actual) == len(
expected
), f"Expected: {len(expected)}, Actual: {len(actual)}"

for column in cols:
assert column in actual.columns
Expand Down

0 comments on commit 380a00a

Please sign in to comment.