Update extract_keywords.py (#54)

exorde-labs · Feb 28, 2024 · bc9101b
1 parent cf16f1d
commit bc9101b
Showing 1 changed file with 11 additions and 2 deletions.
diff --git a/exorde/extract_keywords.py b/exorde/extract_keywords.py
@@ -92,7 +92,12 @@ def is_valid_keyword(word):
         isalpha_count = sum(1 for char in word if char.isalpha())
         total_chars = len(word)
         punctuation = re.compile(r'[^\w\s,]')
-        return (uppercase_count / total_chars >= 0.3) and (punctuation.search(word) is not None) and (isalpha_count>1)
+        # Prevent division by zero
+        if total_chars > 0:
+            return (uppercase_count / total_chars >= 0.3) and (punctuation.search(word) is not None) and (isalpha_count>1)
+        else:
+            return False
+
 
     words = nltk.word_tokenize(text)
     filtered_words = filter(is_valid_keyword, words)
@@ -138,7 +143,11 @@ def is_valid_acronym(word):
         uppercase_count = sum(1 for char in word if char.isupper())
         isalpha_count = sum(1 for char in word if char.isalpha())
         total_chars = len(word)
-        return (uppercase_count / total_chars >= 0.3) and (isalpha_count>=1) and len(word) >= 2
+        # Prevent division by zero
+        if total_chars > 0:
+            return (uppercase_count / total_chars >= 0.3) and (isalpha_count>=1) and len(word) >= 2
+        else:
+            return False
 
     # split by space and special punctuation: comma, point, period
     # not nltk tokenize