Skip to content

Commit c3bc123

Browse files
committed
update BERTopic test
Signed-off-by: Tim Schopf <tim.schopf@t-online.de>
1 parent b1e1ff4 commit c3bc123

File tree

2 files changed

+4
-5
lines changed

2 files changed

+4
-5
lines changed

tests/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ keybert>=0.5.0
 flair==0.11.3
 scipy==1.7.3
 bertopic>=0.16.1
-datasets==2.13.2
+scikit-learn>=1.0.1
 umap-learn==0.5.4

tests/test_vectorizers.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
 import flair
 import spacy
 from bertopic import BERTopic
-from datasets import load_dataset
 from flair.models import SequenceTagger
 from flair.tokenization import SegtokSentenceSplitter
 from keybert import KeyBERT
+from sklearn.datasets import fetch_20newsgroups
 
 import tests.utils as utils
 from keyphrase_vectorizers import KeyphraseCountVectorizer, KeyphraseTfidfVectorizer
@@ -172,9 +172,8 @@ def test_online_vectorizer():
 
 
 def test_bertopic():
-    data = load_dataset("ag_news")
-    texts = data['train']['text']
-    texts = texts[:100]
+    data = fetch_20newsgroups(subset='train')
+    texts = data.data[:100]
     topic_model = BERTopic(vectorizer_model=KeyphraseCountVectorizer())
     topics, probs = topic_model.fit_transform(documents=texts)
     new_topics = topic_model.reduce_outliers(texts, topics)

0 commit comments

Comments
 (0)