File tree Expand file tree Collapse file tree 2 files changed +4
-5
lines changed Expand file tree Collapse file tree 2 files changed +4
-5
lines changed Original file line number Diff line number Diff line change @@ -3,5 +3,5 @@ keybert>=0.5.0
3
3
flair == 0.11.3
4
4
scipy == 1.7.3
5
5
bertopic >= 0.16.1
6
- datasets == 2.13.2
6
+ scikit-learn >= 1.0.1
7
7
umap-learn == 0.5.4
Original file line number Diff line number Diff line change 3
3
import flair
4
4
import spacy
5
5
from bertopic import BERTopic
6
- from datasets import load_dataset
7
6
from flair .models import SequenceTagger
8
7
from flair .tokenization import SegtokSentenceSplitter
9
8
from keybert import KeyBERT
9
+ from sklearn .datasets import fetch_20newsgroups
10
10
11
11
import tests .utils as utils
12
12
from keyphrase_vectorizers import KeyphraseCountVectorizer , KeyphraseTfidfVectorizer
@@ -172,9 +172,8 @@ def test_online_vectorizer():
172
172
173
173
174
174
def test_bertopic ():
175
- data = load_dataset ("ag_news" )
176
- texts = data ['train' ]['text' ]
177
- texts = texts [:100 ]
175
+ data = fetch_20newsgroups (subset = 'train' )
176
+ texts = data .data [:100 ]
178
177
topic_model = BERTopic (vectorizer_model = KeyphraseCountVectorizer ())
179
178
topics , probs = topic_model .fit_transform (documents = texts )
180
179
new_topics = topic_model .reduce_outliers (texts , topics )
You can’t perform that action at this time.
0 commit comments