import requests
from bs4 import BeautifulSoup
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
# Function for web scraping
def scrape_website(url):
    """Fetch *url* and extract articles from its HTML.

    Returns a list of ``{'title', 'content'}`` dicts, one per ``<article>``
    element (title from its first ``<h2>``, content from its first ``<p>``),
    or an error string when the response is not HTML.
    """
    # timeout prevents the request from hanging forever on a stalled server
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # fail loudly instead of scraping an error page
    # .get() avoids a KeyError when the server omits the Content-Type header
    if 'text/html' in response.headers.get('Content-Type', ''):
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = soup.find_all('article')
        # Skip articles missing an <h2> or <p>; the original raised
        # AttributeError ('NoneType' has no attribute 'text') on those.
        return [
            {'title': article.h2.text, 'content': article.p.text}
            for article in articles
            if article.h2 is not None and article.p is not None
        ]
    else:
        return "Non-HTML content received, cannot scrape."
# Function to respect the site's robots.txt rules
def can_fetch(url):
    """Return True when the site's robots.txt permits fetching *url*.

    Downloads ``<scheme>://<host>/robots.txt`` and evaluates it with the
    stdlib robots.txt parser for the wildcard user agent. A missing or
    unreadable robots.txt, or any network failure, yields False
    (conservative, matching the original behavior).
    """
    from urllib.parse import urlparse
    from urllib.robotparser import RobotFileParser

    try:
        # robots.txt lives at the site root, not under the page's path;
        # the original f"{url}/robots.txt" broke for URLs with a path.
        parts = urlparse(url)
        robots_url = f"{parts.scheme}://{parts.netloc}/robots.txt"
        response = requests.get(robots_url, timeout=10)
        if response.status_code != 200:
            return False
        parser = RobotFileParser()
        parser.parse(response.text.splitlines())
        # Proper per-path evaluation. The original substring check rejected
        # any robots.txt containing "Disallow:" at all, even for rules that
        # only cover unrelated paths.
        return parser.can_fetch('*', url)
    except requests.exceptions.RequestException:
        return False
# Run the web-scraping step, guarded by the site's robots.txt.
# (The original header line lacked the '#' prefix — a syntax error.)
website_url = 'http://example.com/articles'
if can_fetch(website_url):
    scraped_data = scrape_website(website_url)
    print(scraped_data)
else:
    print("robots.txt rules do not allow scraping this website.")
# Text-classification demo on the 20 newsgroups corpus.
# (The original Turkish step headers lacked the '#' prefix — syntax errors.)

# Load the dataset (downloads the corpus on first use).
data = fetch_20newsgroups()
# Split into train and test sets; fixed seed for reproducibility.
train_data, test_data, train_labels, test_labels = train_test_split(
    data.data, data.target, test_size=0.25, random_state=42
)
# Build the model: TF-IDF features feeding a multinomial naive Bayes classifier.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
# Train the model.
model.fit(train_data, train_labels)
# Predict on the held-out test data.
predicted_labels = model.predict(test_data)
# Evaluate performance.
print(confusion_matrix(test_labels, predicted_labels))
print(classification_report(test_labels, predicted_labels))
import requests
from bs4 import BeautifulSoup
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
# Function for web scraping
def scrape_website(url):
    """Fetch *url* and extract articles from its HTML.

    Returns a list of ``{'title', 'content'}`` dicts, one per ``<article>``
    element (title from its first ``<h2>``, content from its first ``<p>``),
    or an error string when the response is not HTML.
    """
    # timeout prevents the request from hanging forever on a stalled server
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # fail loudly instead of scraping an error page
    # .get() avoids a KeyError when the server omits the Content-Type header
    if 'text/html' in response.headers.get('Content-Type', ''):
        soup = BeautifulSoup(response.text, 'html.parser')
        articles = soup.find_all('article')
        # Skip articles missing an <h2> or <p>; the original raised
        # AttributeError ('NoneType' has no attribute 'text') on those.
        return [
            {'title': article.h2.text, 'content': article.p.text}
            for article in articles
            if article.h2 is not None and article.p is not None
        ]
    else:
        return "Non-HTML content received, cannot scrape."
# Function to respect the site's robots.txt rules
def can_fetch(url):
    """Return True when the site's robots.txt permits fetching *url*.

    Downloads ``<scheme>://<host>/robots.txt`` and evaluates it with the
    stdlib robots.txt parser for the wildcard user agent. A missing or
    unreadable robots.txt, or any network failure, yields False
    (conservative, matching the original behavior).
    """
    from urllib.parse import urlparse
    from urllib.robotparser import RobotFileParser

    try:
        # robots.txt lives at the site root, not under the page's path;
        # the original f"{url}/robots.txt" broke for URLs with a path.
        parts = urlparse(url)
        robots_url = f"{parts.scheme}://{parts.netloc}/robots.txt"
        response = requests.get(robots_url, timeout=10)
        if response.status_code != 200:
            return False
        parser = RobotFileParser()
        parser.parse(response.text.splitlines())
        # Proper per-path evaluation. The original substring check rejected
        # any robots.txt containing "Disallow:" at all, even for rules that
        # only cover unrelated paths.
        return parser.can_fetch('*', url)
    except requests.exceptions.RequestException:
        return False
# Run the web-scraping step, guarded by the site's robots.txt.
# (The original header line lacked the '#' prefix — a syntax error.)
website_url = 'http://example.com/articles'
if can_fetch(website_url):
    scraped_data = scrape_website(website_url)
    print(scraped_data)
else:
    print("robots.txt rules do not allow scraping this website.")
# Text-classification demo on the 20 newsgroups corpus.
# (The original Turkish step headers lacked the '#' prefix — syntax errors.)

# Load the dataset (downloads the corpus on first use).
data = fetch_20newsgroups()
# Split into train and test sets; fixed seed for reproducibility.
train_data, test_data, train_labels, test_labels = train_test_split(
    data.data, data.target, test_size=0.25, random_state=42
)
# Build the model: TF-IDF features feeding a multinomial naive Bayes classifier.
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
# Train the model.
model.fit(train_data, train_labels)
# Predict on the held-out test data.
predicted_labels = model.predict(test_data)
# Evaluate performance.
print(confusion_matrix(test_labels, predicted_labels))
print(classification_report(test_labels, predicted_labels))