LLM Zoomcamp 2025: Module 3 Homework

Author

Tony Wu

1 Problem 1

import requests
import pandas as pd
from tqdm.auto import tqdm
import minsearch

# Download the course FAQ documents (with stable ids) and the ground-truth
# evaluation queries from the llm-zoomcamp repository.
url_prefix = 'https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/03-evaluation/'
docs_url = url_prefix + 'search_evaluation/documents-with-ids.json'
documents = requests.get(docs_url).json()

ground_truth_url = url_prefix + 'search_evaluation/ground-truth-data.csv'
df_ground_truth = pd.read_csv(ground_truth_url)
# One dict per query; rows carry 'question', 'course' and the relevant
# 'document' id (the fields read by evaluate/search below).
ground_truth = df_ground_truth.to_dict(orient='records')

def hit_rate(relevance_total):
    """Return the fraction of queries whose result list contains a hit.

    relevance_total: list of per-query boolean lists, where True marks the
    position of the relevant document in that query's search results.

    Returns 0.0 for empty input instead of raising ZeroDivisionError.
    """
    if not relevance_total:
        return 0.0
    # True in line  ->  the relevant doc appeared anywhere in the results.
    return sum(True in line for line in relevance_total) / len(relevance_total)

def mrr(relevance_total):
    """Return the Mean Reciprocal Rank over all queries.

    relevance_total: list of per-query boolean lists, where True marks the
    position of the relevant document in that query's search results.

    For each query only the FIRST relevant position contributes 1/(rank+1);
    the original version kept scanning and would over-count a line with
    several True entries. Returns 0.0 for empty input.
    """
    if not relevance_total:
        return 0.0

    total_score = 0.0
    for line in relevance_total:
        for rank, is_relevant in enumerate(line):
            if is_relevant:
                total_score += 1 / (rank + 1)
                break  # MRR counts only the first relevant result

    return total_score / len(relevance_total)

def evaluate(ground_truth, search_function):
    """Score a search function against the ground-truth queries.

    For every query, run search_function and record a boolean list marking
    which returned documents match the query's relevant 'document' id.
    Returns a dict with 'hit_rate' and 'mrr'.
    """
    relevance_total = [
        [hit['id'] == query['document'] for hit in search_function(query)]
        for query in tqdm(ground_truth)
    ]

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

# Build a text index over the documents: full-text scoring on question,
# text and section; exact-match filtering on course.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)
index.fit(documents)

def search(q):
    """Text search for one ground-truth query; returns the top-5 documents."""
    # Weight matches on the question field up and the section field down.
    return index.search(
        query=q['question'],
        filter_dict={'course': q['course']},
        boost_dict={'question': 1.5, 'section': 0.1},
        num_results=5
    )

# Problem 1: hit rate of the text-based minsearch index.
results = evaluate(ground_truth, search_function=search)
print(f"The hitrate is: {results['hit_rate']}")
The hitrate is: 0.848714069591528

2 Problem 2

from minsearch import VectorSearch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline

# Embed each document's question text with TF-IDF followed by SVD
# (128 dimensions), then index the vectors for course-filtered search.
texts = [doc['question'] for doc in documents]

pipeline = make_pipeline(
    TfidfVectorizer(min_df=3),
    TruncatedSVD(n_components=128, random_state=1)
)
X = pipeline.fit_transform(texts)

vindex = VectorSearch(keyword_fields={'course'})
vindex.fit(X, documents)

def vectorsearch(q):
    """Vector search for one ground-truth query; returns the top-5 documents."""
    # Project the query question into the same TF-IDF/SVD space as the index.
    q_vec = pipeline.transform([q['question']])
    return vindex.search(
        query_vector=q_vec,
        filter_dict={'course': q['course']},  # respect course filtering
        num_results=5
    )

# Problem 2: MRR of vector search over question-only embeddings.
results = evaluate(ground_truth, search_function=vectorsearch)
print(f"The MRR is: {results['mrr']}")
The MRR is: 0.3573085512571141

3 Problem 3

from minsearch import VectorSearch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline

# Same pipeline as Problem 2, but embed question + answer text together
# so each document vector carries more signal.
texts = [doc['question'] + ' ' + doc['text'] for doc in documents]

pipeline = make_pipeline(
    TfidfVectorizer(min_df=3),
    TruncatedSVD(n_components=128, random_state=1)
)
X = pipeline.fit_transform(texts)

vindex = VectorSearch(keyword_fields={'course'})
vindex.fit(X, documents)

def vectorsearch(q):
    """Vector search for one ground-truth query; returns the top-5 documents."""
    # Project the query question into the same TF-IDF/SVD space as the index.
    q_vec = pipeline.transform([q['question']])
    return vindex.search(
        query_vector=q_vec,
        filter_dict={'course': q['course']},  # respect course filtering
        num_results=5
    )

# Problem 3: hit rate of vector search over question+text embeddings.
results = evaluate(ground_truth, search_function=vectorsearch)
print(f"The hitrate is: {results['hit_rate']}")
The hitrate is: 0.8210503566025502

4 Problem 4

import requests 
from qdrant_client import QdrantClient, models

# Problem 4: semantic search with Qdrant.
# Output size of jinaai/jina-embeddings-v2-small-en.
EMBEDDING_DIMENSIONALITY = 512
collection_name = "hw3"
# Assumes a Qdrant instance is running locally — TODO confirm (e.g. Docker).
qd_client = QdrantClient("http://localhost:6333")
# Drop any previous run's collection so reruns start from a clean slate.
qd_client.delete_collection(collection_name=collection_name)
qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)
# Index the 'course' payload field for filtering.
# NOTE(review): qdsearch below never passes a course filter, so this index
# currently goes unused — confirm whether filtering was intended.
qd_client.create_payload_index(
    collection_name=collection_name,
    field_name="course",
    field_schema="keyword"
)
points = []

# One point per document: the client embeds question + answer text with the
# named model, and the full document dict becomes the point's payload.
for i, doc in enumerate(documents):
    text = doc['question'] + ' ' + doc['text']
    vector = models.Document(text=text, model="jinaai/jina-embeddings-v2-small-en")
    point = models.PointStruct(
        id=i,
        vector=vector,
        payload=doc
    )
    points.append(point)

qd_client.upsert(
    collection_name=collection_name,
    points=points
)

def qdsearch(q):
    """Qdrant semantic search for one query.

    Embeds the query question with the same model used at indexing time and
    returns the payloads of the top-5 hits that carry an 'id' field.
    NOTE(review): unlike search/vectorsearch, no course filter is applied.
    """
    response = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(
            text=q['question'],
            model="jinaai/jina-embeddings-v2-small-en"
        ),
        limit=5,  # top closest matches
        with_payload=True,  # include document metadata in the hits
    )

    payloads = []
    for point in response.points:
        if point.payload and 'id' in point.payload:
            payloads.append(point.payload)
    return payloads

# Problem 4: MRR of Qdrant semantic search.
results = evaluate(ground_truth, search_function=qdsearch)
print(f"The MRR is: {results['mrr']}")
The MRR is: 0.8248685253223843

5 Problem 5

import numpy as np
def cosine(u, v):
    """Return the cosine similarity between 1-D vectors u and v.

    Returns 0.0 when either vector has zero norm (the original divided by
    zero in that case, yielding nan/inf with a runtime warning).
    """
    denom = np.linalg.norm(u) * np.linalg.norm(v)
    if denom == 0:
        return 0.0
    return u.dot(v) / denom

# Problem 5: average cosine similarity between LLM answers and originals.
results_url = url_prefix + 'rag_evaluation/data/results-gpt4o-mini.csv'
df_results = pd.read_csv(results_url)

# Re-fit the TF-IDF + SVD pipeline (rebinding the earlier `pipeline`) on all
# three text columns combined, so both answer variants share one vector space.
pipeline = make_pipeline(
    TfidfVectorizer(min_df=3),
    TruncatedSVD(n_components=128, random_state=1)
)

pipeline.fit(df_results.answer_llm + ' ' + df_results.answer_orig + ' ' + df_results.question)

v_llm = pipeline.transform(df_results.answer_llm)
v_orig = pipeline.transform(df_results.answer_orig)
# Pairwise similarity between each LLM answer and its original answer.
cosines = [cosine(u, v) for u, v in zip(v_llm, v_orig)]
print(f"The average cosine similarity is: {np.mean(cosines)}")
The average cosine similarity is: 0.8415841233490402

6 Problem 6

from rouge import Rouge
# Problem 6: average ROUGE-1 F1 between each LLM answer and the original.
rouge = Rouge()

scores = [
    rouge.get_scores(row.answer_llm, row.answer_orig)[0]['rouge-1']['f']
    for row in df_results.itertuples()
]

average = sum(scores) / len(scores)
print(f"Average ROUGE-1 F1: {average}")
Average ROUGE-1 F1: 0.3516946452113944