| |
| |
|
|
import os

import numpy as np
import pandas as pd
from supabase import create_client, Client
|
|
| |
# Supabase connection settings.
# The values are read from the environment (SUPABASE_URL / SUPABASE_KEY) so
# credentials do not have to live in source control. The original literals
# are kept only as fallbacks so existing setups keep working unchanged.
# TODO: drop the hard-coded fallbacks once every deployment sets the variables.
URL = os.environ.get("SUPABASE_URL", "https://oflclzbsbgkadqiagxqk.supabase.co")
KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9mbGNsemJzYmdrYWRxaWFneHFrIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MDY0OTY3OTIsImV4cCI6MjAyMjA3Mjc5Mn0.2IGuSFqHbNp75vs-LskGjK0fw3ypqbiHJ9MKAAaYE8s",
)

# Module-level client shared by everything below.
supabase: Client = create_client(URL, KEY)
|
|
def convert_table_to_pandas_dataframe(supabase, table_name):
    """Read all columns of a Supabase table into a pandas DataFrame.

    Args:
        supabase: Client object exposing ``table(name).select(cols).execute()``.
        table_name: Name of the table to query.

    Returns:
        A ``pandas.DataFrame`` built from the ``data`` attribute of the
        executed query's response.
    """
    query = supabase.table(table_name).select("*")
    rows = query.execute().data
    return pd.DataFrame(rows)
|
|
# Load the whole "books" table into memory; everything below works on this frame.
books_df = convert_table_to_pandas_dataframe(supabase, "books")

# Display settings for interactive inspection: truncate long cells at 50
# characters and never hide columns.
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.max_columns', None)

# Notebook-style preview of the first rows (has no effect when run as a script).
books_df.head(5)

# Text used to compare books: description + title + author name.
# Missing values are replaced with '' first; otherwise a single null field
# turns the whole concatenation into NaN, and TfidfVectorizer refuses NaN
# documents when it is fitted on this column.
books_df['combined'] = (
    books_df['description'].fillna('')
    + ' '
    + books_df['title'].fillna('')
    + ' '
    + books_df['author_name'].fillna('')
)
|
|
|
|
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
| |
# TF-IDF vectorizer that ignores scikit-learn's built-in English stop words.
tfidf = TfidfVectorizer(stop_words='english')

# One row per book, one column per vocabulary term.
tfidf_matrix = tfidf.fit_transform(books_df['combined'])

# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); prefer the new name and fall back on old versions.
# list(...) keeps feature_names a plain list, as it was before.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = list(tfidf.get_feature_names_out())
else:
    feature_names = tfidf.get_feature_names()

# Notebook-style inspection of the matrix dimensions.
tfidf_matrix.shape

# Notebook-style peek at a slice of the vocabulary.
feature_names[2000:2500]

# TF-IDF row of the book at position 10 (requires at least 11 books).
document_tfidf_vector = tfidf_matrix[10]

# NOTE(review): this is the sum of the row's TF-IDF weights, not a count of
# terms, so the "Total terms" label printed below is misleading — confirm intent.
total_terms_in_document = document_tfidf_vector.sum()

print("Document vector: ", document_tfidf_vector)
print("Total terms in document {}: {}".format(10, total_terms_in_document))

# Notebook-style display of the fitted vectorizer.
tfidf

print(tfidf_matrix[0].shape)
|
|
| |
| |
| from sklearn.metrics.pairwise import linear_kernel |
|
|
| |
# Pairwise similarity between every pair of books. TfidfVectorizer
# L2-normalises rows by default, so the plain dot product computed by
# linear_kernel equals the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: title -> row index in books_df.
# Series.drop_duplicates() would de-duplicate the *values* (the row indices,
# which are already unique) and leave repeated titles in place, so a lookup
# for such a title would return a Series instead of a single index.
# Keep only the first occurrence of each title instead.
indices = pd.Series(books_df.index, index=books_df['title'])
indices = indices[~indices.index.duplicated(keep='first')]
|
|
def get_original_book_id(title):
    """Return the ``id`` of the first ``books_df`` row whose title equals *title*.

    Raises:
        IndexError: If no row has that title.
    """
    title_matches = books_df['title'] == title
    matching_ids = books_df.loc[title_matches, 'id']
    return matching_ids.values[0]
|
|
| |
def get_top_five_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the ids of the books most similar to the book named *title*.

    Despite the function's name, the established behaviour is to return up to
    ten ids; that remains the default, and ``top_n`` lets callers ask for a
    different count.

    Args:
        title: Exact title, looked up in the module-level ``indices`` Series.
        cosine_sim: Square similarity matrix; row ``i`` holds the scores of
            book ``i`` against every book.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of ``id`` values from ``books_df``, most similar first. The
        queried book itself is never included.

    Raises:
        KeyError: If *title* is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series;
    # use its first occurrence so idx is always a single row position.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Score every other book against the queried one. The queried row is
    # excluded explicitly rather than by dropping the top hit, because a
    # book with an identical score could otherwise sort ahead of it.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # sorted() is stable, so equally similar books keep their row order.
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)

    book_indices = [position for position, _ in sim_scores[:top_n]]

    # Read the ids straight from the selected rows. Going through the title
    # would return the wrong id whenever two books share a title.
    return books_df['id'].iloc[book_indices].tolist()
|
|
# Notebook-style sanity check: recommendations for one known title ...
walls_of_ash_ids = get_top_five_recommendations('Walls of Ash')
walls_of_ash_ids

# ... and the URLs of the recommended books.
is_recommended = books_df['id'].isin(walls_of_ash_ids)
books_df.loc[is_recommended, 'url']
|
|
|
|
| from flask import Flask, jsonify, request |
| from flask_ngrok import run_with_ngrok |
|
|
# Flask application object; the /predict route below is registered on it.
app = Flask(__name__)
# NOTE(review): run_with_ngrok presumably hooks app.run(), but this file
# starts the app through waitress' serve() instead — confirm the ngrok
# tunnel is actually opened in that setup.
run_with_ngrok(app)
|
|
| import json |
@app.route('/predict/<int:id>', methods=['GET'])
def predict(id):
    """Return recommended book ids for the book with the given id, as JSON.

    Args:
        id: Book id taken from the URL. The name shadows the builtin but is
            kept because it is the route's variable name.

    Returns:
        A JSON array of integer book ids, or a JSON error object with status
        404 when no book has the requested id.
    """
    matching_titles = books_df.loc[books_df['id'] == id, 'title']
    # An unknown id used to raise IndexError and surface as a 500 response.
    if matching_titles.empty:
        return jsonify(error=f"No book with id {id}"), 404

    title = matching_titles.values[0]
    print(title)
    # Cast to plain ints so numpy integer ids serialize cleanly.
    prediction_result = [int(x) for x in get_top_five_recommendations(title)]
    # jsonify sets the application/json content type; returning the string
    # from json.dumps was sent as text/html.
    return jsonify(prediction_result)
|
|
| from waitress import serve |
|
|
# Start the server only when the file is executed directly, not on import.
if __name__ == '__main__':
    # Serve the Flask app with waitress on all interfaces, port 8080.
    serve(app, host="0.0.0.0", port=8080)
|
|
|
|