# ------------ Libraries ------------------
import os

import gradio as gr
from dotenv import load_dotenv
from groq import Groq
from openai import OpenAI
from pinecone import Pinecone
from transformers import AutoTokenizer

load_dotenv()

# ----------- ENVs -------------------
PINECONE_API = os.getenv("PINECONE_API")
PINECONE_ENV = os.getenv("PINECONE_ENV")
PINECONE_NAMESPACE = ["policies", "management"]

# GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_API_KEY = os.getenv("TEST_API")
GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"

LLM_MODEL = "llama-3.3-70b-versatile"
# LLM_MODEL = "meta-llama/llama-4-maverick-17b-128e-instruct"

# NVIDIA embedding API key (the env var name keeps its original "NVIDEA" spelling).
NVIDIA_API = os.getenv("NVIDEA_EMBEDDING_API")
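
# Hedged reference (not part of the original script): the os.getenv() calls above
# expect a .env file along these lines. The key names come from the code; the
# values are placeholders.
#
#   PINECONE_API=<your-pinecone-key>
#   PINECONE_ENV=<your-pinecone-environment>
#   TEST_API=<your-groq-key>
#   NVIDEA_EMBEDDING_API=<your-nvidia-key>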

# ---------- NVIDIA Embedding Client --------------------
client = OpenAI(
    api_key=NVIDIA_API,
    base_url="https://integrate.api.nvidia.com/v1",
)

# ---------- Pinecone --------------------
pc = Pinecone(api_key=PINECONE_API)

# ---------- Index Init --------------------
# index = pc.Index("unilever")
index = pc.Index("unilever-without-metadata")

# ---------- NVIDIA Embedding Generator --------------------
def get_embedding(text: str):
    """Return the NVIDIA nv-embed-v1 embedding for `text`."""
    response = client.embeddings.create(
        input=text,
        model="nvidia/nv-embed-v1",
        encoding_format="float",
        extra_body={"input_type": "query", "truncate": "NONE"},
    )
    return response.data[0].embedding

# ---------- query_pinecone --------------------
# Earlier single-namespace variant, kept for reference:
# def query_pinecone(embedding):
#     result = index.query(vector=embedding, top_k=20,
#                          namespace="management_with_metadata", include_metadata=True)
#     return result["matches"]

def query_pinecone(embedding):
    """Blocking Pinecone query across all configured namespaces."""
    result = index.query_namespaces(
        vector=embedding,
        namespaces=PINECONE_NAMESPACE,
        metric="cosine",
        top_k=25,
        include_metadata=True,
    )
    return result["matches"]

# ---------- Groq Streaming LLM --------------------
groq_client = Groq(api_key=GROQ_API_KEY)

def query_groq_stream(user_input, relevant_context):
    """Blocking Groq streaming call; yields the response piece by piece."""
    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": f"""You are **MAQ**, a helpful assistant for Unilever employees.

🎯 Respond professionally, using this document context:
{relevant_context}

✅ Always format responses in Markdown.
✅ At the end of each answer, include a section:

## Sources:
- [Document Name](Link)
""",
                },
                {"role": "user", "content": user_input},
            ],
            model=LLM_MODEL,
            temperature=0.3,
            stream=True,
        )
        for chunk in chat_completion:
            yield chunk.choices[0].delta.content or ""
    except Exception as e:
        print("Groq streaming failed:", e)
        yield "[Error] Unable to process the request at the moment."

# -------- Tokenizer and Token Counter ----------------------
# Tokenizer used only to count tokens, not to generate text.
tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en")

def count_tokens(text: str) -> int:
    """Return the number of tokens in `text` under the Jina tokenizer."""
    return len(tokenizer.encode(text))
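
# Hedged sketch: `count_tokens` is defined above but never wired into the pipeline.
# One plausible use is trimming retrieved chunks to a token budget before they are
# placed in the prompt. `CONTEXT_TOKEN_BUDGET` and `trim_context` are illustrative
# names, not part of the original flow.
CONTEXT_TOKEN_BUDGET = 6000

def trim_context(chunks, budget=CONTEXT_TOKEN_BUDGET):
    """Keep the highest-ranked chunks until the token budget is exhausted."""
    kept, used = [], 0
    for chunk in chunks:  # chunks arrive ranked by similarity from Pinecone
        text = chunk["metadata"]["text"]
        cost = count_tokens(text)
        if used + cost > budget:
            break
        kept.append(text)
        used += cost
    return "\n".join(kept)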

# -------- process_user_query_stream ----------------------
def process_user_query_stream(user_query, chat_state, memory_state):
    """Embed the query, retrieve context from Pinecone, and stream the answer."""
    embedding = get_embedding(user_query)
    relevant_chunks = query_pinecone(embedding)
    context = "\n".join(chunk["metadata"]["text"] for chunk in relevant_chunks)

    history_str = "\n".join(memory_state)

    prompt = f"""
Context from Unilever documents:
{context}

Previous interaction:
{history_str}

Provide a clear, insightful, and professional response to the user's question.
"""

    partial_response = ""
    for chunk in query_groq_stream(f"{user_query}. Provide Links if any.", prompt):
        partial_response += chunk
        yield (
            chat_state + [(user_query, partial_response)],
            chat_state,
            memory_state,
            "",  # clears the input box during streaming
        )

    memory_state.append(f"User: {user_query}")
    memory_state.append(f"Unilever Assistant: {partial_response}")
    chat_state.append((user_query, partial_response))
    # Final yield: Gradio discards a generator's return value, so the updated
    # state must be yielded explicitly.
    yield chat_state, chat_state, memory_state, ""

def clear_chat():
    return [], [], []

# ----------- Gradio Interface ------------
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    #centered-logo {
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        text-align: center;
    }
    #input-row {
        display: flex;
        gap: 8px;
        align-items: center;
    }
    .user-textbox {
        flex: 5;
    }
    .tiny-send-btn {
        padding: 4px 12px !important;
        font-size: 12px !important;
        min-width: 60px !important;
        height: 36px !important;
        line-height: 1 !important;
    }
    """,
    fill_height=True,
    fill_width=True,
) as unilever_chat:

    with gr.Row():
        with gr.Column(elem_id="centered-logo"):
            gr.Image(
                value="unilever_logo.png",
                show_label=False,
                container=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
                height=150,
            )
            gr.Markdown("#### **Unilever AI Assistant** — Document-Aware Expert Chat")
            gr.Markdown("### MANAGEMENT DOCUMENTS — **Version 1.1 (With Metadata)**")

    chatbot = gr.Chatbot(
        label="Unilever Document Assistant",
        height=500,
        autoscroll=True,
        show_copy_button=True,
        show_share_button=False,
        avatar_images=["🧑", "🤖"],
    )

    with gr.Row(elem_id="input-row"):
        user_input = gr.Textbox(
            placeholder="Ask your compliance, policy, or SHE question...",
            label="Your Query",
            lines=1,
            scale=5,
            elem_classes=["user-textbox"],
        )
        submit_btn = gr.Button(
            "Send",
            variant="primary",
            scale=1,
            elem_classes=["tiny-send-btn"],
        )

    with gr.Row():
        clear_btn = gr.Button("Clear Chat 🗑️", variant="secondary")

    # Session-specific state
    chat_state = gr.State([])
    memory_state = gr.State([])

    # Event bindings with textbox reset
    submit_btn.click(
        fn=process_user_query_stream,
        inputs=[user_input, chat_state, memory_state],
        outputs=[chatbot, chat_state, memory_state, user_input],
    )
    user_input.submit(
        fn=process_user_query_stream,
        inputs=[user_input, chat_state, memory_state],
        outputs=[chatbot, chat_state, memory_state, user_input],
    )
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, chat_state, memory_state],
    )

    # 🚀 Force normal click to open links in a new tab and focus it (disabled):
    # unilever_chat.load(
    #     fn=None,
    #     js="""
    #     document.querySelectorAll('.gr-markdown a').forEach(link => {
    #         link.addEventListener('click', function(e) {
    #             e.preventDefault();
    #             window.open(this.href, '_blank').focus();
    #         });
    #     });
    #     """,
    #     inputs=[],
    #     outputs=[],
    # )

# Launch the interface
unilever_chat.launch()
# ------------------------------
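
# Hedged deployment note: for concurrent streaming users, enabling Gradio's
# request queue before launching is a common pattern, e.g.
# `unilever_chat.queue().launch()`. Queue size and server host/port would be
# deployment-specific choices, not prescribed by the original script.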