Carlos Salgado committed
Commit c49ca87
2 Parent(s): 793ea5f 61dfc10

Merge branch 'main' into ingest

.github/workflows/ci-cd.yml CHANGED
@@ -25,3 +25,4 @@ jobs:
       run: |
         docker build -t ${{ secrets.DOCKER_USERNAME }}/docverifyrag:latest .
         docker push ${{ secrets.DOCKER_USERNAME }}/docverifyrag:latest
+
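For reference, a rough local equivalent of this CI step is sketched below; it assumes you log in to Docker Hub interactively and substitute your own username for the `DOCKER_USERNAME` secret.

```bash
# Hedged local equivalent of the CI build-and-push step (username is a placeholder).
docker login                                              # prompts for Docker Hub credentials
docker build -t <your-dockerhub-username>/docverifyrag:latest .
docker push <your-dockerhub-username>/docverifyrag:latest
```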
Dockerfile CHANGED
@@ -1,27 +1,44 @@
-# Use an official Python runtime as a parent image
-FROM python:3.9-slim
-
-# Set environment variables
-ENV EXAMPLE 1
-ENV EXAMPLE 2
-
-# Set the working directory in the container
-WORKDIR /app
-
-# Install system dependencies
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends gcc \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install application dependencies
-COPY requirements.txt /app/
+# Stage 1: Build frontend
+FROM node:latest AS frontend
+
+# Set working directory for frontend
+WORKDIR /app/frontend
+
+# Copy frontend source code
+COPY frontend/package.json frontend/package-lock.json ./
+COPY frontend .
+
+# Install dependencies
+RUN npm install
+
+# Build frontend
+RUN npm run build
+
+# Stage 2: Build backend
+FROM python:3.9-slim AS backend
+
+# Set working directory for backend
+WORKDIR /app/backend
+
+# Copy backend source code
+COPY backend .
+
+# Install backend dependencies
+COPY backend/requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the current directory contents into the container at /app
-COPY . /app/
-
-# Expose the port the app runs on
-EXPOSE 5000
-
-# Run the application
-CMD ["python", "app.py"]
+# Stage 3: Serve frontend and backend using nginx and gunicorn
+FROM nginx:latest AS production
+
+# Copy built frontend files from the frontend stage to nginx
+COPY --from=frontend /app/frontend/dist /usr/share/nginx/html
+
+# Copy built backend code from the backend stage
+COPY --from=backend /app/backend /app/backend
+
+# Expose port 80 for nginx
+EXPOSE 80
 
+# Start gunicorn server for backend
+CMD ["gunicorn", "--bind", "0.0.0.0:8000", "app:app"]
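A minimal local smoke test of the new multi-stage image might look like the sketch below. The image tag and host port mappings are assumptions; note that the final stage exposes port 80 for nginx while the CMD starts gunicorn on port 8000, so which port actually answers depends on the process the CMD launches.

```bash
# Build the multi-stage image from the repository root (tag is an assumption).
docker build -t docverifyrag:local .

# Run it, publishing both the nginx port (80) and the gunicorn port (8000).
docker run --rm -p 8080:80 -p 8000:8000 docverifyrag:local
```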
README.md CHANGED
@@ -23,13 +23,8 @@
 
 </details>
 
-## Video Demo
-
-[link](https://link.com)
-
-## Web App
-
-[link](https://link.com)
+## TRY the prototype
+[DocVerifyRAG](https://docverify-rag.vercel.app)
 
 ## Screenshots
 
@@ -61,17 +56,35 @@
 
 ### Install locally
 
+#### Step 1 - Frontend
+
 1. Clone the repository:
 ```bash
 $ git clone https://github.com/eliawaefler/DocVerifyRAG.git
 ```
 
-2. Navigate to the project directory:
+2. Navigate to the frontend directory:
 ```bash
-$ cd DocVerifyRAG
+$ cd DocVerifyRAG/frontend
 ```
 
 3. Install dependencies:
+```bash
+$ npm install
+```
+4. Run project:
+```bash
+$ npm run dev
+```
+
+#### Step 2 - Backend
+
+1. Navigate to the backend directory:
+```bash
+$ cd DocVerifyRAG/backend
+```
+
+2. Install dependencies:
 ```bash
 $ pip install -r requirements.txt
 ```
langchain_vectara.ipynb → backend/langchain_vectara.ipynb RENAMED
File without changes
langchain_vectara.py → backend/langchain_vectara.py RENAMED
File without changes
requirements.txt → backend/requirements.txt RENAMED
@@ -2,4 +2,9 @@ langchain_community
 langchain-text-splitters
 langchain-together
 unstructured[local-inference]
-python-dotenv
+python-dotenv
+streamlit
+langchain
+openai
+chromadb
+tiktoken
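Since requirements.txt now lives under backend/, a local install should point at the new path. A minimal sketch follows; the virtual-environment name is an assumption.

```bash
# Create and activate an isolated environment, then install the backend dependencies
# from their new location under backend/.
python -m venv .venv
source .venv/bin/activate
pip install -r backend/requirements.txt
```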
school_plumbing.txt → backend/school_plumbing.txt RENAMED
File without changes
schulgebäudes.txt → backend/schulgebäudes.txt RENAMED
File without changes
together_call.py → backend/together_call.py RENAMED
File without changes
frontend/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "docverifyrag",
   "creadits": "Leo Miranda",
   "private": true,
-  "version": "1.0",
+  "version": "0.1.0",
   "type": "module",
   "scripts": {
     "dev": "vite",
frontend/src/assets/verified-symbol-icon.png:Zone.Identifier DELETED
@@ -1,4 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-ReferrerUrl=https://uxwing.com/wp-content/themes/uxwing/download/arts-graphic-shapes/verified-symbol-icon.png
-HostUrl=https://uxwing.com/wp-content/themes/uxwing/download/arts-graphic-shapes/verified-symbol-icon.png
frontend/src/components/Features.tsx CHANGED
@@ -5,18 +5,14 @@ import {
   CardFooter,
   CardHeader,
 } from "@/components/ui/card";
-import image4 from "../assets/looking-ahead.png";
-//import { UploadDoc } from "./upload/streamlit_app.py";
 
 interface FeatureProps {
   title: string;
-  image: string;
 }
 
 const features: FeatureProps[] = [
   {
     title: "UPLOAD DOCUMENT",
-    image: image4,
   },
 ];
 
@@ -31,7 +27,7 @@ export const Features = () => {
   return (
     <section
       id="features"
-      className="container py-24 sm:py-32 space-y-8"
+      className="container py-28 sm:py-36 space-y-8"
     >
       <h2 className="text-3xl lg:text-4xl font-bold md:text-center">
         Get Started{" "}
@@ -55,20 +51,18 @@ export const Features = () => {
       </div>
 
       <div className="grid md:grid-cols-2 lg:grid-cols-1">
-        {features.map(({ title, image }: FeatureProps) => (
+        {features.map(({ title }: FeatureProps) => (
           <Card key={title}>
             <CardHeader className="text-3xl lg:text-4xl font-bold md:text-center">
               <CardTitle>{title}</CardTitle>
             </CardHeader>
-            <CardFooter>
-              <img
-                src={image}
-                alt="About feature"
-                className="w-[150px] lg:w-[300px] mx-auto"
-              />
-              {/* Upload sections */}
-
-
+            <CardFooter className="flex flex-wrap md:justify-center gap-4">
+              <iframe
+                src="https://sandramsc-docverifyrag.hf.space"
+                style={{ border: 'none' }}
+                width="850"
+                height="450"
+              ></iframe>
             </CardFooter>
           </Card>
         ))}
frontend/src/components/Hero.tsx CHANGED
@@ -1,10 +1,9 @@
-import { HeroCards } from "./HeroCards";
 export const Hero = () => {
   return (
-    <section className="container grid lg:grid-cols-2 place-items-center py-20 md:py-32 gap-10">
+    <section className="container py-30 sm:py-36 space-y-8">
     <div className="text-center lg:text-start space-y-6">
-      <main className="text-5xl md:text-6xl font-bold">
-        <h1 className="inline">
+      <main className="text-8xl lg:text-10xl font-bold">
+        <h1 className="md:text-center">
         <span className="inline bg-gradient-to-r from-[#F596D3] to-[#D247BF] text-transparent bg-clip-text">
           Doc
         </span>
@@ -13,20 +12,7 @@ export const Hero = () => {
         RAG
       </span></h1>{" "}
     </main>
-
-    <p className="text-xl text-muted-foreground md:w-10/12 mx-auto lg:mx-0">
-      Lorem ipsum dolor sit amet consectetur, adipisicing elit. Veritatis dolor pariatur sit!
-    </p>
-
     </div>
-
-    {/* Hero cards sections */}
-    <div className="z-10">
-      <HeroCards />
-    </div>
-
-    {/* Shadow effect */}
-    <div className="shadow"></div>
     </section>
   );
 };
frontend/src/components/HowItWorks.tsx CHANGED
@@ -30,7 +30,7 @@ export const HowItWorks = () => {
   return (
     <section
       id="howItWorks"
-      className="container text-center py-24 sm:py-32"
+      className="container text-center py-22 sm:py-30"
    >
       <h2 className="text-3xl md:text-4xl font-bold ">
         Fast and Accurate{" "}
frontend/src/components/Navbar.tsx CHANGED
@@ -58,7 +58,7 @@ export const Navbar = () => {
 
           <img
             src={image}
-            alt="About feature"
+            alt="logo.png"
             className="w-[18px] lg:w-[28px] mx-2"
           />
         ))}DocVerifyRAG</a>
frontend/src/components/Statistics.tsx DELETED
@@ -1,41 +0,0 @@
-export const Statistics = () => {
-  interface statsProps {
-    quantity: string;
-    description: string;
-  }
-
-  const stats: statsProps[] = [
-    {
-      quantity: "2.7K+",
-      description: "Users",
-    },
-    {
-      quantity: "1.8K+",
-      description: "Subscribers",
-    },
-    {
-      quantity: "112",
-      description: "Downloads",
-    },
-    {
-      quantity: "4",
-      description: "Products",
-    },
-  ];
-
-  return (
-    <section id="statistics">
-      <div className="grid grid-cols-2 lg:grid-cols-4 gap-8">
-        {stats.map(({ quantity, description }: statsProps) => (
-          <div
-            key={description}
-            className="space-y-2 text-center"
-          >
-            <h2 className="text-3xl sm:text-4xl font-bold ">{quantity}</h2>
-            <p className="text-xl text-muted-foreground">{description}</p>
-          </div>
-        ))}
-      </div>
-    </section>
-  );
-};
frontend/src/components/upload/.streamlit/config.toml DELETED
@@ -1,6 +0,0 @@
-[theme]
-primaryColor="#F63366"
-backgroundColor="#FFFFFF"
-secondaryBackgroundColor="#F0F2F6"
-textColor="#262730"
-font="sans serif"
frontend/src/components/upload/app-v1.py DELETED
@@ -1,46 +0,0 @@
-import streamlit as st
-from langchain.llms import OpenAI
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.chains import RetrievalQA
-
-def generate_response(uploaded_file, openai_api_key, query_text):
-    # Load document if file is uploaded
-    if uploaded_file is not None:
-        documents = [uploaded_file.read().decode()]
-        # Split documents into chunks
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-        texts = text_splitter.create_documents(documents)
-        # Select embeddings
-        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
-        # Create a vectorstore from documents
-        db = Chroma.from_documents(texts, embeddings)
-        # Create retriever interface
-        retriever = db.as_retriever()
-        # Create QA chain
-        qa = RetrievalQA.from_chain_type(llm=OpenAI(openai_api_key=openai_api_key), chain_type='stuff', retriever=retriever)
-        return qa.run(query_text)
-
-# Page title
-st.set_page_config(page_title='🦜🔗 Ask the Doc App')
-st.title('🦜🔗 Ask the Doc App')
-
-# File upload
-uploaded_file = st.file_uploader('Upload an article', type='txt')
-# Query text
-query_text = st.text_input('Enter your question:', placeholder = 'Please provide a short summary.', disabled=not uploaded_file)
-
-# Form input and query
-result = []
-with st.form('myform', clear_on_submit=True):
-    openai_api_key = st.text_input('OpenAI API Key', type='password', disabled=not (uploaded_file and query_text))
-    submitted = st.form_submit_button('Submit', disabled=not(uploaded_file and query_text))
-    if submitted and openai_api_key.startswith('sk-'):
-        with st.spinner('Calculating...'):
-            response = generate_response(uploaded_file, openai_api_key, query_text)
-            result.append(response)
-        del openai_api_key
-
-if len(result):
-    st.info(response)
frontend/src/components/upload/streamlit_app.py DELETED
@@ -1,47 +0,0 @@
-import streamlit as st
-from langchain.llms import OpenAI
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.chains import RetrievalQA
-
-def generate_response(uploaded_file, openai_api_key, query_text):
-    # Load document if file is uploaded
-    if uploaded_file is not None:
-        documents = [uploaded_file.read().decode()]
-        # Split documents into chunks
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-        texts = text_splitter.create_documents(documents)
-        # Select embeddings
-        embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
-        # Create a vectorstore from documents
-        db = Chroma.from_documents(texts, embeddings)
-        # Create retriever interface
-        retriever = db.as_retriever()
-        # Create QA chain
-        qa = RetrievalQA.from_chain_type(llm=OpenAI(openai_api_key=openai_api_key), chain_type='stuff', retriever=retriever)
-        return qa.run(query_text)
-
-
-# Page title
-st.set_page_config(page_title='🦜🔗 Ask the Doc App')
-st.title('🦜🔗 Ask the Doc App')
-
-# File upload
-uploaded_file = st.file_uploader('Upload an article', type='txt')
-# Query text
-query_text = st.text_input('Enter your question:', placeholder = 'Please provide a short summary.', disabled=not uploaded_file)
-
-# Form input and query
-result = []
-with st.form('myform', clear_on_submit=True):
-    openai_api_key = st.text_input('OpenAI API Key', type='password', disabled=not (uploaded_file and query_text))
-    submitted = st.form_submit_button('Submit', disabled=not(uploaded_file and query_text))
-    if submitted and openai_api_key.startswith('sk-'):
-        with st.spinner('Calculating...'):
-            response = generate_response(uploaded_file, openai_api_key, query_text)
-            result.append(response)
-        del openai_api_key
-
-if len(result):
-    st.info(response)