Carlos Salgado committed on
Commit
5876325
1 Parent(s): 6a48da2

add api prompt template

Browse files
backend/generate_metadata.py CHANGED
@@ -24,6 +24,15 @@ vectorstore = Vectara(vectara_customer_id=vectara_customer_id,
24
  vectara_corpus_id=vectara_corpus_id,
25
  vectara_api_key=vectara_api_key)
26
 
 
 
 
 
 
 
 
 
 
27
 
28
  def ingest(file_path):
29
  extension = file_path.split('.')[-1]
@@ -51,22 +60,18 @@ def ingest(file_path):
51
  "",
52
  ])
53
  docs = text_splitter.split_documents(documents)
54
- #print(docs)
55
 
56
  return docs
57
 
58
 
59
- # vectara = Vectara.from_documents(docs, embedding=FakeEmbeddings(size=768))
60
- # retriever = vectara.as_retriever()
61
 
62
- # return retriever
63
-
64
-
65
- def extract_metadata(docs):
66
  # plain text
67
  context = "".join(
68
  [doc.page_content.replace('\n\n','').replace('..','') for doc in docs])
69
 
 
 
70
  # Create client
71
  client = openai.OpenAI(
72
  base_url="https://api.together.xyz/v1",
@@ -75,16 +80,15 @@ def extract_metadata(docs):
75
 
76
  # Call the LLM with the JSON schema
77
  chat_completion = client.chat.completions.create(
78
- model="mistralai/Mixtral-8x7B-Instruct-v0.1",
79
- response_format={"type": "json_object", "schema": Metadata.model_json_schema()},
80
  messages=[
81
  {
82
  "role": "system",
83
- "content": f"You are a helpful assistant that understands BIM documents and engineering disciplines. Your answer should be in JSON format and only include the title, a brief one-sentence summary, and the discipline the document belongs to, distinguishing between {[d.value for d in BimDiscipline]} based on the given document."
84
  },
85
  {
86
  "role": "user",
87
- "content": f"Analyze the provided document, which could be in either German or English. Extract the title, summarize it briefly in one sentence, and infer the discipline. Document:\n{context}"
88
  }
89
  ]
90
  )
 
24
  vectara_corpus_id=vectara_corpus_id,
25
  vectara_api_key=vectara_api_key)
26
 
27
+ prompt_template = """
28
+ BimDiscipline = ['plumbing', 'network', 'heating', 'electrical', 'ventilation', 'architecture']
29
+
30
+ You are a helpful assistant that understands BIM documents and engineering disciplines. Your answer should be in JSON format and only include the title, a brief one-sentence summary, and the discipline the document belongs to, distinguishing between {[d.value for d in BimDiscipline]} based on the given document."
31
+
32
+ Analyze the provided document, which could be in either German or English. Extract the title, summarize it briefly in one sentence, and infer the discipline. Document:
33
+ context="
34
+ """
35
+
36
 
37
  def ingest(file_path):
38
  extension = file_path.split('.')[-1]
 
60
  "",
61
  ])
62
  docs = text_splitter.split_documents(documents)
 
63
 
64
  return docs
65
 
66
 
 
 
67
 
68
+ def extract_metadata(docs):
 
 
 
69
  # plain text
70
  context = "".join(
71
  [doc.page_content.replace('\n\n','').replace('..','') for doc in docs])
72
 
73
+ prompt = f'{prompt_template}{context}"'
74
+
75
  # Create client
76
  client = openai.OpenAI(
77
  base_url="https://api.together.xyz/v1",
 
80
 
81
  # Call the LLM with the JSON schema
82
  chat_completion = client.chat.completions.create(
83
+ model="mistralai/Mixtral-8x7B-Instruct-v0.1",
 
84
  messages=[
85
  {
86
  "role": "system",
87
+ "content": f"You are a helpful assistant that responsds in JSON format"
88
  },
89
  {
90
  "role": "user",
91
+ "content": prompt
92
  }
93
  ]
94
  )
backend/requirements.txt CHANGED
@@ -8,3 +8,4 @@ langchain
8
  openai
9
  chromadb
10
  tiktoken
 
 
8
  openai
9
  chromadb
10
  tiktoken
11
+ python-poppler
backend/schema.py CHANGED
@@ -5,11 +5,11 @@ from pydantic import BaseModel, Field, conlist
5
  from enum import Enum
6
 
7
  class BimDiscipline(str, Enum):
8
- plumbing = 'S - Sanitär'
9
  network = 'D - Datennetz'
10
  heating = 'H - Heizung'
11
  electrical = 'E - Elektro'
12
- ventilation = 'L - Lüftung'
13
  architecture = 'A - Architektur'
14
 
15
  # Define the schema for the output.
 
5
  from enum import Enum
6
 
7
  class BimDiscipline(str, Enum):
8
+ plumbing = 'S - Sanitaer'
9
  network = 'D - Datennetz'
10
  heating = 'H - Heizung'
11
  electrical = 'E - Elektro'
12
+ ventilation = 'L - Lueftung'
13
  architecture = 'A - Architektur'
14
 
15
  # Define the schema for the output.