jbilcke-hf HF staff commited on
Commit
fd6fd81
1 Parent(s): bab3a4d

add support for OpenAI Dall-e 3 (well, it doesn't work yet, some kind of 400 error)

Browse files
Files changed (5) hide show
  1. .env +5 -1
  2. README.md +1 -1
  3. package-lock.json +10 -10
  4. package.json +1 -1
  5. src/app/engine/render.ts +68 -1
.env CHANGED
@@ -3,6 +3,7 @@
3
  # - REPLICATE
4
  # - INFERENCE_ENDPOINT
5
  # - INFERENCE_API
 
6
  RENDERING_ENGINE="INFERENCE_API"
7
 
8
  # Supported values:
@@ -32,7 +33,7 @@ AUTH_HF_API_TOKEN=
32
  AUTH_REPLICATE_API_TOKEN=
33
 
34
  # OpenAI.com token: available for the LLM engine and the RENDERING engine
35
- AUTH_OPENAI_TOKEN=
36
 
37
  # An experimental RENDERING engine (sorry it is not very documented yet, so you can use one of the other engines)
38
  AUTH_VIDEOCHAIN_API_TOKEN=
@@ -55,6 +56,9 @@ RENDERING_HF_INFERENCE_API_REFINER_MODEL="stabilityai/stable-diffusion-xl-refine
55
  # An experimental RENDERING engine (sorry it is not very documented yet, so you can use one of the other engines)
56
  RENDERING_VIDEOCHAIN_API_URL="http://localhost:7860"
57
 
 
 
 
58
  # ------------- LLM API CONFIG ----------------
59
 
60
  # If you decided to use OpenAI for the LLM engine
 
3
  # - REPLICATE
4
  # - INFERENCE_ENDPOINT
5
  # - INFERENCE_API
6
+ # - OPENAI
7
  RENDERING_ENGINE="INFERENCE_API"
8
 
9
  # Supported values:
 
33
  AUTH_REPLICATE_API_TOKEN=
34
 
35
  # OpenAI.com token: available for the LLM engine and the RENDERING engine
36
+ AUTH_OPENAI_API_KEY=
37
 
38
  # An experimental RENDERING engine (sorry it is not very documented yet, so you can use one of the other engines)
39
  AUTH_VIDEOCHAIN_API_TOKEN=
 
56
  # An experimental RENDERING engine (sorry it is not very documented yet, so you can use one of the other engines)
57
  RENDERING_VIDEOCHAIN_API_URL="http://localhost:7860"
58
 
59
+ RENDERING_OPENAI_API_BASE_URL="https://api.openai.com/v1"
60
+ RENDERING_OPENAI_API_MODEL="dall-e-3"
61
+
62
  # ------------- LLM API CONFIG ----------------
63
 
64
  # If you decided to use OpenAI for the LLM engine
README.md CHANGED
@@ -24,7 +24,7 @@ If you try to duplicate the project, open the `.env` you will see it requires so
24
 
25
  Provider config:
26
  - `LLM_ENGINE`: can be one of: "INFERENCE_API", "INFERENCE_ENDPOINT", "OPENAI"
27
- - `RENDERING_ENGINE`: can be one of: "INFERENCE_API", "INFERENCE_ENDPOINT", "REPLICATE", "VIDEOCHAIN" for now, unless you code your custom solution
28
 
29
  Auth config:
30
  - `AUTH_HF_API_TOKEN`: necessary if you decide to use an inference api model or a custom inference endpoint
 
24
 
25
  Provider config:
26
  - `LLM_ENGINE`: can be one of: "INFERENCE_API", "INFERENCE_ENDPOINT", "OPENAI"
27
+ - `RENDERING_ENGINE`: can be one of: "INFERENCE_API", "INFERENCE_ENDPOINT", "REPLICATE", "VIDEOCHAIN", "OPENAI" for now, unless you code your custom solution
28
 
29
  Auth config:
30
  - `AUTH_HF_API_TOKEN`: necessary if you decide to use an inference api model or a custom inference endpoint
package-lock.json CHANGED
@@ -44,7 +44,7 @@
44
  "konva": "^9.2.2",
45
  "lucide-react": "^0.260.0",
46
  "next": "13.4.10",
47
- "openai": "^4.10.0",
48
  "pick": "^0.0.1",
49
  "postcss": "8.4.26",
50
  "react": "18.2.0",
@@ -3907,9 +3907,9 @@
3907
  "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
3908
  },
3909
  "node_modules/fast-glob": {
3910
- "version": "3.3.1",
3911
- "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz",
3912
- "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==",
3913
  "dependencies": {
3914
  "@nodelib/fs.stat": "^2.0.2",
3915
  "@nodelib/fs.walk": "^1.2.3",
@@ -5178,9 +5178,9 @@
5178
  }
5179
  },
5180
  "node_modules/nanoid": {
5181
- "version": "3.3.6",
5182
- "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.6.tgz",
5183
- "integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==",
5184
  "funding": [
5185
  {
5186
  "type": "github",
@@ -5493,9 +5493,9 @@
5493
  }
5494
  },
5495
  "node_modules/openai": {
5496
- "version": "4.15.3",
5497
- "resolved": "https://registry.npmjs.org/openai/-/openai-4.15.3.tgz",
5498
- "integrity": "sha512-j2XSxxiOhF7lCiHUXmuDOGOgtKa36ia9pOQ2m9YCOMA2Ee4QTI+MzdHRoHlp6ewOsvW5NXkoT+xustSZljiGnA==",
5499
  "dependencies": {
5500
  "@types/node": "^18.11.18",
5501
  "@types/node-fetch": "^2.6.4",
 
44
  "konva": "^9.2.2",
45
  "lucide-react": "^0.260.0",
46
  "next": "13.4.10",
47
+ "openai": "^4.15.4",
48
  "pick": "^0.0.1",
49
  "postcss": "8.4.26",
50
  "react": "18.2.0",
 
3907
  "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
3908
  },
3909
  "node_modules/fast-glob": {
3910
+ "version": "3.3.2",
3911
+ "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz",
3912
+ "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==",
3913
  "dependencies": {
3914
  "@nodelib/fs.stat": "^2.0.2",
3915
  "@nodelib/fs.walk": "^1.2.3",
 
5178
  }
5179
  },
5180
  "node_modules/nanoid": {
5181
+ "version": "3.3.7",
5182
+ "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz",
5183
+ "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==",
5184
  "funding": [
5185
  {
5186
  "type": "github",
 
5493
  }
5494
  },
5495
  "node_modules/openai": {
5496
+ "version": "4.15.4",
5497
+ "resolved": "https://registry.npmjs.org/openai/-/openai-4.15.4.tgz",
5498
+ "integrity": "sha512-EnlSl1p8n7Q/HnBf4+VOEcYloBKKe23sKOFfH/WJcw+XVyWav4lwDK4wCmsUY1wS4RFOdbA2EwBUB2p5WEPmoQ==",
5499
  "dependencies": {
5500
  "@types/node": "^18.11.18",
5501
  "@types/node-fetch": "^2.6.4",
package.json CHANGED
@@ -45,7 +45,7 @@
45
  "konva": "^9.2.2",
46
  "lucide-react": "^0.260.0",
47
  "next": "13.4.10",
48
- "openai": "^4.10.0",
49
  "pick": "^0.0.1",
50
  "postcss": "8.4.26",
51
  "react": "18.2.0",
 
45
  "konva": "^9.2.2",
46
  "lucide-react": "^0.260.0",
47
  "next": "13.4.10",
48
+ "openai": "^4.15.4",
49
  "pick": "^0.0.1",
50
  "postcss": "8.4.26",
51
  "react": "18.2.0",
src/app/engine/render.ts CHANGED
@@ -2,6 +2,7 @@
2
 
3
  import { v4 as uuidv4 } from "uuid"
4
  import Replicate from "replicate"
 
5
 
6
  import { RenderRequest, RenderedScene, RenderingEngine } from "@/types"
7
  import { generateSeed } from "@/lib/generateSeed"
@@ -22,6 +23,10 @@ const replicateModelVersion = `${process.env.RENDERING_REPLICATE_API_MODEL_VERSI
22
  const videochainToken = `${process.env.AUTH_VIDEOCHAIN_API_TOKEN || ""}`
23
  const videochainApiUrl = `${process.env.RENDERING_VIDEOCHAIN_API_URL || ""}`
24
 
 
 
 
 
25
  export async function newRender({
26
  prompt,
27
  // negativePrompt,
@@ -57,7 +62,69 @@ export async function newRender({
57
  const guidanceScale = 9
58
 
59
  try {
60
- if (renderingEngine === "REPLICATE") {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  if (!replicateToken) {
62
  throw new Error(`you need to configure your REPLICATE_API_TOKEN in order to use the REPLICATE rendering engine`)
63
  }
 
2
 
3
  import { v4 as uuidv4 } from "uuid"
4
  import Replicate from "replicate"
5
+ import OpenAI from "openai"
6
 
7
  import { RenderRequest, RenderedScene, RenderingEngine } from "@/types"
8
  import { generateSeed } from "@/lib/generateSeed"
 
23
  const videochainToken = `${process.env.AUTH_VIDEOCHAIN_API_TOKEN || ""}`
24
  const videochainApiUrl = `${process.env.RENDERING_VIDEOCHAIN_API_URL || ""}`
25
 
26
+ const openaiApiKey = `${process.env.AUTH_OPENAI_API_KEY || ""}`
27
+ const openaiApiBaseUrl = `${process.env.RENDERING_OPENAI_API_BASE_URL || "https://api.openai.com/v1"}`
28
+ const openaiApiModel = `${process.env.RENDERING_OPENAI_API_MODEL || "dall-e-3"}`
29
+
30
  export async function newRender({
31
  prompt,
32
  // negativePrompt,
 
62
  const guidanceScale = 9
63
 
64
  try {
65
+ if (renderingEngine === "OPENAI") {
66
+
67
+ /*
68
+ const openai = new OpenAI({
69
+ apiKey: openaiApiKey
70
+ });
71
+ */
72
+
73
+ // When using DALL·E 3, images can have a size of 1024x1024, 1024x1792 or 1792x1024 pixels.
74
+ // the improved resolution is nice, but the AI Comic Factory needs a special ratio
75
+ // anyway, let's see what we can do
76
+
77
+ const size =
78
+ width > height ? '1792x1024' :
79
+ width < height ? '1024x1792' :
80
+ '1024x1024'
81
+
82
+ /*
83
+ const response = await openai.createImage({
84
+ model: "dall-e-3",
85
+ prompt,
86
+ n: 1,
87
+ size: size as any,
88
+ // quality: "standard",
89
+ })
90
+ */
91
+
92
+ const res = await fetch(`${openaiApiBaseUrl}/images/generations`, {
93
+ method: "POST",
94
+ headers: {
95
+ Accept: "application/json",
96
+ "Content-Type": "application/json",
97
+ Authorization: `Bearer ${openaiApiKey}`,
98
+ },
99
+ body: JSON.stringify({
100
+ model: "dall-e-3",
101
+ prompt,
102
+ n: 1,
103
+ size,
104
+ // quality: "standard",
105
+ }),
106
+ cache: 'no-store',
107
+ // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
108
+ // next: { revalidate: 1 }
109
+ })
110
+
111
+ if (res.status !== 200) {
112
+ throw new Error('Failed to fetch data')
113
+ }
114
+
115
+ const response = (await res.json()) as { data: { url: string }[] }
116
+
117
+ console.log("response:", response)
118
+ return {
119
+ renderId: uuidv4(),
120
+ status: "completed",
121
+ assetUrl: response.data[0].url || "",
122
+ alt: prompt,
123
+ error: "",
124
+ maskUrl: "",
125
+ segments: []
126
+ } as RenderedScene
127
+ } else if (renderingEngine === "REPLICATE") {
128
  if (!replicateToken) {
129
  throw new Error(`you need to configure your REPLICATE_API_TOKEN in order to use the REPLICATE rendering engine`)
130
  }